from __future__ import annotations

from itertools import islice
from typing import Any

from core.tools.base import BaseTool, ToolContext
from core.tools.registry import registry
from core.events import bus

# Number of hits requested from the "search" tool.
_SEARCH_LIMIT = 10
# A word counts as a keyword when it is longer than six characters.
_MIN_KEYWORD_LENGTH = 7
# Number of keywords listed per source in the summary hint.
_KEYWORDS_PER_SOURCE = 5


class ResearchTool(BaseTool):
    """
    High-level research orchestrator.

    Combines:
    - search
    - intelligent ranking
    - crawling

    into a structured report.
    """

    name = "research"
    description = "Autonomous web research pipeline"

    # =========================
    # EXECUTE
    # =========================

    def execute(self, payload: dict[str, Any], ctx: ToolContext) -> dict[str, Any]:
        """
        Run the search -> rank -> crawl pipeline for one query.

        Payload keys read here:
        - "query": required, must be a ``str``.
        - "depth": optional, defaults to 1. Currently only logged.
        - "max_sources": optional, defaults to 3. Used as the upper slice
          bound on the ranked results, so it must not be negative.

        Returns a dict with "query", "sources_used", "sources" and
        "summary_hint", or a dict with "query" and "error" when the search
        step yields no results.

        Raises:
            ValueError: if "query" is not a string or "max_sources" is a
                negative integer.
        """
        query = payload.get("query")
        depth = payload.get("depth", 1)
        max_sources = payload.get("max_sources", 3)

        if not isinstance(query, str):
            raise ValueError("query must be string")

        # A negative slice bound would silently mean "all but the last N"
        # ranked sources; reject it before any tool is invoked.
        if isinstance(max_sources, int) and max_sources < 0:
            raise ValueError("max_sources must be non-negative")

        bus.log(
            "RESEARCH",
            "research_execute",
            "INFO",
            {
                "query": query,
                "depth": depth,
                "max_sources": max_sources
            }
        )

        # Step 1: search
        search_results = registry.run(
            "search",
            {"action": "search", "query": query, "limit": _SEARCH_LIMIT},
            ctx
        )

        results = search_results.get("results", [])

        if not results:
            return {
                "query": query,
                "error": "No search results found"
            }

        # Step 2: intelligent ranking
        ranked = registry.run(
            "intelligent_search",
            {
                "action": "rank",
                "query": query,
                "results": results
            },
            ctx
        )

        ranked_list = ranked.get("ranked", [])[:max_sources]

        # Step 3: crawl top sources
        pages = []

        for item in ranked_list:
            url = item.get("url")

            # Entries without a URL cannot be fetched; skip them.
            if not url:
                continue

            page = registry.run(
                "crawler",
                {
                    "action": "fetch",
                    "url": url
                },
                ctx
            )

            pages.append({
                "url": url,
                "title": item.get("title"),
                # Normalise a missing or None text to "" so consumers can
                # always treat it as a string.
                "text": page.get("text") or "",
                "score": item.get("score", 0)
            })

        # Step 4: synthesize structure
        return {
            "query": query,
            "sources_used": len(pages),
            "sources": pages,
            "summary_hint": self._build_hint(pages)
        }

    # =========================
    # SIMPLE SYNTHESIS HELPER
    # =========================

    def _build_hint(self, pages: list[dict[str, Any]]) -> str:
        """
        Lightweight heuristic summary hint.

        This is NOT a full LLM summary — just structure guidance.

        For each page, lists the first few whitespace-separated words that
        are longer than six characters, one line per source URL. Returns
        "No data available." when ``pages`` is empty.
        """
        if not pages:
            return "No data available."

        lines = []

        for page in pages:
            # Guard against a present-but-None text value.
            text = page.get("text") or ""

            # crude keyword extraction (lightweight, no deps); islice stops
            # scanning as soon as enough keywords have been collected.
            long_words = (
                word for word in text.split()
                if len(word) >= _MIN_KEYWORD_LENGTH
            )
            keywords = islice(long_words, _KEYWORDS_PER_SOURCE)

            lines.append(f"- {page.get('url')}: {', '.join(keywords)}")

        return "Key extracted themes per source:\n" + "\n".join(lines)


# =========================
# REGISTER
# =========================

registry.register(ResearchTool())