# Listing metadata from the code viewer: 149 lines, 3.5 KiB, Python.
from __future__ import annotations
|
|
|
|
from typing import Any
|
|
|
|
from core.tools.base import BaseTool, ToolContext
|
|
from core.tools.registry import registry
|
|
from core.events import bus
|
|
|
|
|
|
class ResearchTool(BaseTool):
    """
    High-level research orchestrator.

    Combines:
    - search
    - intelligent ranking
    - crawling
    into a structured report.
    """

    name = "research"
    description = "Autonomous web research pipeline"

    # =========================
    # EXECUTE
    # =========================

    def execute(self, payload: dict[str, Any], ctx: ToolContext) -> dict[str, Any]:
        """
        Run the search -> rank -> crawl pipeline for a single query.

        Payload keys read here:
        - "query": required, non-empty string.
        - "depth": optional, defaults to 1. It is only logged; nothing in
          this method acts on it yet.
        - "max_sources": optional, defaults to 3. Non-negative int capping
          how many ranked results are crawled.

        Returns a dict with "query", "sources_used", "sources" and
        "summary_hint", or a dict with "query" and "error" when the search
        step yields no results.

        Raises:
            ValueError: if "query" is not a non-empty string, or if
                "max_sources" is not a non-negative int.
        """
        query = payload.get("query")
        depth = payload.get("depth", 1)
        max_sources = payload.get("max_sources", 3)

        if not isinstance(query, str):
            raise ValueError("query must be string")
        if not query.strip():
            raise ValueError("query must be a non-empty string")

        # max_sources is used as a slice bound below: a negative value would
        # silently drop items from the *end* of the ranking instead of capping
        # it, and a non-int would fail with an opaque TypeError.
        if not isinstance(max_sources, int) or max_sources < 0:
            raise ValueError("max_sources must be a non-negative integer")

        bus.log(
            "RESEARCH",
            "research_execute",
            "INFO",
            {
                "query": query,
                "depth": depth,
                "max_sources": max_sources,
            },
        )

        # Step 1: search
        search_results = registry.run(
            "search",
            {"action": "search", "query": query, "limit": 10},
            ctx,
        )

        # `or {}` / `or []` tolerate an empty response or a null field.
        results = (search_results or {}).get("results") or []

        if not results:
            return {
                "query": query,
                "error": "No search results found",
            }

        # Step 2: intelligent ranking
        ranked = registry.run(
            "intelligent_search",
            {
                "action": "rank",
                "query": query,
                "results": results,
            },
            ctx,
        )

        ranked_list = ((ranked or {}).get("ranked") or [])[:max_sources]

        # Step 3: crawl top sources
        pages = []

        for item in ranked_list:
            url = item.get("url")

            # Ranked entries without a URL cannot be fetched.
            if not url:
                continue

            page = registry.run(
                "crawler",
                {
                    "action": "fetch",
                    "url": url,
                },
                ctx,
            )

            pages.append({
                "url": url,
                "title": item.get("title"),
                # Normalize a missing or null body to "" so downstream
                # string handling never sees None.
                "text": (page or {}).get("text") or "",
                "score": item.get("score", 0),
            })

        # Step 4: synthesize structure
        return {
            "query": query,
            "sources_used": len(pages),
            "sources": pages,
            "summary_hint": self._build_hint(pages),
        }

    # =========================
    # SIMPLE SYNTHESIS HELPER
    # =========================

    def _build_hint(self, pages: list[dict[str, Any]]) -> str:
        """
        Lightweight heuristic summary hint.

        This is NOT a full LLM summary — just structure guidance.

        For each page, lists up to five whitespace-separated tokens longer
        than six characters, in order of appearance, next to the page URL.
        Returns "No data available." when `pages` is empty.
        """
        if not pages:
            return "No data available."

        lines = []

        for p in pages:
            # A page dict may carry an explicit None for "text".
            text = p.get("text") or ""

            # crude keyword extraction (lightweight, no deps)
            keywords = [w for w in text.split() if len(w) > 6][:5]

            lines.append(f"- {p.get('url')}: {', '.join(keywords)}")

        return "Key extracted themes per source:\n" + "\n".join(lines)
|
|
|
|
|
|
# =========================
# REGISTER
# =========================

# Import-time side effect: registers a ResearchTool instance with the shared
# tool registry when this module is imported.
registry.register(ResearchTool())
