Files
python-mcp/tools/research.py
AuroraCrimsonRose e471f9bc54 Added many tools
2026-06-03 06:01:06 -05:00

149 lines
3.5 KiB
Python

from __future__ import annotations
from typing import Any
from core.tools.base import BaseTool, ToolContext
from core.tools.registry import registry
from core.events import bus
class ResearchTool(BaseTool):
    """
    High-level research orchestrator.

    Chains three other registered tools into one pipeline and returns a
    structured report:

    - ``search``: fetch candidate results for the query
    - ``intelligent_search``: rank those candidates against the query
    - ``crawler``: fetch the page text of the top-ranked sources
    """

    name = "research"
    description = "Autonomous web research pipeline"

    # =========================
    # EXECUTE
    # =========================

    def execute(
        self, payload: dict[str, Any], ctx: ToolContext
    ) -> dict[str, Any]:
        """
        Run the search -> rank -> crawl pipeline for ``payload["query"]``.

        Payload keys:
            query: the search query; must be a ``str``.
            depth: optional, defaults to 1. It is only recorded in the log
                event; it does not influence the pipeline.
            max_sources: optional, defaults to 3. Upper bound on how many
                ranked sources are crawled. ``None`` disables the cap.

        Returns:
            On success, a dict with ``query``, ``sources_used`` (number of
            crawled pages), ``sources`` (one dict per page with ``url``,
            ``title``, ``text`` and ``score``) and ``summary_hint``.
            When the search step yields no results, a dict with ``query``
            and ``error`` instead.

        Raises:
            ValueError: if ``query`` is not a string, or ``max_sources`` is
                neither ``None`` nor a non-negative integer.
        """
        query = payload.get("query")
        depth = payload.get("depth", 1)
        max_sources = payload.get("max_sources", 3)

        if not isinstance(query, str):
            raise ValueError("query must be string")

        # A negative cap would silently drop items from the END of the ranked
        # list instead of limiting it, and a non-integer would only fail at
        # the slice, after search and ranking had already run. Reject both up
        # front. bool is excluded explicitly because it is an int subclass.
        if max_sources is not None and (
            isinstance(max_sources, bool)
            or not isinstance(max_sources, int)
            or max_sources < 0
        ):
            raise ValueError("max_sources must be a non-negative integer")

        bus.log(
            "RESEARCH",
            "research_execute",
            "INFO",
            {
                "query": query,
                "depth": depth,
                "max_sources": max_sources
            }
        )

        # Step 1: search
        search_results = registry.run(
            "search",
            {"action": "search", "query": query, "limit": 10},
            ctx
        )
        results = search_results.get("results", [])
        if not results:
            return {
                "query": query,
                "error": "No search results found"
            }

        # Step 2: intelligent ranking
        ranked = registry.run(
            "intelligent_search",
            {
                "action": "rank",
                "query": query,
                "results": results
            },
            ctx
        )
        # `or []` also covers a "ranked" key that is present but None, which
        # a plain .get() default would let through to the slice.
        ranked_list = (ranked.get("ranked") or [])[:max_sources]

        # Step 3: crawl top sources
        pages = []
        for item in ranked_list:
            url = item.get("url")
            if not url:
                # Nothing to fetch for an entry without a URL.
                continue
            page = registry.run(
                "crawler",
                {
                    "action": "fetch",
                    "url": url
                },
                ctx
            )
            pages.append({
                "url": url,
                "title": item.get("title"),
                # Normalise a missing or None text to "" so it cannot break
                # the keyword extraction in _build_hint.
                "text": page.get("text") or "",
                "score": item.get("score", 0)
            })

        # Step 4: synthesize structure
        return {
            "query": query,
            "sources_used": len(pages),
            "sources": pages,
            "summary_hint": self._build_hint(pages)
        }

    # =========================
    # SIMPLE SYNTHESIS HELPER
    # =========================

    def _build_hint(self, pages: list[dict[str, Any]]) -> str:
        """
        Lightweight heuristic summary hint.

        This is NOT a full LLM summary — just structure guidance. For each
        page it lists the first five whitespace-separated tokens longer than
        six characters, one line per source URL.

        Returns "No data available." when ``pages`` is empty.
        """
        if not pages:
            return "No data available."

        lines = []
        for page in pages:
            # Tolerate a page whose text is missing or None.
            text = page.get("text") or ""
            # crude keyword extraction (lightweight, no deps)
            keywords = [word for word in text.split() if len(word) > 6][:5]
            lines.append(f"- {page.get('url')}: {', '.join(keywords)}")

        return "Key extracted themes per source:\n" + "\n".join(lines)
# =========================
# REGISTER
# =========================
# Import-time side effect: a ResearchTool instance is created and handed to
# the shared registry as soon as this module is imported.
# NOTE(review): presumably this is what makes the tool reachable through
# registry.run("research", ...) — confirm against the registry implementation.
registry.register(ResearchTool())