Added many tools
This commit is contained in:
149
tools/research.py
Normal file
149
tools/research.py
Normal file
@@ -0,0 +1,149 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from core.tools.base import BaseTool, ToolContext
|
||||
from core.tools.registry import registry
|
||||
from core.events import bus
|
||||
|
||||
|
||||
class ResearchTool(BaseTool):
    """
    High-level research orchestrator.

    Combines:
    - search
    - intelligent ranking
    - crawling
    into a structured report.

    Each stage is delegated to another registered tool through
    ``registry.run`` ("search", "intelligent_search" and "crawler").
    """

    name = "research"
    description = "Autonomous web research pipeline"

    # Tunables (override in a subclass to change behavior).
    _DEFAULT_MAX_SOURCES = 3   # used when the payload gives no max_sources
    _MIN_SEARCH_LIMIT = 10     # never ask the search tool for fewer hits
    _KEYWORD_MIN_LEN = 6       # keywords are words strictly longer than this
    _KEYWORDS_PER_SOURCE = 5   # keywords listed per source in the hint

    # =========================
    # EXECUTE
    # =========================

    def execute(self, payload: dict[str, Any], ctx: ToolContext) -> dict[str, Any]:
        """
        Run the search -> rank -> crawl pipeline for one query.

        Payload keys:
            query: required search string.
            depth: optional, defaults to 1. It is only written to the log
                here; it does not alter the pipeline.
            max_sources: optional non-negative int, defaults to 3. A missing
                or ``None`` value falls back to the default.

        Returns:
            A dict with "query", "sources_used", "sources" and
            "summary_hint". When the search yields nothing, the dict holds
            "query", "error", "sources_used" (0) and "sources" (empty list).

        Raises:
            ValueError: if ``query`` is not a string or ``max_sources`` is
                not a non-negative integer.
        """
        query = payload.get("query")
        depth = payload.get("depth", 1)

        max_sources = payload.get("max_sources")
        if max_sources is None:
            max_sources = self._DEFAULT_MAX_SOURCES

        if not isinstance(query, str):
            raise ValueError("query must be string")

        # A negative value would make the slice below drop items from the
        # tail instead of limiting, and a non-int would fail far from the
        # cause, so reject both up front. bool is excluded explicitly
        # because it is a subclass of int.
        if (
            isinstance(max_sources, bool)
            or not isinstance(max_sources, int)
            or max_sources < 0
        ):
            raise ValueError("max_sources must be a non-negative integer")

        bus.log(
            "RESEARCH",
            "research_execute",
            "INFO",
            {
                "query": query,
                "depth": depth,
                "max_sources": max_sources,
            },
        )

        # Step 1: search
        # Ask for at least as many hits as the caller wants sources, so a
        # large max_sources is not silently capped by the search limit.
        search_limit = max(self._MIN_SEARCH_LIMIT, max_sources)
        search_results = registry.run(
            "search",
            {"action": "search", "query": query, "limit": search_limit},
            ctx,
        )

        # NOTE(review): assumes the search tool returns a dict with a
        # "results" list - confirm. ``or`` also covers a None value.
        results = (search_results or {}).get("results") or []

        if not results:
            return {
                "query": query,
                "error": "No search results found",
                # Same keys as the success path so callers can read them
                # without checking for "error" first.
                "sources_used": 0,
                "sources": [],
            }

        # Step 2: intelligent ranking
        ranked = registry.run(
            "intelligent_search",
            {
                "action": "rank",
                "query": query,
                "results": results,
            },
            ctx,
        )

        # NOTE(review): assumes the ranker returns a dict with a "ranked"
        # list of dicts carrying "url", "title" and "score" - confirm.
        top_ranked = ((ranked or {}).get("ranked") or [])[:max_sources]

        # Step 3: crawl top sources
        pages = []

        for item in top_ranked:
            url = item.get("url")

            if not url:
                continue

            # NOTE(review): an exception raised by the crawler propagates
            # and aborts the whole run - confirm that is the intended policy.
            page = registry.run(
                "crawler",
                {
                    "action": "fetch",
                    "url": url,
                },
                ctx,
            )

            pages.append({
                "url": url,
                "title": item.get("title"),
                # ``or ""`` because .get's default is not used when the key
                # is present with a None value.
                "text": (page or {}).get("text") or "",
                "score": item.get("score", 0),
            })

        # Step 4: synthesize structure
        return {
            "query": query,
            "sources_used": len(pages),
            "sources": pages,
            "summary_hint": self._build_hint(pages),
        }

    # =========================
    # SIMPLE SYNTHESIS HELPER
    # =========================

    def _build_hint(self, pages: list[dict[str, Any]]) -> str:
        """
        Lightweight heuristic summary hint.

        This is NOT a full LLM summary - just structure guidance.

        For each page, lists the first few whitespace-separated words of its
        "text" that are longer than ``_KEYWORD_MIN_LEN`` characters. Returns
        "No data available." when ``pages`` is empty.
        """
        if not pages:
            return "No data available."

        lines = []

        for page in pages:
            # Tolerate a missing or None "text" instead of failing on .split().
            text = page.get("text") or ""

            # crude keyword extraction (lightweight, no deps)
            keywords = [
                word for word in text.split()
                if len(word) > self._KEYWORD_MIN_LEN
            ][:self._KEYWORDS_PER_SOURCE]

            lines.append(f"- {page.get('url')}: {', '.join(keywords)}")

        return "Key extracted themes per source:\n" + "\n".join(lines)
|
||||
|
||||
|
||||
# =========================
|
||||
# REGISTER
|
||||
# =========================
|
||||
|
||||
# Import-time side effect: instantiate the tool and hand it to the shared
# registry object imported from core.tools.registry.
registry.register(ResearchTool())
|
||||
Reference in New Issue
Block a user
