Added many tools
This commit is contained in:
117
tools/search.py
Normal file
117
tools/search.py
Normal file
@@ -0,0 +1,117 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
import re
|
||||
|
||||
from core.tools.base import BaseTool, ToolContext
|
||||
from core.tools.registry import registry
|
||||
from core.events import bus
|
||||
|
||||
|
||||
class SearchTool(BaseTool):
|
||||
"""
|
||||
Lightweight web search tool using DuckDuckGo HTML endpoint.
|
||||
|
||||
Designed for:
|
||||
- query → results
|
||||
- agent retrieval step before crawling
|
||||
"""
|
||||
|
||||
name = "search"
|
||||
description = "Web search (DuckDuckGo HTML scraping)"
|
||||
|
||||
# =========================
|
||||
# EXECUTE
|
||||
# =========================
|
||||
|
||||
def execute(self, payload: dict[str, Any], ctx: ToolContext):
|
||||
action = str(payload.get("action", "search")).strip()
|
||||
|
||||
bus.log(
|
||||
"SEARCH",
|
||||
"search_execute",
|
||||
"INFO",
|
||||
{"action": action}
|
||||
)
|
||||
|
||||
match action:
|
||||
case "search":
|
||||
return self.search(payload)
|
||||
|
||||
case _:
|
||||
raise ValueError(f"Unknown search action: {action}")
|
||||
|
||||
# =========================
|
||||
# SEARCH
|
||||
# =========================
|
||||
|
||||
def search(self, payload: dict[str, Any]):
|
||||
query = payload.get("query")
|
||||
limit = payload.get("limit", 5)
|
||||
|
||||
if not isinstance(query, str):
|
||||
raise ValueError("query must be string")
|
||||
if not isinstance(limit, int):
|
||||
limit = 5
|
||||
|
||||
encoded = urllib.parse.quote(query)
|
||||
|
||||
url = f"https://duckduckgo.com/html/?q={encoded}"
|
||||
|
||||
req = urllib.request.Request(
|
||||
url,
|
||||
headers={
|
||||
"User-Agent": "MCP-Search/1.0"
|
||||
}
|
||||
)
|
||||
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=6) as resp:
|
||||
html = resp.read().decode("utf-8", errors="ignore")
|
||||
|
||||
results = self._parse_results(html)
|
||||
|
||||
return {
|
||||
"query": query,
|
||||
"results": results[:limit],
|
||||
"count": len(results)
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
return {
|
||||
"query": query,
|
||||
"error": str(e)
|
||||
}
|
||||
|
||||
# =========================
|
||||
# PARSER
|
||||
# =========================
|
||||
|
||||
def _parse_results(self, html: str) -> list[dict[str, Any]]:
|
||||
"""
|
||||
DuckDuckGo HTML parsing (lightweight heuristic).
|
||||
"""
|
||||
|
||||
results = []
|
||||
|
||||
# Extract result blocks
|
||||
links = re.findall(r'<a rel="nofollow" class="result__a" href="(.*?)".*?>(.*?)</a>', html)
|
||||
|
||||
for url, title in links:
|
||||
clean_title = re.sub("<.*?>", "", title)
|
||||
|
||||
results.append({
|
||||
"title": clean_title,
|
||||
"url": url,
|
||||
})
|
||||
|
||||
return results
|
||||
|
||||
|
||||
# =========================
|
||||
# REGISTER
|
||||
# =========================
|
||||
|
||||
# Module-level side effect: create one SearchTool instance and hand it to the
# imported `registry` when this module is executed.
registry.register(SearchTool())
|
||||
Reference in New Issue
Block a user
