Added many tools

2026-06-03 06:01:06 -05:00
parent 3723d2381d
commit e471f9bc54
28 changed files with 3488 additions and 205 deletions
--- a/tools/search.py
+++ b/tools/search.py
@@ -0,0 +1,117 @@
+from __future__ import annotations
+
+from typing import Any
+import urllib.request
+import urllib.parse
+import re
+
+from core.tools.base import BaseTool, ToolContext
+from core.tools.registry import registry
+from core.events import bus
+
+
+class SearchTool(BaseTool):
+    """
+    Lightweight web search tool using the DuckDuckGo HTML endpoint.
+
+    Designed for:
+    - query → results
+    - agent retrieval step before crawling
+
+    Payload keys read by this tool:
+    - action: only "search" is handled (default "search")
+    - query: the search string (required, must be a str)
+    - limit: maximum number of results to return (default 5)
+    """
+
+    name = "search"
+    description = "Web search (DuckDuckGo HTML scraping)"
+
+    # =========================
+    # EXECUTE
+    # =========================
+
+    def execute(self, payload: dict[str, Any], ctx: ToolContext) -> dict[str, Any]:
+        """
+        Dispatch a tool call to the handler for ``payload["action"]``.
+
+        ``ctx`` is accepted but not used by this tool.
+
+        Raises:
+            ValueError: if the action is anything other than "search".
+        """
+        action = str(payload.get("action", "search")).strip()
+
+        bus.log(
+            "SEARCH",
+            "search_execute",
+            "INFO",
+            {"action": action}
+        )
+
+        match action:
+            case "search":
+                return self.search(payload)
+
+            case _:
+                raise ValueError(f"Unknown search action: {action}")
+
+    # =========================
+    # SEARCH
+    # =========================
+
+    def search(self, payload: dict[str, Any]) -> dict[str, Any]:
+        """
+        Run one DuckDuckGo query and return the parsed results.
+
+        Returns:
+            On success ``{"query", "results", "count"}`` where ``results`` is
+            truncated to ``limit`` and ``count`` is the number of results
+            parsed from the page before truncation. On any fetch or parse
+            failure ``{"query", "error"}`` with the exception text.
+
+        Raises:
+            ValueError: if ``query`` is missing or not a string.
+        """
+        query = payload.get("query")
+        limit = payload.get("limit", 5)
+
+        if not isinstance(query, str):
+            raise ValueError("query must be string")
+
+        # bool is a subclass of int, and a negative limit would slice from
+        # the end of the list (silently dropping results), so both fall back
+        # to the default exactly like any other invalid limit.
+        if isinstance(limit, bool) or not isinstance(limit, int) or limit < 0:
+            limit = 5
+
+        encoded = urllib.parse.quote(query)
+        url = f"https://duckduckgo.com/html/?q={encoded}"
+
+        req = urllib.request.Request(
+            url,
+            headers={
+                "User-Agent": "MCP-Search/1.0"
+            }
+        )
+
+        # Best-effort boundary: a failed fetch or parse is reported in the
+        # returned dict instead of being raised to the caller.
+        try:
+            with urllib.request.urlopen(req, timeout=6) as resp:
+                page = resp.read().decode("utf-8", errors="ignore")
+
+            results = self._parse_results(page)
+        except Exception as e:
+            return {
+                "query": query,
+                "error": str(e)
+            }
+
+        return {
+            "query": query,
+            "results": results[:limit],
+            "count": len(results)
+        }
+
+    # =========================
+    # PARSER
+    # =========================
+
+    def _parse_results(self, html: str) -> list[dict[str, Any]]:
+        """
+        DuckDuckGo HTML parsing (lightweight heuristic).
+
+        Collects every ``<a>`` element whose class list contains
+        ``result__a`` and returns ``{"title", "url"}`` dicts in page order.
+        Titles have inner tags removed and HTML entities decoded; URLs are
+        entity-decoded and unwrapped from DuckDuckGo redirect links.
+        """
+        # Imported here because the ``html`` parameter shadows the module name.
+        from html import unescape
+
+        results: list[dict[str, Any]] = []
+
+        # DOTALL lets a title span several lines; attributes are inspected
+        # separately so their order inside the tag does not matter.
+        anchors = re.finditer(
+            r"<a\s+([^>]*)>(.*?)</a>", html, re.IGNORECASE | re.DOTALL
+        )
+
+        for anchor in anchors:
+            attrs, inner = anchor.groups()
+
+            css = re.search(r'(?<![\w-])class="([^"]*)"', attrs)
+            if css is None or "result__a" not in css.group(1).split():
+                continue
+
+            href = re.search(r'(?<![\w-])href="([^"]*)"', attrs)
+            if href is None:
+                continue
+
+            clean_title = unescape(re.sub(r"<[^>]*>", "", inner)).strip()
+
+            results.append({
+                "title": clean_title,
+                "url": self._resolve_url(unescape(href.group(1))),
+            })
+
+        return results
+
+    @staticmethod
+    def _resolve_url(href: str) -> str:
+        """
+        Return the destination URL for a result link.
+
+        Redirect links of the form ``//duckduckgo.com/l/?uddg=<target>`` are
+        replaced by the decoded ``uddg`` target. Any other protocol-relative
+        link gets an ``https:`` scheme; everything else is returned as is.
+        """
+        parts = urllib.parse.urlsplit(href)
+        ddg_hosts = {"", "duckduckgo.com", "www.duckduckgo.com", "html.duckduckgo.com"}
+
+        if parts.netloc.lower() in ddg_hosts and parts.path.rstrip("/") == "/l":
+            # parse_qs percent-decodes the value for us.
+            target = urllib.parse.parse_qs(parts.query).get("uddg")
+            if target and target[0]:
+                return target[0]
+
+        if href.startswith("//"):
+            return "https:" + href
+
+        return href
+
+
+# =========================
+# REGISTER
+# =========================
+
+# Module-level side effect: importing this module creates one SearchTool
+# instance and passes it to the registry imported from core.tools.registry.
+registry.register(SearchTool())