# python-mcp/tools/intelligent_search.py

from __future__ import annotations

from typing import Any
from urllib.parse import urlparse

from core.tools.base import BaseTool, ToolContext
from core.tools.registry import registry
from core.events import bus


class IntelligentSearchTool(BaseTool):
    """
    Intelligent wrapper over basic search results.

    Enhances:
    - ranking
    - deduplication
    - best-result selection
    """

    name = "intelligent_search"
    description = "Rerank and filter search results for best relevance"

    # =========================
    # EXECUTE
    # =========================

    def execute(self, payload: dict[str, Any], ctx: ToolContext):
        action = str(payload.get("action", "rank")).strip()

        bus.log(
            "SEARCH",
            "intelligent_search_execute",
            "INFO",
            {"action": action}
        )

        match action:
            case "rank":
                return self.rank(payload)

            case "best":
                return self.best(payload)

            case _:
                raise ValueError(f"Unknown action: {action}")

    # =========================
    # RANK RESULTS
    # =========================

    def rank(self, payload: dict[str, Any]):
        results = payload.get("results")
        query = payload.get("query", "")

        if not isinstance(results, list):
            raise ValueError("results must be list")

        scored = []

        for r in results:
            if not isinstance(r, dict):
                continue

            title = r.get("title", "")
            url = r.get("url", "")

            score = self._score(query, title, url)

            scored.append({
                "title": title,
                "url": url,
                "score": score
            })

        scored.sort(key=lambda x: x["score"], reverse=True)

        return {
            "query": query,
            "ranked": scored
        }

    # =========================
    # BEST RESULT ONLY
    # =========================

    def best(self, payload: dict[str, Any]):
        results = payload.get("results")
        query = payload.get("query", "")

        if not isinstance(results, list):
            raise ValueError("results must be list")

        best_item = None
        best_score = -1

        seen_domains = set()

        for r in results:
            if not isinstance(r, dict):
                continue

            title = r.get("title", "")
            url = r.get("url", "")

            domain = self._domain(url)

            # simple dedupe
            if domain in seen_domains:
                continue

            seen_domains.add(domain)

            score = self._score(query, title, url)

            if score > best_score:
                best_score = score
                best_item = {
                    "title": title,
                    "url": url,
                    "score": score
                }

        return {
            "query": query,
            "best": best_item
        }

    # =========================
    # SCORING FUNCTION
    # =========================

    def _score(self, query: str, title: str, url: str) -> float:
        """
        Lightweight heuristic ranking system.

        Replace later with LLM scoring if desired.
        """

        q = query.lower()
        t = title.lower()
        u = url.lower()

        score = 0.0

        # keyword overlap
        for word in q.split():
            if word in t:
                score += 2.0
            if word in u:
                score += 1.0

        # title boost
        if q in t:
            score += 5.0

        # HTTPS boost
        if url.startswith("https"):
            score += 0.5

        # domain quality heuristic
        domain = self._domain(url)
        if domain.endswith(".edu") or domain.endswith(".org"):
            score += 1.5

        return score

    # =========================
    # DOMAIN HELPERS
    # =========================

    def _domain(self, url: str) -> str:
        try:
            return urlparse(url).netloc.lower()
        except Exception:
            return ""


# =========================
# REGISTER
# =========================

# Import-time side effect: build one IntelligentSearchTool instance and pass
# it to the shared registry, so importing this module is what registers it.
# NOTE(review): presumably the registry looks tools up by their `name`
# attribute ("intelligent_search") — confirm in core.tools.registry.
registry.register(IntelligentSearchTool())