""" Web Search Tool - Search the web using DuckDuckGo (no API key required). """ import json import logging from langchain.tools import tool from deerflow.config import get_app_config logger = logging.getLogger(__name__) DEFAULT_BACKEND = "auto" DEFAULT_REGION = "wt-wt" DEFAULT_SAFESEARCH = "moderate" DEFAULT_WIKIPEDIA_REGION = "us-en" WIKIPEDIA_BACKENDS = {"auto", "all", "wikipedia"} WIKIPEDIA_LANGUAGE_ALIASES = { "jp": "ja", "kr": "ko", "tzh": "zh", "wt": "en", } def _normalize_backend(backend: str | list[str] | tuple[str, ...] | None) -> str: if backend is None: return DEFAULT_BACKEND if isinstance(backend, (list, tuple)): return ",".join(str(part).strip() for part in backend if str(part).strip()) or DEFAULT_BACKEND return str(backend).strip() or DEFAULT_BACKEND def _normalize_setting(value: str | None, default: str) -> str: return str(value).strip() if value else default def _backend_includes_wikipedia(backend: str | list[str] | tuple[str, ...] | None) -> bool: backend = _normalize_backend(backend) return any(part.strip().lower() in WIKIPEDIA_BACKENDS for part in backend.split(",")) def _contains_codepoint(query: str, ranges: tuple[tuple[int, int], ...]) -> bool: return any(start <= ord(char) <= end for char in query for start, end in ranges) def _infer_wikipedia_region(query: str) -> str: """Pick a valid Wikipedia language region when DDGS' worldwide region is used.""" if _contains_codepoint(query, ((0x3040, 0x30FF), (0x31F0, 0x31FF))): return "jp-ja" if _contains_codepoint(query, ((0xAC00, 0xD7AF), (0x1100, 0x11FF), (0x3130, 0x318F))): return "kr-ko" if _contains_codepoint(query, ((0x3400, 0x9FFF),)): return "cn-zh" if _contains_codepoint(query, ((0x0400, 0x04FF),)): return "ru-ru" if _contains_codepoint(query, ((0x0370, 0x03FF),)): return "gr-el" if _contains_codepoint(query, ((0x0590, 0x05FF),)): return "il-he" if _contains_codepoint(query, ((0x0600, 0x06FF),)): return "xa-ar" return DEFAULT_WIKIPEDIA_REGION def _resolve_ddgs_region(query: str, region: str | None, backend: str | list[str] | tuple[str, ...] | None) -> str: """ DDGS' wikipedia engine treats the second part of region as a Wikipedia subdomain. Its default worldwide region, wt-wt, becomes wt.wikipedia.org. """ normalized_region = _normalize_setting(region, DEFAULT_REGION).lower() if not _backend_includes_wikipedia(backend): return normalized_region if normalized_region == DEFAULT_REGION: return _infer_wikipedia_region(query) if "-" not in normalized_region: return DEFAULT_WIKIPEDIA_REGION country, language = normalized_region.split("-", 1) return f"{country}-{WIKIPEDIA_LANGUAGE_ALIASES.get(language, language)}" def _search_text( query: str, max_results: int = 5, region: str | None = DEFAULT_REGION, safesearch: str | None = DEFAULT_SAFESEARCH, backend: str | list[str] | tuple[str, ...] | None = DEFAULT_BACKEND, ) -> list[dict]: """ Execute text search using DuckDuckGo. Args: query: Search keywords max_results: Maximum number of results region: Search region safesearch: Safe search level backend: DDGS backend(s), e.g. "auto", "duckduckgo", or "duckduckgo,brave" Returns: List of search results """ try: from ddgs import DDGS except ImportError: logger.error("ddgs library not installed. Run: pip install ddgs") return [] ddgs = DDGS(timeout=30) try: backend = _normalize_backend(backend) safesearch = _normalize_setting(safesearch, DEFAULT_SAFESEARCH) effective_region = _resolve_ddgs_region(query, region, backend) results = ddgs.text( query, region=effective_region, safesearch=safesearch, max_results=max_results, backend=backend, ) return list(results) if results else [] except Exception as e: logger.error(f"Failed to search web: {e}") return [] @tool("web_search", parse_docstring=True) def web_search_tool( query: str, max_results: int = 5, ) -> str: """Search the web for information. Use this tool to find current information, news, articles, and facts from the internet. Args: query: Search keywords describing what you want to find. Be specific for better results. max_results: Maximum number of results to return. Default is 5. """ config = get_app_config().get_tool_config("web_search") region = DEFAULT_REGION safesearch = DEFAULT_SAFESEARCH backend = DEFAULT_BACKEND if config is not None: # Override tool call defaults from config if set. max_results = config.model_extra.get("max_results", max_results) region = config.model_extra.get("region", region) safesearch = config.model_extra.get("safesearch", safesearch) backend = config.model_extra.get("backend", backend) results = _search_text( query=query, max_results=max_results, region=region, safesearch=safesearch, backend=backend, ) if not results: return json.dumps({"error": "No results found", "query": query}, ensure_ascii=False) normalized_results = [ { "title": r.get("title", ""), "url": r.get("href", r.get("link", "")), "content": r.get("body", r.get("snippet", "")), } for r in results ] output = { "query": query, "total_results": len(normalized_results), "results": normalized_results, } return json.dumps(output, indent=2, ensure_ascii=False)