From 0bbbbc06f49410878b4d52400ce4756b2917e4d9 Mon Sep 17 00:00:00 2001 From: Ryker_Feng <90562015+18062706139fcz@users.noreply.github.com> Date: Thu, 18 Jun 2026 07:36:35 +0800 Subject: [PATCH] feat(community): add Serper Google Images provider for image_search (#3575) * feat(community): add Serper Google Images provider for image_search Add a Serper-backed `image_search` tool alongside the existing Serper `web_search` provider, so users with a SERPER_API_KEY can pull Google Images results as reference images for downstream image generation. - Share request/response handling between web_search and image_search via `_serper_post` / `_response_items`, with bounded `max_results` (capped at 10) and query normalization. - Add a best-effort SSRF guard (`_safe_public_url`) that rejects non-http(s), localhost and private/non-global IP image URLs; filtered entries are dropped and never consume the result limit. - doctor: flag literal `api_key` values in config as a warning and steer users toward `.env` + `$SERPER_API_KEY`. - Docs/config: document the Serper image_search provider and SERPER_API_KEY, and discourage committing literal keys to config.yaml. - Tests: cover the provider end-to-end (100% line coverage on tools.py) and the doctor literal-key warning path. * fix(community): block obfuscated IPv4 literals in Serper image SSRF guard The image_search SSRF guard only rejected dotted-decimal IP literals; encoded forms such as decimal (http://2130706433/), hex (0x7f000001) and octal (0177.0.0.1) raised ValueError in ip_address() and were allowed through, even though many HTTP clients resolve them to private addresses like 127.0.0.1. Add _decode_ipv4() to permissively decode these inet_aton-style encodings and apply the same is_global check; hostnames that do not decode to an IP (e.g. cafe.com) are still treated as hosts and left to fetch-time re-validation. Addresses PR review feedback. Tests cover decimal/hex/octal loopback and private encodings plus non-IP edge cases; tools.py stays at 100% line coverage. * test(community): cover IPv4-mapped IPv6 URL filtering * fix(community): address Serper image search review feedback - Block trailing-dot hostname SSRF bypass (localhost./127.0.0.1.) in _safe_public_url by stripping the FQDN root label before checks. - Keep a filtered image/thumbnail URL empty instead of collapsing onto its counterpart, preserving the high-res/preview contract. - Evaluate the SSRF guard once per field rather than twice. - Treat a null-typed organic/images field as "no results" rather than a malformed payload. - doctor.py: when a config $VAR is unset, fall through to the default env var before reporting it as not set. --- backend/docs/CONFIGURATION.md | 2 + .../deerflow/community/serper/__init__.py | 4 +- .../deerflow/community/serper/tools.py | 296 +++++- backend/tests/test_doctor.py | 131 ++- backend/tests/test_serper_tools.py | 917 +++++++++++++++++- config.example.yaml | 18 +- frontend/src/content/en/harness/tools.mdx | 24 +- frontend/src/content/zh/harness/tools.mdx | 21 + scripts/doctor.py | 83 +- 9 files changed, 1409 insertions(+), 87 deletions(-) diff --git a/backend/docs/CONFIGURATION.md b/backend/docs/CONFIGURATION.md index cd32b3f7c..3417e70bd 100644 --- a/backend/docs/CONFIGURATION.md +++ b/backend/docs/CONFIGURATION.md @@ -236,6 +236,7 @@ tools: **Built-in Tools**: - `web_search` - Search the web (DuckDuckGo, Tavily, Brave, Exa, InfoQuest, Firecrawl) - `web_fetch` - Fetch web pages (Jina AI, Exa, InfoQuest, Firecrawl) +- `image_search` - Search for reference images (DuckDuckGo, InfoQuest, Serper) - `ls` - List directory contents - `read_file` - Read file contents - `write_file` - Write file contents @@ -414,6 +415,7 @@ models: - `NOVITA_API_KEY` - Novita API key (OpenAI-compatible endpoint) - `TAVILY_API_KEY` - Tavily search API key - `BRAVE_SEARCH_API_KEY` - Brave Search API key +- `SERPER_API_KEY` - Serper (Google Search/Images API) key for `web_search` and `image_search` - `DEER_FLOW_PROJECT_ROOT` - Project root for relative runtime paths - `DEER_FLOW_CONFIG_PATH` - Custom config file path - `DEER_FLOW_EXTENSIONS_CONFIG_PATH` - Custom extensions config file path diff --git a/backend/packages/harness/deerflow/community/serper/__init__.py b/backend/packages/harness/deerflow/community/serper/__init__.py index 876167859..9ec66554d 100644 --- a/backend/packages/harness/deerflow/community/serper/__init__.py +++ b/backend/packages/harness/deerflow/community/serper/__init__.py @@ -1,3 +1,3 @@ -from .tools import web_search_tool +from .tools import image_search_tool, web_search_tool -__all__ = ["web_search_tool"] +__all__ = ["image_search_tool", "web_search_tool"] diff --git a/backend/packages/harness/deerflow/community/serper/tools.py b/backend/packages/harness/deerflow/community/serper/tools.py index 1cad11fb8..005a3090e 100644 --- a/backend/packages/harness/deerflow/community/serper/tools.py +++ b/backend/packages/harness/deerflow/community/serper/tools.py @@ -1,13 +1,15 @@ """ -Web Search Tool - Search the web using Serper (Google Search API). +Web and image search tools powered by Serper (Google Search API). -Serper provides real-time Google Search results via a JSON API. -An API key is required. Sign up at https://serper.dev to get one. +Serper provides real-time Google Search and Google Images results via a JSON +API. An API key is required. Sign up at https://serper.dev to get one. """ import json import logging import os +from ipaddress import IPv4Address, ip_address +from urllib.parse import urlparse import httpx from langchain.tools import tool @@ -16,43 +18,168 @@ from deerflow.config import get_app_config logger = logging.getLogger(__name__) -_SERPER_ENDPOINT = "https://google.serper.dev/search" -_api_key_warned = False +_SERPER_SEARCH_ENDPOINT = "https://google.serper.dev/search" +_SERPER_IMAGES_ENDPOINT = "https://google.serper.dev/images" +_SERPER_MAX_RESULTS = 10 +_api_key_warned: set[str] = set() -def _get_api_key() -> str | None: - config = get_app_config().get_tool_config("web_search") +def _get_api_key(tool_name: str) -> str | None: + config = get_app_config().get_tool_config(tool_name) if config is not None: api_key = config.model_extra.get("api_key") if isinstance(api_key, str) and api_key.strip(): - return api_key - return os.getenv("SERPER_API_KEY") + return api_key.strip() + env_key = os.getenv("SERPER_API_KEY") + if isinstance(env_key, str) and env_key.strip(): + return env_key.strip() + return None -@tool("web_search", parse_docstring=True) -def web_search_tool(query: str, max_results: int = 5) -> str: - """Search the web for information using Google Search via Serper. +def _coerce_max_results(value: object, default: int = 5, max_allowed: int = _SERPER_MAX_RESULTS) -> int: + """Coerce config/parameter input into a bounded positive result count.""" + try: + count = int(value) + except (TypeError, ValueError): + return default + if count <= 0: + return default + return min(count, max_allowed) - Args: - query: Search keywords describing what you want to find. Be specific for better results. - max_results: Maximum number of search results to return. Default is 5. + +def _missing_key_error(query: str, tool_name: str) -> str: + if tool_name not in _api_key_warned: + _api_key_warned.add(tool_name) + logger.warning("Serper API key is not set for '%s'. Set SERPER_API_KEY in your environment or provide api_key in config.yaml. Sign up at https://serper.dev", tool_name) + return json.dumps( + {"error": "SERPER_API_KEY is not configured", "query": query}, + ensure_ascii=False, + ) + + +def _unexpected_format_error(query: str) -> str: + return json.dumps( + {"error": "Serper returned an unexpected response format", "query": query}, + ensure_ascii=False, + ) + + +def _response_items(data: dict, field: str, query: str) -> tuple[list[dict] | None, str | None]: + items = data.get(field) + # Treat a missing or null field as "no results" (some APIs return + # ``{"organic": null}`` to signal that) rather than a malformed payload. + if items is None: + return [], None + if not isinstance(items, list): + logger.error("Serper returned unexpected '%s' payload type: %s", field, type(items).__name__) + return None, _unexpected_format_error(query) + return [item for item in items if isinstance(item, dict)], None + + +def _clean_query(query: str) -> str: + """Normalize a raw query into the value actually sent to Serper.""" + query = query.strip() + if len(query) > 500: + query = query[:500] + return query + + +def _decode_ipv4(host: str) -> IPv4Address | None: + """Decode obfuscated IPv4 literals that ``ip_address`` rejects. + + Mirrors the permissive ``inet_aton`` parsing many HTTP clients use, so that + integer (``2130706433``), hex (``0x7f000001``) and octal (``0177.0.0.1``) + encodings of an address are recognized. Returns an ``IPv4Address`` when the + host decodes to one, otherwise ``None`` (e.g. real domains like + ``cafe.com`` fail to decode and are left for the caller to treat as a host). """ - global _api_key_warned + parts = host.split(".") + if not 1 <= len(parts) <= 4: + return None - config = get_app_config().get_tool_config("web_search") - if config is not None and "max_results" in config.model_extra: - max_results = config.model_extra.get("max_results", max_results) + values: list[int] = [] + for part in parts: + if not part: + return None + try: + if part.startswith(("0x", "0X")): + values.append(int(part, 16)) + elif part.startswith("0") and len(part) > 1: + values.append(int(part, 8)) + else: + values.append(int(part, 10)) + except ValueError: + return None - api_key = _get_api_key() - if not api_key: - if not _api_key_warned: - _api_key_warned = True - logger.warning("Serper API key is not set. Set SERPER_API_KEY in your environment or provide api_key in config.yaml. Sign up at https://serper.dev") - return json.dumps( - {"error": "SERPER_API_KEY is not configured", "query": query}, - ensure_ascii=False, - ) + *leading, last = values + for value in leading: + if not 0 <= value <= 0xFF: + return None + max_last = (1 << (8 * (4 - len(leading)))) - 1 + if not 0 <= last <= max_last: + return None + result = 0 + for value in leading: + result = (result << 8) | value + result = (result << (8 * (4 - len(leading)))) | last + return ip_address(result) + + +def _is_url_present(value: object) -> bool: + """Return ``True`` when *value* is a non-empty URL string. + + Used to distinguish a field that was *absent* (eligible for cross-field + fallback) from one that was *present but filtered* by the SSRF guard (which + must stay empty rather than collapse onto its counterpart). + """ + return isinstance(value, str) and bool(value.strip()) + + +def _safe_public_url(value: object) -> str: + """Return ``value`` only if it is a safe, public http(s) URL, else "". + + This is a best-effort SSRF guard that rejects non-http(s) schemes, + ``localhost``, and private/non-global IP literals (including obfuscated + decimal/hex/octal encodings). It only inspects the URL string and cannot + catch public hostnames that resolve to internal IPs (e.g. DNS rebinding); + any consumer that actually downloads these URLs must re-validate the + resolved IP at fetch time. + """ + if not isinstance(value, str): + return "" + url = value.strip() + parsed = urlparse(url) + if parsed.scheme not in {"http", "https"} or not parsed.netloc or not parsed.hostname: + return "" + + # Strip a single trailing dot (FQDN root label). ``localhost.`` and + # ``127.0.0.1.`` resolve to loopback on common resolvers but would + # otherwise slip past the localhost/IP checks below. + host = parsed.hostname.lower().rstrip(".") + if not host: + return "" + if host == "localhost" or host.endswith(".localhost"): + return "" + + try: + ip = ip_address(host) + except ValueError: + ip = _decode_ipv4(host) + if ip is None: + return url + return url if ip.is_global else "" + + +def _serper_post(endpoint: str, api_key: str, query: str, max_results: int) -> tuple[dict | None, str | None]: + """Send a POST request to a Serper endpoint. + + ``query`` is expected to already be normalized via :func:`_clean_query`. + + Returns a ``(data, error_json)`` tuple: on success ``data`` is the parsed + JSON response and ``error_json`` is ``None``; on failure ``data`` is ``None`` + and ``error_json`` is a serialized structured error ready to return. + """ headers = { "X-API-KEY": api_key, "Content-Type": "application/json", @@ -61,23 +188,56 @@ def web_search_tool(query: str, max_results: int = 5) -> str: try: with httpx.Client(timeout=30) as client: - response = client.post(_SERPER_ENDPOINT, headers=headers, json=payload) + response = client.post(endpoint, headers=headers, json=payload) response.raise_for_status() data = response.json() + if not isinstance(data, dict): + logger.error("Serper returned an unexpected payload type: %s", type(data).__name__) + return None, _unexpected_format_error(query) + return data, None except httpx.HTTPStatusError as e: - logger.error(f"Serper API returned HTTP {e.response.status_code}: {e.response.text}") - return json.dumps( + resp_text = (e.response.text or "")[:500] + logger.error("Serper API returned HTTP %s: %s", e.response.status_code, resp_text) + return None, json.dumps( {"error": f"Serper API error: HTTP {e.response.status_code}", "query": query}, ensure_ascii=False, ) except Exception as e: - logger.error(f"Serper search failed: {type(e).__name__}: {e}") - return json.dumps({"error": str(e), "query": query}, ensure_ascii=False) + logger.error("Serper request failed: %s: %s", type(e).__name__, str(e)[:500]) + return None, json.dumps({"error": str(e)[:500], "query": query}, ensure_ascii=False) - organic = data.get("organic", []) + +@tool("web_search", parse_docstring=True) +def web_search_tool(query: str, max_results: int = 5) -> str: + """Search the web for information using Google Search via Serper. + + Args: + query: Search keywords describing what you want to find. Be specific for better results. + max_results: Maximum number of search results to return. Default is 5, capped at 10. + """ + config = get_app_config().get_tool_config("web_search") + if config is not None and "max_results" in config.model_extra: + max_results = config.model_extra.get("max_results", max_results) + max_results = _coerce_max_results(max_results) + query = _clean_query(query) + + api_key = _get_api_key("web_search") + if not api_key: + return _missing_key_error(query, "web_search") + + data, error_json = _serper_post(_SERPER_SEARCH_ENDPOINT, api_key, query, max_results) + if error_json is not None: + return error_json + + organic, error_json = _response_items(data, "organic", query) + if error_json is not None: + return error_json if not organic: return json.dumps({"error": "No results found", "query": query}, ensure_ascii=False) + # Search result links are returned verbatim (not passed through + # _safe_public_url): they are surfaced as citations for the model to read, + # not fetched/downloaded by this tool, unlike image_search image URLs. normalized_results = [ { "title": r.get("title", ""), @@ -93,3 +253,71 @@ def web_search_tool(query: str, max_results: int = 5) -> str: "results": normalized_results, } return json.dumps(output, indent=2, ensure_ascii=False) + + +@tool("image_search", parse_docstring=True) +def image_search_tool(query: str, max_results: int = 5) -> str: + """Search for images online using Google Images via Serper. Use this tool BEFORE image generation to find reference images for characters, portraits, objects, scenes, or any content requiring visual accuracy. + + The returned image URLs can be used as reference images in image generation to significantly improve quality. + + Args: + query: Search keywords describing the images you want to find. Be specific for better results (e.g., "Japanese woman street photography 1990s" instead of just "woman"). + max_results: Maximum number of images to return. Default is 5, capped at 10. + """ + config = get_app_config().get_tool_config("image_search") + if config is not None and "max_results" in config.model_extra: + max_results = config.model_extra.get("max_results", max_results) + max_results = _coerce_max_results(max_results) + query = _clean_query(query) + + api_key = _get_api_key("image_search") + if not api_key: + return _missing_key_error(query, "image_search") + + data, error_json = _serper_post(_SERPER_IMAGES_ENDPOINT, api_key, query, max_results) + if error_json is not None: + return error_json + + images, error_json = _response_items(data, "images", query) + if error_json is not None: + return error_json + if not images: + return json.dumps({"error": "No images found", "query": query}, ensure_ascii=False) + + normalized_results = [] + for r in images: + raw_image = r.get("imageUrl") + raw_thumb = r.get("thumbnailUrl") + # Evaluate the (non-trivial) SSRF guard once per field instead of twice. + safe_image = _safe_public_url(raw_image) + safe_thumb = _safe_public_url(raw_thumb) + # Cross-fall back only when the other field was *absent*. A field that + # was present but failed the SSRF filter is left empty rather than + # collapsed onto its counterpart, so a dropped high-res URL never + # silently masquerades as the preview (and vice versa), preserving the + # high-res/preview contract callers rely on. + image_url = safe_image or (safe_thumb if not _is_url_present(raw_image) else "") + thumbnail_url = safe_thumb or (safe_image if not _is_url_present(raw_thumb) else "") + if not image_url and not thumbnail_url: + continue + normalized_results.append( + { + "title": r.get("title", ""), + "image_url": image_url, + "thumbnail_url": thumbnail_url, + } + ) + if len(normalized_results) >= max_results: + break + + if not normalized_results: + return json.dumps({"error": "No safe image URLs found", "query": query}, ensure_ascii=False) + + output = { + "query": query, + "total_results": len(normalized_results), + "results": normalized_results, + "usage_hint": "Use the 'image_url' values as reference images in image generation. Download them first if needed.", + } + return json.dumps(output, indent=2, ensure_ascii=False) diff --git a/backend/tests/test_doctor.py b/backend/tests/test_doctor.py index eed56e227..be7b7e68a 100644 --- a/backend/tests/test_doctor.py +++ b/backend/tests/test_doctor.py @@ -214,13 +214,14 @@ class TestCheckWebSearch: assert result.fix is not None assert "BRAVE_SEARCH_API_KEY" in result.fix - def test_brave_with_inline_api_key_ok(self, tmp_path, monkeypatch): + def test_brave_with_inline_api_key_warns(self, tmp_path, monkeypatch): monkeypatch.delenv("BRAVE_SEARCH_API_KEY", raising=False) cfg = tmp_path / "config.yaml" cfg.write_text('config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.brave.tools:web_search_tool\n api_key: "inline-key"\n') result = doctor.check_web_search(cfg) - assert result.status == "ok" - assert "api_key configured" in result.detail + assert result.status == "warn" + assert "literal api_key set in config" in result.detail + assert "BRAVE_SEARCH_API_KEY" in (result.fix or "") def test_brave_with_api_key_env_ref_ok(self, tmp_path, monkeypatch): monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "bsa-test") @@ -228,7 +229,61 @@ class TestCheckWebSearch: cfg.write_text("config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.brave.tools:web_search_tool\n api_key: $BRAVE_SEARCH_API_KEY\n") result = doctor.check_web_search(cfg) assert result.status == "ok" - assert "api_key" in result.detail + assert "BRAVE_SEARCH_API_KEY set from config" in result.detail + + def test_serper_with_key_ok(self, tmp_path, monkeypatch): + monkeypatch.setenv("SERPER_API_KEY", "test-key") + cfg = tmp_path / "config.yaml" + cfg.write_text("config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.serper.tools:web_search_tool\n") + result = doctor.check_web_search(cfg) + assert result.status == "ok" + assert "serper" in result.detail + + def test_serper_without_key_warns(self, tmp_path, monkeypatch): + monkeypatch.delenv("SERPER_API_KEY", raising=False) + cfg = tmp_path / "config.yaml" + cfg.write_text("config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.serper.tools:web_search_tool\n") + result = doctor.check_web_search(cfg) + assert result.status == "warn" + assert "SERPER_API_KEY" in (result.fix or "") + + def test_serper_inline_api_key_warns(self, tmp_path, monkeypatch): + monkeypatch.delenv("SERPER_API_KEY", raising=False) + cfg = tmp_path / "config.yaml" + cfg.write_text("config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.serper.tools:web_search_tool\n api_key: inline-key\n") + result = doctor.check_web_search(cfg) + assert result.status == "warn" + assert "literal api_key set in config" in result.detail + assert "SERPER_API_KEY" in (result.fix or "") + + def test_serper_config_env_ref_ok(self, tmp_path, monkeypatch): + monkeypatch.setenv("SERPER_API_KEY", "test-key") + cfg = tmp_path / "config.yaml" + cfg.write_text("config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.serper.tools:web_search_tool\n api_key: $SERPER_API_KEY\n") + result = doctor.check_web_search(cfg) + assert result.status == "ok" + assert "SERPER_API_KEY set from config" in result.detail + + def test_serper_unresolved_env_ref_falls_back_to_default_var(self, tmp_path, monkeypatch): + # The referenced $VAR is unset, but the default SERPER_API_KEY is set, + # which the tool uses as a runtime fallback; report ok rather than warn. + monkeypatch.delenv("MY_CUSTOM_SERPER_KEY", raising=False) + monkeypatch.setenv("SERPER_API_KEY", "test-key") + cfg = tmp_path / "config.yaml" + cfg.write_text("config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.serper.tools:web_search_tool\n api_key: $MY_CUSTOM_SERPER_KEY\n") + result = doctor.check_web_search(cfg) + assert result.status == "ok" + assert "SERPER_API_KEY set" in result.detail + + def test_serper_unresolved_env_ref_without_default_warns(self, tmp_path, monkeypatch): + # Neither the referenced $VAR nor the default SERPER_API_KEY is set. + monkeypatch.delenv("MY_CUSTOM_SERPER_KEY", raising=False) + monkeypatch.delenv("SERPER_API_KEY", raising=False) + cfg = tmp_path / "config.yaml" + cfg.write_text("config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.serper.tools:web_search_tool\n api_key: $MY_CUSTOM_SERPER_KEY\n") + result = doctor.check_web_search(cfg) + assert result.status == "warn" + assert "SERPER_API_KEY" in (result.fix or "") def test_no_search_tool_warns(self, tmp_path): cfg = tmp_path / "config.yaml" @@ -284,6 +339,74 @@ class TestCheckWebFetch: assert result.status == "fail" +# --------------------------------------------------------------------------- +# check_image_search +# --------------------------------------------------------------------------- + + +class TestCheckImageSearch: + def test_ddg_always_ok(self, tmp_path): + cfg = tmp_path / "config.yaml" + cfg.write_text("config_version: 5\ntools:\n - name: image_search\n use: deerflow.community.image_search.tools:image_search_tool\n") + result = doctor.check_image_search(cfg) + assert result.status == "ok" + assert "DuckDuckGo" in result.detail + + def test_serper_with_key_ok(self, tmp_path, monkeypatch): + monkeypatch.setenv("SERPER_API_KEY", "test-key") + cfg = tmp_path / "config.yaml" + cfg.write_text("config_version: 5\ntools:\n - name: image_search\n use: deerflow.community.serper.tools:image_search_tool\n") + result = doctor.check_image_search(cfg) + assert result.status == "ok" + assert "serper" in result.detail + + def test_serper_without_key_warns(self, tmp_path, monkeypatch): + monkeypatch.delenv("SERPER_API_KEY", raising=False) + cfg = tmp_path / "config.yaml" + cfg.write_text("config_version: 5\ntools:\n - name: image_search\n use: deerflow.community.serper.tools:image_search_tool\n") + result = doctor.check_image_search(cfg) + assert result.status == "warn" + assert "SERPER_API_KEY" in (result.fix or "") + + def test_serper_inline_api_key_warns(self, tmp_path, monkeypatch): + monkeypatch.delenv("SERPER_API_KEY", raising=False) + cfg = tmp_path / "config.yaml" + cfg.write_text("config_version: 5\ntools:\n - name: image_search\n use: deerflow.community.serper.tools:image_search_tool\n api_key: inline-key\n") + result = doctor.check_image_search(cfg) + assert result.status == "warn" + assert "literal api_key set in config" in result.detail + assert "SERPER_API_KEY" in (result.fix or "") + + def test_serper_config_env_ref_without_env_warns(self, tmp_path, monkeypatch): + monkeypatch.delenv("SERPER_API_KEY", raising=False) + cfg = tmp_path / "config.yaml" + cfg.write_text("config_version: 5\ntools:\n - name: image_search\n use: deerflow.community.serper.tools:image_search_tool\n api_key: $SERPER_API_KEY\n") + result = doctor.check_image_search(cfg) + assert result.status == "warn" + assert "SERPER_API_KEY" in (result.fix or "") + + def test_infoquest_with_key_ok(self, tmp_path, monkeypatch): + monkeypatch.setenv("INFOQUEST_API_KEY", "test-key") + cfg = tmp_path / "config.yaml" + cfg.write_text("config_version: 5\ntools:\n - name: image_search\n use: deerflow.community.infoquest.tools:image_search_tool\n") + result = doctor.check_image_search(cfg) + assert result.status == "ok" + assert "infoquest" in result.detail + + def test_no_image_search_tool_warns(self, tmp_path): + cfg = tmp_path / "config.yaml" + cfg.write_text("config_version: 5\ntools: []\n") + result = doctor.check_image_search(cfg) + assert result.status == "warn" + assert result.fix is not None + + def test_invalid_provider_use_fails(self, tmp_path): + cfg = tmp_path / "config.yaml" + cfg.write_text("config_version: 5\ntools:\n - name: image_search\n use: deerflow.community.not_real.tools:image_search_tool\n") + result = doctor.check_image_search(cfg) + assert result.status == "fail" + + # --------------------------------------------------------------------------- # check_env_file # --------------------------------------------------------------------------- diff --git a/backend/tests/test_serper_tools.py b/backend/tests/test_serper_tools.py index 2e53b0351..7df5311ec 100644 --- a/backend/tests/test_serper_tools.py +++ b/backend/tests/test_serper_tools.py @@ -12,9 +12,9 @@ def reset_api_key_warned(): """Reset the module-level warning flag before each test.""" import deerflow.community.serper.tools as serper_mod - serper_mod._api_key_warned = False + serper_mod._api_key_warned = set() yield - serper_mod._api_key_warned = False + serper_mod._api_key_warned = set() @pytest.fixture @@ -42,6 +42,13 @@ def _make_serper_response(organic: list) -> MagicMock: return mock_resp +def _make_serper_images_response(images: list) -> MagicMock: + mock_resp = MagicMock() + mock_resp.json.return_value = {"images": images} + mock_resp.raise_for_status = MagicMock() + return mock_resp + + class TestGetApiKey: def test_returns_config_key_when_present(self): with patch("deerflow.community.serper.tools.get_app_config") as mock: @@ -51,7 +58,7 @@ class TestGetApiKey: from deerflow.community.serper.tools import _get_api_key - assert _get_api_key() == "from-config" + assert _get_api_key("web_search") == "from-config" def test_falls_back_to_env_when_config_key_empty(self): with patch("deerflow.community.serper.tools.get_app_config") as mock: @@ -61,7 +68,7 @@ class TestGetApiKey: with patch.dict("os.environ", {"SERPER_API_KEY": "env-key"}): from deerflow.community.serper.tools import _get_api_key - assert _get_api_key() == "env-key" + assert _get_api_key("web_search") == "env-key" def test_falls_back_to_env_when_config_key_whitespace(self): with patch("deerflow.community.serper.tools.get_app_config") as mock: @@ -71,7 +78,7 @@ class TestGetApiKey: with patch.dict("os.environ", {"SERPER_API_KEY": "env-key"}): from deerflow.community.serper.tools import _get_api_key - assert _get_api_key() == "env-key" + assert _get_api_key("web_search") == "env-key" def test_falls_back_to_env_when_config_key_null(self): with patch("deerflow.community.serper.tools.get_app_config") as mock: @@ -81,7 +88,7 @@ class TestGetApiKey: with patch.dict("os.environ", {"SERPER_API_KEY": "env-key"}): from deerflow.community.serper.tools import _get_api_key - assert _get_api_key() == "env-key" + assert _get_api_key("web_search") == "env-key" def test_falls_back_to_env_when_no_config(self): with patch("deerflow.community.serper.tools.get_app_config") as mock: @@ -89,7 +96,7 @@ class TestGetApiKey: with patch.dict("os.environ", {"SERPER_API_KEY": "env-only"}): from deerflow.community.serper.tools import _get_api_key - assert _get_api_key() == "env-only" + assert _get_api_key("web_search") == "env-only" def test_returns_none_when_no_key_anywhere(self): with patch("deerflow.community.serper.tools.get_app_config") as mock: @@ -100,7 +107,236 @@ class TestGetApiKey: os.environ.pop("SERPER_API_KEY", None) from deerflow.community.serper.tools import _get_api_key - assert _get_api_key() is None + assert _get_api_key("web_search") is None + + def test_returns_none_when_env_key_whitespace(self): + with patch("deerflow.community.serper.tools.get_app_config") as mock: + mock.return_value.get_tool_config.return_value = None + with patch.dict("os.environ", {"SERPER_API_KEY": " "}): + from deerflow.community.serper.tools import _get_api_key + + assert _get_api_key("web_search") is None + + def test_reads_config_for_requested_tool_name(self): + with patch("deerflow.community.serper.tools.get_app_config") as mock: + tool_config = MagicMock() + tool_config.model_extra = {"api_key": "image-key"} + mock.return_value.get_tool_config.return_value = tool_config + + from deerflow.community.serper.tools import _get_api_key + + assert _get_api_key("image_search") == "image-key" + mock.return_value.get_tool_config.assert_called_with("image_search") + + +class TestCoerceMaxResults: + def test_returns_value_when_valid_positive_int(self): + from deerflow.community.serper.tools import _coerce_max_results + + assert _coerce_max_results(3) == 3 + + def test_returns_value_for_numeric_string(self): + from deerflow.community.serper.tools import _coerce_max_results + + assert _coerce_max_results("7") == 7 + + def test_caps_value_at_default_maximum(self): + from deerflow.community.serper.tools import _coerce_max_results + + assert _coerce_max_results(999) == 10 + + def test_respects_custom_maximum(self): + from deerflow.community.serper.tools import _coerce_max_results + + assert _coerce_max_results(999, max_allowed=3) == 3 + + def test_returns_default_for_non_numeric_string(self): + from deerflow.community.serper.tools import _coerce_max_results + + assert _coerce_max_results("oops") == 5 + + def test_returns_default_for_none(self): + from deerflow.community.serper.tools import _coerce_max_results + + assert _coerce_max_results(None) == 5 + + def test_returns_default_for_non_coercible_object(self): + from deerflow.community.serper.tools import _coerce_max_results + + assert _coerce_max_results(object()) == 5 + + def test_returns_default_for_zero(self): + from deerflow.community.serper.tools import _coerce_max_results + + assert _coerce_max_results(0) == 5 + + def test_returns_default_for_negative(self): + from deerflow.community.serper.tools import _coerce_max_results + + assert _coerce_max_results(-3) == 5 + + def test_respects_custom_default(self): + from deerflow.community.serper.tools import _coerce_max_results + + assert _coerce_max_results("bad", default=2) == 2 + + +class TestMissingKeyError: + def test_warns_once_per_tool_name(self, caplog): + import logging + + import deerflow.community.serper.tools as serper_mod + + with caplog.at_level(logging.WARNING): + serper_mod._missing_key_error("q1", "web_search") + serper_mod._missing_key_error("q2", "web_search") + + warnings = [r for r in caplog.records if r.levelno == logging.WARNING] + assert len(warnings) == 1 + assert "web_search" in warnings[0].getMessage() + + def test_warns_separately_for_each_tool(self, caplog): + import logging + + import deerflow.community.serper.tools as serper_mod + + with caplog.at_level(logging.WARNING): + serper_mod._missing_key_error("q1", "web_search") + serper_mod._missing_key_error("q2", "image_search") + + warned_tools = {r.getMessage() for r in caplog.records if r.levelno == logging.WARNING} + assert any("web_search" in m for m in warned_tools) + assert any("image_search" in m for m in warned_tools) + + def test_returns_structured_error_json(self): + import deerflow.community.serper.tools as serper_mod + + parsed = json.loads(serper_mod._missing_key_error("hello", "web_search")) + assert parsed["error"] == "SERPER_API_KEY is not configured" + assert parsed["query"] == "hello" + + +class TestSafePublicUrl: + def test_https_public_hostname_passes(self): + from deerflow.community.serper.tools import _safe_public_url + + assert _safe_public_url("https://example.com/i.jpg") == "https://example.com/i.jpg" + + def test_public_ip_literal_passes(self): + from deerflow.community.serper.tools import _safe_public_url + + assert _safe_public_url("https://8.8.8.8/i.jpg") == "https://8.8.8.8/i.jpg" + + def test_localhost_is_filtered(self): + from deerflow.community.serper.tools import _safe_public_url + + assert _safe_public_url("http://localhost/x.jpg") == "" + + def test_localhost_subdomain_is_filtered(self): + from deerflow.community.serper.tools import _safe_public_url + + assert _safe_public_url("http://foo.localhost/x.jpg") == "" + + def test_trailing_dot_localhost_is_filtered(self): + from deerflow.community.serper.tools import _safe_public_url + + # FQDN root label: localhost. still resolves to loopback. + assert _safe_public_url("http://localhost./x.jpg") == "" + + def test_trailing_dot_loopback_ip_is_filtered(self): + from deerflow.community.serper.tools import _safe_public_url + + assert _safe_public_url("http://127.0.0.1./x.jpg") == "" + + def test_trailing_dot_private_ip_is_filtered(self): + from deerflow.community.serper.tools import _safe_public_url + + assert _safe_public_url("http://10.0.0.1./x.jpg") == "" + + def test_trailing_dot_public_host_passes(self): + from deerflow.community.serper.tools import _safe_public_url + + # A trailing dot on a public host is harmless and must not be rejected. + assert _safe_public_url("https://example.com./i.jpg") == "https://example.com./i.jpg" + + def test_private_ip_is_filtered(self): + from deerflow.community.serper.tools import _safe_public_url + + assert _safe_public_url("http://10.0.0.1/x.jpg") == "" + + def test_ipv4_mapped_ipv6_loopback_is_filtered(self): + from deerflow.community.serper.tools import _safe_public_url + + assert _safe_public_url("http://[::ffff:127.0.0.1]/x.jpg") == "" + + def test_non_http_scheme_is_filtered(self): + from deerflow.community.serper.tools import _safe_public_url + + assert _safe_public_url("file:///etc/passwd") == "" + + def test_non_string_is_filtered(self): + from deerflow.community.serper.tools import _safe_public_url + + assert _safe_public_url(None) == "" + + def test_decimal_encoded_loopback_is_filtered(self): + from deerflow.community.serper.tools import _safe_public_url + + # 2130706433 == 127.0.0.1 + assert _safe_public_url("http://2130706433/x.jpg") == "" + + def test_hex_encoded_loopback_is_filtered(self): + from deerflow.community.serper.tools import _safe_public_url + + # 0x7f000001 == 127.0.0.1 + assert _safe_public_url("http://0x7f000001/x.jpg") == "" + + def test_octal_encoded_loopback_is_filtered(self): + from deerflow.community.serper.tools import _safe_public_url + + # 0177.0.0.1 == 127.0.0.1 + assert _safe_public_url("http://0177.0.0.1/x.jpg") == "" + + def test_decimal_encoded_private_ip_is_filtered(self): + from deerflow.community.serper.tools import _safe_public_url + + # 167772161 == 10.0.0.1 + assert _safe_public_url("http://167772161/x.jpg") == "" + + def test_decimal_encoded_public_ip_passes(self): + from deerflow.community.serper.tools import _safe_public_url + + # 134744072 == 8.8.8.8 + assert _safe_public_url("http://134744072/i.jpg") == "http://134744072/i.jpg" + + def test_domain_with_hex_chars_is_not_treated_as_ip(self): + from deerflow.community.serper.tools import _safe_public_url + + assert _safe_public_url("https://cafe.com/i.jpg") == "https://cafe.com/i.jpg" + + def test_out_of_range_octet_is_not_treated_as_ip(self): + from deerflow.community.serper.tools import _safe_public_url + + # 999.1.1.1 is not a valid IPv4 literal; treat as a hostname, not blocked. + assert _safe_public_url("https://999.1.1.1/i.jpg") == "https://999.1.1.1/i.jpg" + + def test_too_many_octets_is_not_treated_as_ip(self): + from deerflow.community.serper.tools import _safe_public_url + + # More than 4 dotted parts cannot be an IPv4 literal; treat as hostname. + assert _safe_public_url("https://1.2.3.4.5/i.jpg") == "https://1.2.3.4.5/i.jpg" + + def test_empty_octet_is_not_treated_as_ip(self): + from deerflow.community.serper.tools import _safe_public_url + + # Empty dotted part (e.g. trailing/leading dot) cannot decode to an IP. + assert _safe_public_url("https://1.2..3/i.jpg") == "https://1.2..3/i.jpg" + + def test_trailing_octet_out_of_range_is_not_treated_as_ip(self): + from deerflow.community.serper.tools import _safe_public_url + + # Leading octets are valid but the trailing block exceeds its range. + assert _safe_public_url("https://1.2.3.999/i.jpg") == "https://1.2.3.999/i.jpg" class TestWebSearchTool: @@ -144,6 +380,47 @@ class TestWebSearchTool: assert parsed["total_results"] == 3 assert len(parsed["results"]) == 3 + def test_invalid_config_max_results_falls_back_to_default(self, mock_config_with_key): + mock_config_with_key.return_value.get_tool_config.return_value.model_extra = { + "api_key": "test-key", + "max_results": "oops", + } + organic = [{"title": f"R{i}", "link": f"https://x.com/{i}", "snippet": f"S{i}"} for i in range(10)] + mock_resp = _make_serper_response(organic) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_post = mock_client_cls.return_value.__enter__.return_value.post + mock_post.return_value = mock_resp + + from deerflow.community.serper.tools import web_search_tool + + result = web_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert parsed["total_results"] == 5 + assert mock_post.call_args.kwargs["json"]["num"] == 5 + + def test_config_max_results_is_capped(self, mock_config_with_key): + mock_config_with_key.return_value.get_tool_config.return_value.model_extra = { + "api_key": "test-key", + "max_results": 999, + } + organic = [{"title": f"R{i}", "link": f"https://x.com/{i}", "snippet": f"S{i}"} for i in range(20)] + mock_resp = _make_serper_response(organic) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_post = mock_client_cls.return_value.__enter__.return_value.post + mock_post.return_value = mock_resp + + from deerflow.community.serper.tools import web_search_tool + + result = web_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert parsed["total_results"] == 10 + assert len(parsed["results"]) == 10 + assert mock_post.call_args.kwargs["json"]["num"] == 10 + def test_max_results_parameter_accepted(self, mock_config_no_key): """Tool accepts max_results as a call parameter when config does not override it.""" organic = [{"title": f"R{i}", "link": f"https://x.com/{i}", "snippet": f"S{i}"} for i in range(10)] @@ -254,6 +531,23 @@ class TestWebSearchTool: assert "error" in parsed + def test_http_status_error_from_response_returns_structured_error(self, mock_config_with_key): + mock_error_response = MagicMock() + mock_error_response.status_code = 403 + mock_error_response.text = "Forbidden" + mock_error_response.raise_for_status.side_effect = httpx.HTTPStatusError("403", request=MagicMock(), response=mock_error_response) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_error_response + + from deerflow.community.serper.tools import web_search_tool + + result = web_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert "error" in parsed + assert "403" in parsed["error"] + def test_sends_correct_headers_and_payload(self, mock_config_with_key): organic = [{"title": "T", "link": "https://x.com", "snippet": "S"}] mock_resp = _make_serper_response(organic) @@ -306,3 +600,610 @@ class TestWebSearchTool: parsed = json.loads(result) assert parsed["results"][0] == {"title": "", "url": "", "content": ""} + + def test_malformed_json_response_returns_error(self, mock_config_with_key): + mock_resp = MagicMock() + mock_resp.json.side_effect = json.JSONDecodeError(" Expecting value", "doc", 0) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp + + from deerflow.community.serper.tools import web_search_tool + + result = web_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert "error" in parsed + + def test_non_dict_json_response_returns_error(self, mock_config_with_key): + """A valid but non-dict payload (e.g. a list) must not crash the tool.""" + mock_resp = MagicMock() + mock_resp.json.return_value = ["unexpected", "list"] + mock_resp.raise_for_status = MagicMock() + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp + + from deerflow.community.serper.tools import web_search_tool + + result = web_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert "error" in parsed + assert parsed["query"] == "test" + + def test_non_list_organic_returns_error(self, mock_config_with_key): + mock_resp = MagicMock() + mock_resp.json.return_value = {"organic": {"unexpected": "dict"}} + mock_resp.raise_for_status = MagicMock() + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp + + from deerflow.community.serper.tools import web_search_tool + + result = web_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert parsed["error"] == "Serper returned an unexpected response format" + + def test_null_organic_field_is_treated_as_no_results(self, mock_config_with_key): + """A null-typed field (some APIs use it for "no results") is not a format error.""" + mock_resp = MagicMock() + mock_resp.json.return_value = {"organic": None} + mock_resp.raise_for_status = MagicMock() + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp + + from deerflow.community.serper.tools import web_search_tool + + result = web_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert parsed["error"] == "No results found" + + def test_non_dict_organic_items_are_ignored(self, mock_config_with_key): + mock_resp = _make_serper_response(["bad", {"title": "T", "link": "https://x.com", "snippet": "S"}]) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp + + from deerflow.community.serper.tools import web_search_tool + + result = web_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert parsed["total_results"] == 1 + assert parsed["results"][0]["title"] == "T" + + def test_timeout_returns_error(self, mock_config_with_key): + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.side_effect = httpx.TimeoutException("Read timed out") + + from deerflow.community.serper.tools import web_search_tool + + result = web_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert "error" in parsed + assert "timed out" in parsed["error"].lower() + + def test_long_query_is_truncated(self, mock_config_with_key): + organic = [{"title": "T", "link": "https://x.com", "snippet": "S"}] + mock_resp = _make_serper_response(organic) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_post = mock_client_cls.return_value.__enter__.return_value.post + mock_post.return_value = mock_resp + + from deerflow.community.serper.tools import web_search_tool + + long_query = "a" * 1000 + web_search_tool.invoke({"query": long_query}) + payload = mock_post.call_args.kwargs["json"] + + assert payload["q"] == "a" * 500 + + def test_query_is_stripped(self, mock_config_with_key): + organic = [{"title": "T", "link": "https://x.com", "snippet": "S"}] + mock_resp = _make_serper_response(organic) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_post = mock_client_cls.return_value.__enter__.return_value.post + mock_post.return_value = mock_resp + + from deerflow.community.serper.tools import web_search_tool + + web_search_tool.invoke({"query": " hello world "}) + payload = mock_post.call_args.kwargs["json"] + + assert payload["q"] == "hello world" + + +class TestImageSearchTool: + def test_basic_search_returns_normalized_results(self, mock_config_with_key): + images = [ + { + "title": "Cat 1", + "imageUrl": "https://example.com/cat1.jpg", + "thumbnailUrl": "https://example.com/cat1_thumb.jpg", + }, + { + "title": "Cat 2", + "imageUrl": "https://example.com/cat2.jpg", + "thumbnailUrl": "https://example.com/cat2_thumb.jpg", + }, + ] + mock_resp = _make_serper_images_response(images) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp + + from deerflow.community.serper.tools import image_search_tool + + result = image_search_tool.invoke({"query": "cat photo"}) + parsed = json.loads(result) + + assert parsed["query"] == "cat photo" + assert parsed["total_results"] == 2 + assert parsed["results"][0]["title"] == "Cat 1" + assert parsed["results"][0]["image_url"] == "https://example.com/cat1.jpg" + assert parsed["results"][0]["thumbnail_url"] == "https://example.com/cat1_thumb.jpg" + assert parsed["usage_hint"] == "Use the 'image_url' values as reference images in image generation. Download them first if needed." + + def test_sends_correct_headers_and_payload_to_images_endpoint(self, mock_config_with_key): + images = [{"title": "T", "imageUrl": "https://x.com/i.jpg", "thumbnailUrl": "https://x.com/t.jpg"}] + mock_resp = _make_serper_images_response(images) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_post = mock_client_cls.return_value.__enter__.return_value.post + mock_post.return_value = mock_resp + + from deerflow.community.serper.tools import image_search_tool + + image_search_tool.invoke({"query": "hello world"}) + + call_args = mock_post.call_args + endpoint = call_args.args[0] + headers = call_args.kwargs["headers"] + payload = call_args.kwargs["json"] + + assert endpoint == "https://google.serper.dev/images" + assert headers["X-API-KEY"] == "test-serper-key" + assert payload["q"] == "hello world" + assert payload["num"] == 5 + + def test_image_url_falls_back_to_thumbnail(self, mock_config_with_key): + images = [{"title": "Only thumb", "thumbnailUrl": "https://x.com/thumb.jpg"}] + mock_resp = _make_serper_images_response(images) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp + + from deerflow.community.serper.tools import image_search_tool + + result = image_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert parsed["results"][0]["image_url"] == "https://x.com/thumb.jpg" + assert parsed["results"][0]["thumbnail_url"] == "https://x.com/thumb.jpg" + + def test_thumbnail_url_falls_back_to_image(self, mock_config_with_key): + images = [{"title": "Only image", "imageUrl": "https://x.com/full.jpg"}] + mock_resp = _make_serper_images_response(images) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp + + from deerflow.community.serper.tools import image_search_tool + + result = image_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert parsed["results"][0]["image_url"] == "https://x.com/full.jpg" + assert parsed["results"][0]["thumbnail_url"] == "https://x.com/full.jpg" + + def test_filtered_image_url_does_not_collapse_onto_thumbnail(self, mock_config_with_key): + """A present-but-unsafe imageUrl must not be replaced by the safe thumbnail.""" + images = [{"title": "T", "imageUrl": "http://10.0.0.1/full.jpg", "thumbnailUrl": "https://example.com/t.jpg"}] + mock_resp = _make_serper_images_response(images) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp + + from deerflow.community.serper.tools import image_search_tool + + result = image_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + # The high-res field stays empty rather than masquerading as the preview. + assert parsed["results"][0]["image_url"] == "" + assert parsed["results"][0]["thumbnail_url"] == "https://example.com/t.jpg" + + def test_filtered_thumbnail_does_not_collapse_onto_image(self, mock_config_with_key): + """A present-but-unsafe thumbnailUrl must not be replaced by the safe image.""" + images = [{"title": "T", "imageUrl": "https://example.com/full.jpg", "thumbnailUrl": "http://127.0.0.1/t.jpg"}] + mock_resp = _make_serper_images_response(images) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp + + from deerflow.community.serper.tools import image_search_tool + + result = image_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert parsed["results"][0]["image_url"] == "https://example.com/full.jpg" + assert parsed["results"][0]["thumbnail_url"] == "" + + def test_respects_max_results_from_config(self, mock_config_with_key): + mock_config_with_key.return_value.get_tool_config.return_value.model_extra = { + "api_key": "test-key", + "max_results": 3, + } + images = [{"title": f"I{i}", "imageUrl": f"https://x.com/{i}.jpg"} for i in range(10)] + mock_resp = _make_serper_images_response(images) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp + + from deerflow.community.serper.tools import image_search_tool + + result = image_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert parsed["total_results"] == 3 + assert len(parsed["results"]) == 3 + + def test_config_max_results_is_capped(self, mock_config_with_key): + mock_config_with_key.return_value.get_tool_config.return_value.model_extra = { + "api_key": "test-key", + "max_results": 999, + } + images = [{"title": f"I{i}", "imageUrl": f"https://x.com/{i}.jpg"} for i in range(20)] + mock_resp = _make_serper_images_response(images) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_post = mock_client_cls.return_value.__enter__.return_value.post + mock_post.return_value = mock_resp + + from deerflow.community.serper.tools import image_search_tool + + result = image_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert parsed["total_results"] == 10 + assert len(parsed["results"]) == 10 + assert mock_post.call_args.kwargs["json"]["num"] == 10 + + def test_empty_images_returns_error_json(self, mock_config_with_key): + mock_resp = _make_serper_images_response([]) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp + + from deerflow.community.serper.tools import image_search_tool + + result = image_search_tool.invoke({"query": "no results"}) + parsed = json.loads(result) + + assert "error" in parsed + assert parsed["error"] == "No images found" + assert parsed["query"] == "no results" + + def test_missing_api_key_returns_error_json(self, mock_config_no_key): + with patch.dict("os.environ", {}, clear=True): + import os + + os.environ.pop("SERPER_API_KEY", None) + + from deerflow.community.serper.tools import image_search_tool + + result = image_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert "error" in parsed + assert "SERPER_API_KEY" in parsed["error"] + + def test_http_error_returns_structured_error(self, mock_config_with_key): + mock_error_response = MagicMock() + mock_error_response.status_code = 403 + mock_error_response.text = "Forbidden" + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.side_effect = httpx.HTTPStatusError("403", request=MagicMock(), response=mock_error_response) + + from deerflow.community.serper.tools import image_search_tool + + result = image_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert "error" in parsed + assert "403" in parsed["error"] + + def test_network_exception_returns_error_json(self, mock_config_with_key): + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.side_effect = Exception("timeout") + + from deerflow.community.serper.tools import image_search_tool + + result = image_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert "error" in parsed + + def test_uses_env_key_when_config_absent(self): + with patch("deerflow.community.serper.tools.get_app_config") as mock: + mock.return_value.get_tool_config.return_value = None + with patch.dict("os.environ", {"SERPER_API_KEY": "env-only-key"}): + images = [{"title": "T", "imageUrl": "https://x.com/i.jpg"}] + mock_resp = _make_serper_images_response(images) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_post = mock_client_cls.return_value.__enter__.return_value.post + mock_post.return_value = mock_resp + + from deerflow.community.serper.tools import image_search_tool + + image_search_tool.invoke({"query": "env key test"}) + headers = mock_post.call_args.kwargs["headers"] + + assert headers["X-API-KEY"] == "env-only-key" + + def test_max_results_parameter_accepted(self, mock_config_no_key): + """Tool accepts max_results as a call parameter when config does not override it.""" + images = [{"title": f"I{i}", "imageUrl": f"https://x.com/{i}.jpg"} for i in range(10)] + mock_resp = _make_serper_images_response(images) + + with patch.dict("os.environ", {"SERPER_API_KEY": "env-key"}): + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp + + from deerflow.community.serper.tools import image_search_tool + + result = image_search_tool.invoke({"query": "test", "max_results": 2}) + parsed = json.loads(result) + + assert parsed["total_results"] == 2 + + def test_config_max_results_overrides_parameter(self): + """Config max_results overrides the parameter passed at call time.""" + with patch("deerflow.community.serper.tools.get_app_config") as mock: + tool_config = MagicMock() + tool_config.model_extra = {"api_key": "test-key", "max_results": 3} + mock.return_value.get_tool_config.return_value = tool_config + + images = [{"title": f"I{i}", "imageUrl": f"https://x.com/{i}.jpg"} for i in range(10)] + mock_resp = _make_serper_images_response(images) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp + + from deerflow.community.serper.tools import image_search_tool + + result = image_search_tool.invoke({"query": "test", "max_results": 8}) + parsed = json.loads(result) + + assert parsed["total_results"] == 3 + + def test_missing_api_key_logs_warning_once(self, mock_config_no_key, caplog): + import logging + + with patch.dict("os.environ", {}, clear=True): + import os + + os.environ.pop("SERPER_API_KEY", None) + + from deerflow.community.serper.tools import image_search_tool + + with caplog.at_level(logging.WARNING, logger="deerflow.community.serper.tools"): + image_search_tool.invoke({"query": "q1"}) + image_search_tool.invoke({"query": "q2"}) + + warnings = [r for r in caplog.records if r.levelno == logging.WARNING] + assert len(warnings) == 1 + + def test_malformed_json_response_returns_error(self, mock_config_with_key): + mock_resp = MagicMock() + mock_resp.json.side_effect = json.JSONDecodeError(" Expecting value", "doc", 0) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp + + from deerflow.community.serper.tools import image_search_tool + + result = image_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert "error" in parsed + + def test_non_dict_json_response_returns_error(self, mock_config_with_key): + """A valid but non-dict payload (e.g. a list) must not crash the tool.""" + mock_resp = MagicMock() + mock_resp.json.return_value = ["unexpected", "list"] + mock_resp.raise_for_status = MagicMock() + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp + + from deerflow.community.serper.tools import image_search_tool + + result = image_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert "error" in parsed + assert parsed["query"] == "test" + + def test_non_list_images_returns_error(self, mock_config_with_key): + mock_resp = MagicMock() + mock_resp.json.return_value = {"images": {"unexpected": "dict"}} + mock_resp.raise_for_status = MagicMock() + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp + + from deerflow.community.serper.tools import image_search_tool + + result = image_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert parsed["error"] == "Serper returned an unexpected response format" + + def test_null_images_field_is_treated_as_no_results(self, mock_config_with_key): + """A null-typed images field is "no images", not a malformed payload.""" + mock_resp = MagicMock() + mock_resp.json.return_value = {"images": None} + mock_resp.raise_for_status = MagicMock() + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp + + from deerflow.community.serper.tools import image_search_tool + + result = image_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert parsed["error"] == "No images found" + + def test_non_dict_image_items_are_ignored(self, mock_config_with_key): + images = ["bad", {"title": "T", "imageUrl": "https://x.com/i.jpg"}] + mock_resp = _make_serper_images_response(images) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp + + from deerflow.community.serper.tools import image_search_tool + + result = image_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert parsed["total_results"] == 1 + assert parsed["results"][0]["image_url"] == "https://x.com/i.jpg" + + def test_timeout_returns_error(self, mock_config_with_key): + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.side_effect = httpx.TimeoutException("Read timed out") + + from deerflow.community.serper.tools import image_search_tool + + result = image_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert "error" in parsed + assert "timed out" in parsed["error"].lower() + + def test_long_query_is_truncated(self, mock_config_with_key): + images = [{"title": "T", "imageUrl": "https://x.com/i.jpg"}] + mock_resp = _make_serper_images_response(images) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_post = mock_client_cls.return_value.__enter__.return_value.post + mock_post.return_value = mock_resp + + from deerflow.community.serper.tools import image_search_tool + + long_query = "a" * 1000 + image_search_tool.invoke({"query": long_query}) + payload = mock_post.call_args.kwargs["json"] + + assert payload["q"] == "a" * 500 + + def test_query_is_stripped(self, mock_config_with_key): + images = [{"title": "T", "imageUrl": "https://x.com/i.jpg"}] + mock_resp = _make_serper_images_response(images) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_post = mock_client_cls.return_value.__enter__.return_value.post + mock_post.return_value = mock_resp + + from deerflow.community.serper.tools import image_search_tool + + image_search_tool.invoke({"query": " cat photo "}) + payload = mock_post.call_args.kwargs["json"] + + assert payload["q"] == "cat photo" + + def test_partial_fields_in_image_result_returns_error(self, mock_config_with_key): + """Missing image URLs should not be reported as usable results.""" + images = [{}] + mock_resp = _make_serper_images_response(images) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp + + from deerflow.community.serper.tools import image_search_tool + + result = image_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert parsed["error"] == "No safe image URLs found" + assert parsed["query"] == "test" + + def test_unsafe_image_urls_are_filtered(self, mock_config_with_key): + images = [ + {"title": "Local", "imageUrl": "file:///etc/passwd", "thumbnailUrl": "http://127.0.0.1/thumb.jpg"}, + {"title": "Data", "imageUrl": "data:image/png;base64,abc", "thumbnailUrl": "http://10.0.0.1/thumb.jpg"}, + {"title": "Safe", "imageUrl": "https://example.com/i.jpg", "thumbnailUrl": "http://example.com/t.jpg"}, + ] + mock_resp = _make_serper_images_response(images) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp + + from deerflow.community.serper.tools import image_search_tool + + result = image_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert parsed["total_results"] == 1 + assert parsed["results"][0]["title"] == "Safe" + assert parsed["results"][0]["image_url"] == "https://example.com/i.jpg" + assert parsed["results"][0]["thumbnail_url"] == "http://example.com/t.jpg" + + def test_all_unsafe_image_urls_return_error(self, mock_config_with_key): + images = [ + {"title": "Local", "imageUrl": "file:///etc/passwd", "thumbnailUrl": "http://127.0.0.1/thumb.jpg"}, + {"title": "Private", "imageUrl": "http://10.0.0.1/image.jpg", "thumbnailUrl": "data:image/png;base64,abc"}, + ] + mock_resp = _make_serper_images_response(images) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp + + from deerflow.community.serper.tools import image_search_tool + + result = image_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert parsed["error"] == "No safe image URLs found" + assert parsed["query"] == "test" + + def test_unsafe_image_urls_do_not_consume_result_limit(self, mock_config_with_key): + mock_config_with_key.return_value.get_tool_config.return_value.model_extra = { + "api_key": "test-key", + "max_results": 1, + } + images = [ + {"title": "Unsafe", "imageUrl": "file:///etc/passwd", "thumbnailUrl": "http://127.0.0.1/thumb.jpg"}, + {"title": "Safe", "imageUrl": "https://example.com/i.jpg", "thumbnailUrl": "https://example.com/t.jpg"}, + ] + mock_resp = _make_serper_images_response(images) + + with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls: + mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp + + from deerflow.community.serper.tools import image_search_tool + + result = image_search_tool.invoke({"query": "test"}) + parsed = json.loads(result) + + assert parsed["total_results"] == 1 + assert parsed["results"][0]["title"] == "Safe" + + +def test_package_exports_image_search_tool(): + from deerflow.community.serper import image_search_tool + from deerflow.community.serper.tools import image_search_tool as direct_image_search_tool + + assert image_search_tool is direct_image_search_tool diff --git a/config.example.yaml b/config.example.yaml index 9ac6e177d..24b65bbcb 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -522,13 +522,13 @@ tools: # Web search tool (uses Serper - Google Search API, requires SERPER_API_KEY) # Serper provides real-time Google Search results. Sign up at https://serper.dev - # Note: set SERPER_API_KEY in your environment before starting the app, or - # uncomment and fill in api_key below (the $VAR syntax is resolved at startup). + # Note: set SERPER_API_KEY in your environment before starting the app. + # Avoid putting literal API keys in config.yaml; use the $VAR form instead. # - name: web_search # group: web # use: deerflow.community.serper.tools:web_search_tool - # max_results: 5 - # # api_key: $SERPER_API_KEY # Optional if SERPER_API_KEY env var is set + # max_results: 5 # capped at 10 by the Serper provider + # # api_key: $SERPER_API_KEY # Optional explicit env-var reference # Web search tool (uses Brave Search API, requires BRAVE_SEARCH_API_KEY) # Brave Search returns results from an independent index. Sign up at @@ -637,6 +637,16 @@ tools: # # Image size filter. Options: "l" (large), "m" (medium), "i" (icon). # image_size: "i" + # Image search tool (uses Serper - Google Images API, requires SERPER_API_KEY) + # Serper provides real-time Google Images results. Sign up at https://serper.dev + # Note: set SERPER_API_KEY in your environment before starting the app. + # Avoid putting literal API keys in config.yaml; use the $VAR form instead. + # - name: image_search + # group: web + # use: deerflow.community.serper.tools:image_search_tool + # max_results: 5 # capped at 10 by the Serper provider + # # api_key: $SERPER_API_KEY # Optional explicit env-var reference + # File operations tools - name: ls group: file:read diff --git a/frontend/src/content/en/harness/tools.mdx b/frontend/src/content/en/harness/tools.mdx index bde4c43b2..090b0932f 100644 --- a/frontend/src/content/en/harness/tools.mdx +++ b/frontend/src/content/en/harness/tools.mdx @@ -212,13 +212,31 @@ tools: ### Image search + + ```yaml tools: - use: deerflow.community.image_search.tools:image_search_tool - # Or use InfoQuest: - # - use: deerflow.community.infoquest.tools:image_search_tool - # api_key: $INFOQUEST_API_KEY ``` +No API key required. Default configuration, good for development and general use. + + +```yaml +tools: + - use: deerflow.community.infoquest.tools:image_search_tool + api_key: $INFOQUEST_API_KEY +``` +Requires an InfoQuest API key. + + +```yaml +tools: + - use: deerflow.community.serper.tools:image_search_tool + api_key: $SERPER_API_KEY +``` +Google Images results via Serper. Requires a [Serper](https://serper.dev) API key. Reuses `SERPER_API_KEY` with the Serper `web_search` tool. + + ## Tool groups diff --git a/frontend/src/content/zh/harness/tools.mdx b/frontend/src/content/zh/harness/tools.mdx index 190b6e7ec..c0704b6b6 100644 --- a/frontend/src/content/zh/harness/tools.mdx +++ b/frontend/src/content/zh/harness/tools.mdx @@ -191,10 +191,31 @@ tools: ### 图像搜索 + + ```yaml tools: - use: deerflow.community.image_search.tools:image_search_tool ``` +无需 API Key。默认配置,适合开发和通用用途。 + + +```yaml +tools: + - use: deerflow.community.infoquest.tools:image_search_tool + api_key: $INFOQUEST_API_KEY +``` +需要 InfoQuest API Key。 + + +```yaml +tools: + - use: deerflow.community.serper.tools:image_search_tool + api_key: $SERPER_API_KEY +``` +通过 Serper 获取 Google 图片结果。需要 [Serper](https://serper.dev) API Key,与 Serper `web_search` 工具复用同一个 `SERPER_API_KEY`。 + + ## 工具组 diff --git a/scripts/doctor.py b/scripts/doctor.py index cd248b144..9ad4d3bb9 100644 --- a/scripts/doctor.py +++ b/scripts/doctor.py @@ -103,6 +103,7 @@ def _split_use_path(use: str) -> tuple[str, str] | None: # Check result container # --------------------------------------------------------------------------- + class CheckResult: def __init__( self, @@ -129,6 +130,7 @@ class CheckResult: # Individual checks # --------------------------------------------------------------------------- + def check_python() -> CheckResult: v = sys.version_info version_str = f"{v.major}.{v.minor}.{v.micro}" @@ -198,11 +200,7 @@ def check_nginx() -> CheckResult: return CheckResult( "nginx", "fail", - fix=( - "macOS: brew install nginx\n" - "Ubuntu: sudo apt install nginx\n" - "Windows: use WSL or Docker mode" - ), + fix=("macOS: brew install nginx\nUbuntu: sudo apt install nginx\nWindows: use WSL or Docker mode"), ) @@ -404,11 +402,7 @@ def check_llm_auth(config_path: Path) -> list[CheckResult]: ) if use == "deerflow.models.claude_provider:ClaudeChatModel": - credential_paths = [ - Path(os.environ["CLAUDE_CODE_CREDENTIALS_PATH"]).expanduser() - for env_name in ("CLAUDE_CODE_CREDENTIALS_PATH",) - if os.environ.get(env_name) - ] + credential_paths = [Path(os.environ["CLAUDE_CODE_CREDENTIALS_PATH"]).expanduser() for env_name in ("CLAUDE_CODE_CREDENTIALS_PATH",) if os.environ.get(env_name)] credential_paths.append(Path("~/.claude/.credentials.json").expanduser()) has_oauth_env = any( os.environ.get(name) @@ -428,10 +422,7 @@ def check_llm_auth(config_path: Path) -> list[CheckResult]: CheckResult( f"Claude auth available (model: {model_name})", "fail", - fix=( - "Set ANTHROPIC_API_KEY / CLAUDE_CODE_OAUTH_TOKEN, " - "or place credentials at ~/.claude/.credentials.json" - ), + fix=("Set ANTHROPIC_API_KEY / CLAUDE_CODE_OAUTH_TOKEN, or place credentials at ~/.claude/.credentials.json"), ) ) except Exception as exc: @@ -458,7 +449,6 @@ def check_web_tool(config_path: Path, *, tool_name: str, label: str) -> CheckRes data = _load_yaml_file(config_path) tool_entries = [t for t in data.get("tools", []) if t.get("name") == tool_name] - tool_uses = [t.get("use", "") for t in tool_entries] if not tool_entries: return CheckResult( label, @@ -470,6 +460,7 @@ def check_web_tool(config_path: Path, *, tool_name: str, label: str) -> CheckRes free_providers = { "web_search": {"ddg_search": "DuckDuckGo (no key needed)"}, "web_fetch": {"jina_ai": "Jina AI Reader (no key needed)"}, + "image_search": {"deerflow.community.image_search.tools": "DuckDuckGo Images (no key needed)"}, } key_providers = { "web_search": { @@ -478,35 +469,57 @@ def check_web_tool(config_path: Path, *, tool_name: str, label: str) -> CheckRes "exa": "EXA_API_KEY", "firecrawl": "FIRECRAWL_API_KEY", "brave": "BRAVE_SEARCH_API_KEY", + "serper": "SERPER_API_KEY", }, "web_fetch": { "infoquest": "INFOQUEST_API_KEY", "exa": "EXA_API_KEY", "firecrawl": "FIRECRAWL_API_KEY", }, + "image_search": { + "infoquest": "INFOQUEST_API_KEY", + "serper": "SERPER_API_KEY", + }, } - for tool_entry in tool_entries: - use = tool_entry.get("use", "") + def _configured_key_detail(tool: dict, default_var: str) -> tuple[Status, str] | None: + api_key = tool.get("api_key") + if isinstance(api_key, str) and api_key.strip(): + key = api_key.strip() + if key.startswith("$"): + env_name = key[1:] + val = os.environ.get(env_name) + if val and val.strip(): + return ("ok", f"{env_name} set from config") + # The referenced var is unset; fall through to the default + # env var below, which tools use as a runtime fallback. + else: + return ("warn", "literal api_key set in config") + + val = os.environ.get(default_var) + return ("ok", f"{default_var} set") if val and val.strip() else None + + for tool in tool_entries: + use = tool.get("use", "") for provider, detail in free_providers.get(tool_name, {}).items(): if provider in use: return CheckResult(label, "ok", detail) - for tool_entry in tool_entries: - use = tool_entry.get("use", "") + for tool in tool_entries: + use = tool.get("use", "") for provider, var in key_providers.get(tool_name, {}).items(): if provider in use: - configured_key = tool_entry.get("api_key") - if isinstance(configured_key, str) and configured_key.strip(): - if configured_key.startswith("$"): - ref_var = configured_key[1:] - if os.environ.get(ref_var): - return CheckResult(label, "ok", f"{provider} ({ref_var} set via api_key)") - else: - return CheckResult(label, "ok", f"{provider} (api_key configured)") - val = os.environ.get(var) - if val: - return CheckResult(label, "ok", f"{provider} ({var} set)") + key_status = _configured_key_detail(tool, var) + if key_status: + status, detail = key_status + if status == "warn": + return CheckResult( + label, + "warn", + f"{provider} ({detail})", + fix=f"Move the API key to .env as {var}= and reference it as ${var}", + ) + return CheckResult(label, "ok", f"{provider} ({detail})") return CheckResult( label, "warn", @@ -514,7 +527,8 @@ def check_web_tool(config_path: Path, *, tool_name: str, label: str) -> CheckRes fix=f"Add {var}= to .env, or run 'make setup'", ) - for use in tool_uses: + for tool in tool_entries: + use = tool.get("use", "") split = _split_use_path(use) if split is None: return CheckResult( @@ -544,6 +558,10 @@ def check_web_fetch(config_path: Path) -> CheckResult: return check_web_tool(config_path, tool_name="web_fetch", label="web fetch configured") +def check_image_search(config_path: Path) -> CheckResult: + return check_web_tool(config_path, tool_name="image_search", label="image search configured") + + def check_frontend_env(project_root: Path) -> CheckResult: env_path = project_root / "frontend" / ".env" if env_path.exists(): @@ -641,6 +659,7 @@ def check_env_file(project_root: Path) -> CheckResult: # Main # --------------------------------------------------------------------------- + def main() -> int: project_root = Path(__file__).resolve().parents[1] config_path = project_root / "config.yaml" @@ -691,7 +710,7 @@ def main() -> int: sections.append(("LLM Provider", llm_checks)) # ── Web Capabilities ───────────────────────────────────────────────────── - search_checks = [check_web_search(config_path), check_web_fetch(config_path)] + search_checks = [check_web_search(config_path), check_web_fetch(config_path), check_image_search(config_path)] sections.append(("Web Capabilities", search_checks)) # ── Sandbox ──────────────────────────────────────────────────────────────