feat(community): add Serper Google Images provider for image_search (#3575)

* feat(community): add Serper Google Images provider for image_search

Add a Serper-backed `image_search` tool alongside the existing Serper
`web_search` provider, so users with a SERPER_API_KEY can pull Google
Images results as reference images for downstream image generation.

- Share request/response handling between web_search and image_search
  via `_serper_post` / `_response_items`, with bounded `max_results`
  (capped at 10) and query normalization.
- Add a best-effort SSRF guard (`_safe_public_url`) that rejects
  non-http(s), localhost and private/non-global IP image URLs; filtered
  entries are dropped and never consume the result limit.
- doctor: flag literal `api_key` values in config as a warning and steer
  users toward `.env` + `$SERPER_API_KEY`.
- Docs/config: document the Serper image_search provider and SERPER_API_KEY,
  and discourage committing literal keys to config.yaml.
- Tests: cover the provider end-to-end (100% line coverage on tools.py)
  and the doctor literal-key warning path.

* fix(community): block obfuscated IPv4 literals in Serper image SSRF guard

The image_search SSRF guard only rejected dotted-decimal IP literals; encoded
forms such as decimal (http://2130706433/), hex (0x7f000001) and octal
(0177.0.0.1) raised ValueError in ip_address() and were allowed through, even
though many HTTP clients resolve them to private addresses like 127.0.0.1.

Add _decode_ipv4() to permissively decode these inet_aton-style encodings and
apply the same is_global check; hostnames that do not decode to an IP (e.g.
cafe.com) are still treated as hosts and left to fetch-time re-validation.

Addresses PR review feedback. Tests cover decimal/hex/octal loopback and
private encodings plus non-IP edge cases; tools.py stays at 100% line coverage.

* test(community): cover IPv4-mapped IPv6 URL filtering

* fix(community): address Serper image search review feedback

- Block trailing-dot hostname SSRF bypass (localhost./127.0.0.1.) in
  _safe_public_url by stripping the FQDN root label before checks.
- Keep a filtered image/thumbnail URL empty instead of collapsing onto
  its counterpart, preserving the high-res/preview contract.
- Evaluate the SSRF guard once per field rather than twice.
- Treat a null-typed organic/images field as "no results" rather than a
  malformed payload.
- doctor.py: when a config $VAR is unset, fall through to the default env
  var before reporting it as not set.
This commit is contained in:
Ryker_Feng
2026-06-18 07:36:35 +08:00
committed by GitHub
parent ec16b6650d
commit 0bbbbc06f4
9 changed files with 1409 additions and 87 deletions
+2
View File
@@ -236,6 +236,7 @@ tools:
**Built-in Tools**:
- `web_search` - Search the web (DuckDuckGo, Tavily, Brave, Exa, InfoQuest, Firecrawl, Serper)
- `web_fetch` - Fetch web pages (Jina AI, Exa, InfoQuest, Firecrawl)
- `image_search` - Search for reference images (DuckDuckGo, InfoQuest, Serper)
- `ls` - List directory contents
- `read_file` - Read file contents
- `write_file` - Write file contents
@@ -414,6 +415,7 @@ models:
- `NOVITA_API_KEY` - Novita API key (OpenAI-compatible endpoint)
- `TAVILY_API_KEY` - Tavily search API key
- `BRAVE_SEARCH_API_KEY` - Brave Search API key
- `SERPER_API_KEY` - Serper (Google Search/Images API) key for `web_search` and `image_search`
- `DEER_FLOW_PROJECT_ROOT` - Project root for relative runtime paths
- `DEER_FLOW_CONFIG_PATH` - Custom config file path
- `DEER_FLOW_EXTENSIONS_CONFIG_PATH` - Custom extensions config file path
@@ -1,3 +1,3 @@
from .tools import web_search_tool
from .tools import image_search_tool, web_search_tool
__all__ = ["web_search_tool"]
__all__ = ["image_search_tool", "web_search_tool"]
@@ -1,13 +1,15 @@
"""
Web Search Tool - Search the web using Serper (Google Search API).
Web and image search tools powered by Serper (Google Search API).
Serper provides real-time Google Search results via a JSON API.
An API key is required. Sign up at https://serper.dev to get one.
Serper provides real-time Google Search and Google Images results via a JSON
API. An API key is required. Sign up at https://serper.dev to get one.
"""
import json
import logging
import os
from ipaddress import IPv4Address, ip_address
from urllib.parse import urlparse
import httpx
from langchain.tools import tool
@@ -16,43 +18,168 @@ from deerflow.config import get_app_config
logger = logging.getLogger(__name__)
_SERPER_ENDPOINT = "https://google.serper.dev/search"
_api_key_warned = False
_SERPER_SEARCH_ENDPOINT = "https://google.serper.dev/search"
_SERPER_IMAGES_ENDPOINT = "https://google.serper.dev/images"
_SERPER_MAX_RESULTS = 10
_api_key_warned: set[str] = set()
def _get_api_key() -> str | None:
config = get_app_config().get_tool_config("web_search")
def _get_api_key(tool_name: str) -> str | None:
config = get_app_config().get_tool_config(tool_name)
if config is not None:
api_key = config.model_extra.get("api_key")
if isinstance(api_key, str) and api_key.strip():
return api_key
return os.getenv("SERPER_API_KEY")
return api_key.strip()
env_key = os.getenv("SERPER_API_KEY")
if isinstance(env_key, str) and env_key.strip():
return env_key.strip()
return None
@tool("web_search", parse_docstring=True)
def web_search_tool(query: str, max_results: int = 5) -> str:
"""Search the web for information using Google Search via Serper.
def _coerce_max_results(value: object, default: int = 5, max_allowed: int = _SERPER_MAX_RESULTS) -> int:
    """Coerce config/parameter input into a bounded positive result count.

    Args:
        value: Raw count from the tool call or tool config; anything ``int()``
            can convert is accepted.
        default: Count returned when ``value`` is unusable or not positive.
        max_allowed: Upper bound applied to an otherwise valid count.

    Returns:
        ``default`` when ``value`` cannot be converted or is ``<= 0``;
        otherwise the converted count, clamped to ``max_allowed``.
    """
    try:
        count = int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError is what ``int()`` raises for float infinities; it must
        # fall back to the default just like NaN, None or a non-numeric string.
        return default
    if count <= 0:
        return default
    return min(count, max_allowed)
Args:
query: Search keywords describing what you want to find. Be specific for better results.
max_results: Maximum number of search results to return. Default is 5.
"""
global _api_key_warned
config = get_app_config().get_tool_config("web_search")
if config is not None and "max_results" in config.model_extra:
max_results = config.model_extra.get("max_results", max_results)
api_key = _get_api_key()
if not api_key:
if not _api_key_warned:
_api_key_warned = True
logger.warning("Serper API key is not set. Set SERPER_API_KEY in your environment or provide api_key in config.yaml. Sign up at https://serper.dev")
def _missing_key_error(query: str, tool_name: str) -> str:
    """Build the JSON error for a missing Serper key, warning once per tool.

    The first call for a given ``tool_name`` logs a warning and records the
    name in ``_api_key_warned``; later calls for the same name stay silent.
    The returned JSON string is the same on every call.
    """
    already_warned = tool_name in _api_key_warned
    if not already_warned:
        _api_key_warned.add(tool_name)
        logger.warning("Serper API key is not set for '%s'. Set SERPER_API_KEY in your environment or provide api_key in config.yaml. Sign up at https://serper.dev", tool_name)
    payload = {"error": "SERPER_API_KEY is not configured", "query": query}
    return json.dumps(payload, ensure_ascii=False)
def _unexpected_format_error(query: str) -> str:
    """Return the serialized error used when Serper's payload has the wrong shape."""
    payload = {"error": "Serper returned an unexpected response format", "query": query}
    return json.dumps(payload, ensure_ascii=False)
def _response_items(data: dict, field: str, query: str) -> tuple[list[dict] | None, str | None]:
items = data.get(field)
# Treat a missing or null field as "no results" (some APIs return
# ``{"organic": null}`` to signal that) rather than a malformed payload.
if items is None:
return [], None
if not isinstance(items, list):
logger.error("Serper returned unexpected '%s' payload type: %s", field, type(items).__name__)
return None, _unexpected_format_error(query)
return [item for item in items if isinstance(item, dict)], None
def _clean_query(query: str) -> str:
    """Normalize a raw query into the value actually sent to Serper.

    Surrounding whitespace is removed first, then the result is cut to at
    most 500 characters.
    """
    return query.strip()[:500]
def _decode_ipv4(host: str) -> IPv4Address | None:
"""Decode obfuscated IPv4 literals that ``ip_address`` rejects.
Mirrors the permissive ``inet_aton`` parsing many HTTP clients use, so that
integer (``2130706433``), hex (``0x7f000001``) and octal (``0177.0.0.1``)
encodings of an address are recognized. Returns an ``IPv4Address`` when the
host decodes to one, otherwise ``None`` (e.g. real domains like
``cafe.com`` fail to decode and are left for the caller to treat as a host).
"""
parts = host.split(".")
if not 1 <= len(parts) <= 4:
return None
values: list[int] = []
for part in parts:
if not part:
return None
try:
if part.startswith(("0x", "0X")):
values.append(int(part, 16))
elif part.startswith("0") and len(part) > 1:
values.append(int(part, 8))
else:
values.append(int(part, 10))
except ValueError:
return None
*leading, last = values
for value in leading:
if not 0 <= value <= 0xFF:
return None
max_last = (1 << (8 * (4 - len(leading)))) - 1
if not 0 <= last <= max_last:
return None
result = 0
for value in leading:
result = (result << 8) | value
result = (result << (8 * (4 - len(leading)))) | last
return ip_address(result)
def _is_url_present(value: object) -> bool:
    """Tell whether *value* is a string containing something besides whitespace.

    Callers use this to separate a field that was *absent* (and may borrow its
    counterpart's URL) from one that was *present but filtered* by the SSRF
    guard (and must stay empty).
    """
    if not isinstance(value, str):
        return False
    return value.strip() != ""
def _safe_public_url(value: object) -> str:
    """Return ``value`` only if it is a safe, public http(s) URL, else "".

    This is a best-effort SSRF guard that rejects non-http(s) schemes,
    ``localhost``, private/non-global IP literals (including obfuscated
    decimal/hex/octal encodings) and URLs that cannot be parsed at all. It
    only inspects the URL string and cannot catch public hostnames that
    resolve to internal IPs (e.g. DNS rebinding); any consumer that actually
    downloads these URLs must re-validate the resolved IP at fetch time.

    Args:
        value: Candidate URL taken from an API response; may be any type.

    Returns:
        The whitespace-stripped URL when it passes every check, otherwise "".
    """
    if not isinstance(value, str):
        return ""
    url = value.strip()
    try:
        parsed = urlparse(url)
        hostname = parsed.hostname
    except ValueError:
        # urlparse raises on malformed authorities such as an unbalanced IPv6
        # bracket ("http://[::1"). Treat those as unsafe so a single bad entry
        # is filtered instead of aborting the whole search.
        return ""
    if parsed.scheme not in {"http", "https"} or not parsed.netloc or not hostname:
        return ""
    # Strip trailing dots (FQDN root label). ``localhost.`` and ``127.0.0.1.``
    # resolve to loopback on common resolvers but would otherwise slip past
    # the localhost/IP checks below.
    host = hostname.lower().rstrip(".")
    if not host:
        return ""
    if host == "localhost" or host.endswith(".localhost"):
        return ""
    try:
        ip = ip_address(host)
    except ValueError:
        # Not a canonical IP literal: try the permissive inet_aton-style
        # spellings before concluding that this is an ordinary hostname.
        ip = _decode_ipv4(host)
        if ip is None:
            return url
    return url if ip.is_global else ""
def _serper_post(endpoint: str, api_key: str, query: str, max_results: int) -> tuple[dict | None, str | None]:
"""Send a POST request to a Serper endpoint.
``query`` is expected to already be normalized via :func:`_clean_query`.
Returns a ``(data, error_json)`` tuple: on success ``data`` is the parsed
JSON response and ``error_json`` is ``None``; on failure ``data`` is ``None``
and ``error_json`` is a serialized structured error ready to return.
"""
headers = {
"X-API-KEY": api_key,
"Content-Type": "application/json",
@@ -61,23 +188,56 @@ def web_search_tool(query: str, max_results: int = 5) -> str:
try:
with httpx.Client(timeout=30) as client:
response = client.post(_SERPER_ENDPOINT, headers=headers, json=payload)
response = client.post(endpoint, headers=headers, json=payload)
response.raise_for_status()
data = response.json()
if not isinstance(data, dict):
logger.error("Serper returned an unexpected payload type: %s", type(data).__name__)
return None, _unexpected_format_error(query)
return data, None
except httpx.HTTPStatusError as e:
logger.error(f"Serper API returned HTTP {e.response.status_code}: {e.response.text}")
return json.dumps(
resp_text = (e.response.text or "")[:500]
logger.error("Serper API returned HTTP %s: %s", e.response.status_code, resp_text)
return None, json.dumps(
{"error": f"Serper API error: HTTP {e.response.status_code}", "query": query},
ensure_ascii=False,
)
except Exception as e:
logger.error(f"Serper search failed: {type(e).__name__}: {e}")
return json.dumps({"error": str(e), "query": query}, ensure_ascii=False)
logger.error("Serper request failed: %s: %s", type(e).__name__, str(e)[:500])
return None, json.dumps({"error": str(e)[:500], "query": query}, ensure_ascii=False)
organic = data.get("organic", [])
@tool("web_search", parse_docstring=True)
def web_search_tool(query: str, max_results: int = 5) -> str:
"""Search the web for information using Google Search via Serper.
Args:
query: Search keywords describing what you want to find. Be specific for better results.
max_results: Maximum number of search results to return. Default is 5, capped at 10.
"""
config = get_app_config().get_tool_config("web_search")
if config is not None and "max_results" in config.model_extra:
max_results = config.model_extra.get("max_results", max_results)
max_results = _coerce_max_results(max_results)
query = _clean_query(query)
api_key = _get_api_key("web_search")
if not api_key:
return _missing_key_error(query, "web_search")
data, error_json = _serper_post(_SERPER_SEARCH_ENDPOINT, api_key, query, max_results)
if error_json is not None:
return error_json
organic, error_json = _response_items(data, "organic", query)
if error_json is not None:
return error_json
if not organic:
return json.dumps({"error": "No results found", "query": query}, ensure_ascii=False)
# Search result links are returned verbatim (not passed through
# _safe_public_url): they are surfaced as citations for the model to read,
# not fetched/downloaded by this tool, unlike image_search image URLs.
normalized_results = [
{
"title": r.get("title", ""),
@@ -93,3 +253,71 @@ def web_search_tool(query: str, max_results: int = 5) -> str:
"results": normalized_results,
}
return json.dumps(output, indent=2, ensure_ascii=False)
@tool("image_search", parse_docstring=True)
def image_search_tool(query: str, max_results: int = 5) -> str:
    """Search for images online using Google Images via Serper. Use this tool BEFORE image generation to find reference images for characters, portraits, objects, scenes, or any content requiring visual accuracy.

    The returned image URLs can be used as reference images in image generation to significantly improve quality.

    Args:
        query: Search keywords describing the images you want to find. Be specific for better results (e.g., "Japanese woman street photography 1990s" instead of just "woman").
        max_results: Maximum number of images to return. Default is 5, capped at 10.
    """
    # The docstring is handed to ``@tool`` with parse_docstring=True, so its
    # wording is kept exactly as-is.
    tool_config = get_app_config().get_tool_config("image_search")
    if tool_config is not None and "max_results" in tool_config.model_extra:
        # A configured max_results takes precedence over the call argument.
        max_results = tool_config.model_extra.get("max_results", max_results)
    limit = _coerce_max_results(max_results)
    query = _clean_query(query)

    api_key = _get_api_key("image_search")
    if not api_key:
        return _missing_key_error(query, "image_search")

    data, error_json = _serper_post(_SERPER_IMAGES_ENDPOINT, api_key, query, limit)
    if error_json is not None:
        return error_json

    images, error_json = _response_items(data, "images", query)
    if error_json is not None:
        return error_json
    if not images:
        return json.dumps({"error": "No images found", "query": query}, ensure_ascii=False)

    collected = []
    for entry in images:
        raw_image = entry.get("imageUrl")
        raw_thumb = entry.get("thumbnailUrl")

        # Run the SSRF guard exactly once per field.
        safe_image = _safe_public_url(raw_image)
        safe_thumb = _safe_public_url(raw_thumb)

        # A field borrows its counterpart only when it was absent in the raw
        # entry. A field that was present but rejected by the guard stays
        # empty, so a dropped high-res URL never masquerades as the preview
        # (or the other way round).
        image_url = safe_image
        if not image_url and not _is_url_present(raw_image):
            image_url = safe_thumb
        thumbnail_url = safe_thumb
        if not thumbnail_url and not _is_url_present(raw_thumb):
            thumbnail_url = safe_image

        if image_url or thumbnail_url:
            collected.append(
                {
                    "title": entry.get("title", ""),
                    "image_url": image_url,
                    "thumbnail_url": thumbnail_url,
                }
            )
            # Filtered entries never count toward the limit; stop as soon as
            # enough usable ones have been gathered.
            if len(collected) >= limit:
                break

    if not collected:
        return json.dumps({"error": "No safe image URLs found", "query": query}, ensure_ascii=False)

    output = {
        "query": query,
        "total_results": len(collected),
        "results": collected,
        "usage_hint": "Use the 'image_url' values as reference images in image generation. Download them first if needed.",
    }
    return json.dumps(output, indent=2, ensure_ascii=False)
+127 -4
View File
@@ -214,13 +214,14 @@ class TestCheckWebSearch:
assert result.fix is not None
assert "BRAVE_SEARCH_API_KEY" in result.fix
def test_brave_with_inline_api_key_ok(self, tmp_path, monkeypatch):
def test_brave_with_inline_api_key_warns(self, tmp_path, monkeypatch):
monkeypatch.delenv("BRAVE_SEARCH_API_KEY", raising=False)
cfg = tmp_path / "config.yaml"
cfg.write_text('config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.brave.tools:web_search_tool\n api_key: "inline-key"\n')
result = doctor.check_web_search(cfg)
assert result.status == "ok"
assert "api_key configured" in result.detail
assert result.status == "warn"
assert "literal api_key set in config" in result.detail
assert "BRAVE_SEARCH_API_KEY" in (result.fix or "")
def test_brave_with_api_key_env_ref_ok(self, tmp_path, monkeypatch):
monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "bsa-test")
@@ -228,7 +229,61 @@ class TestCheckWebSearch:
cfg.write_text("config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.brave.tools:web_search_tool\n api_key: $BRAVE_SEARCH_API_KEY\n")
result = doctor.check_web_search(cfg)
assert result.status == "ok"
assert "api_key" in result.detail
assert "BRAVE_SEARCH_API_KEY set from config" in result.detail
def test_serper_with_key_ok(self, tmp_path, monkeypatch):
monkeypatch.setenv("SERPER_API_KEY", "test-key")
cfg = tmp_path / "config.yaml"
cfg.write_text("config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.serper.tools:web_search_tool\n")
result = doctor.check_web_search(cfg)
assert result.status == "ok"
assert "serper" in result.detail
def test_serper_without_key_warns(self, tmp_path, monkeypatch):
monkeypatch.delenv("SERPER_API_KEY", raising=False)
cfg = tmp_path / "config.yaml"
cfg.write_text("config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.serper.tools:web_search_tool\n")
result = doctor.check_web_search(cfg)
assert result.status == "warn"
assert "SERPER_API_KEY" in (result.fix or "")
def test_serper_inline_api_key_warns(self, tmp_path, monkeypatch):
monkeypatch.delenv("SERPER_API_KEY", raising=False)
cfg = tmp_path / "config.yaml"
cfg.write_text("config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.serper.tools:web_search_tool\n api_key: inline-key\n")
result = doctor.check_web_search(cfg)
assert result.status == "warn"
assert "literal api_key set in config" in result.detail
assert "SERPER_API_KEY" in (result.fix or "")
def test_serper_config_env_ref_ok(self, tmp_path, monkeypatch):
monkeypatch.setenv("SERPER_API_KEY", "test-key")
cfg = tmp_path / "config.yaml"
cfg.write_text("config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.serper.tools:web_search_tool\n api_key: $SERPER_API_KEY\n")
result = doctor.check_web_search(cfg)
assert result.status == "ok"
assert "SERPER_API_KEY set from config" in result.detail
def test_serper_unresolved_env_ref_falls_back_to_default_var(self, tmp_path, monkeypatch):
# The referenced $VAR is unset, but the default SERPER_API_KEY is set,
# which the tool uses as a runtime fallback; report ok rather than warn.
monkeypatch.delenv("MY_CUSTOM_SERPER_KEY", raising=False)
monkeypatch.setenv("SERPER_API_KEY", "test-key")
cfg = tmp_path / "config.yaml"
cfg.write_text("config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.serper.tools:web_search_tool\n api_key: $MY_CUSTOM_SERPER_KEY\n")
result = doctor.check_web_search(cfg)
assert result.status == "ok"
assert "SERPER_API_KEY set" in result.detail
def test_serper_unresolved_env_ref_without_default_warns(self, tmp_path, monkeypatch):
# Neither the referenced $VAR nor the default SERPER_API_KEY is set.
monkeypatch.delenv("MY_CUSTOM_SERPER_KEY", raising=False)
monkeypatch.delenv("SERPER_API_KEY", raising=False)
cfg = tmp_path / "config.yaml"
cfg.write_text("config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.serper.tools:web_search_tool\n api_key: $MY_CUSTOM_SERPER_KEY\n")
result = doctor.check_web_search(cfg)
assert result.status == "warn"
assert "SERPER_API_KEY" in (result.fix or "")
def test_no_search_tool_warns(self, tmp_path):
cfg = tmp_path / "config.yaml"
@@ -284,6 +339,74 @@ class TestCheckWebFetch:
assert result.status == "fail"
# ---------------------------------------------------------------------------
# check_image_search
# ---------------------------------------------------------------------------
class TestCheckImageSearch:
# Each test writes a minimal config.yaml into tmp_path, runs
# doctor.check_image_search on it and asserts on the returned status/detail/fix.
def test_ddg_always_ok(self, tmp_path):
# The deerflow.community.image_search provider is reported ok with no env setup.
cfg = tmp_path / "config.yaml"
cfg.write_text("config_version: 5\ntools:\n - name: image_search\n use: deerflow.community.image_search.tools:image_search_tool\n")
result = doctor.check_image_search(cfg)
assert result.status == "ok"
assert "DuckDuckGo" in result.detail
def test_serper_with_key_ok(self, tmp_path, monkeypatch):
# Serper provider with SERPER_API_KEY present in the environment.
monkeypatch.setenv("SERPER_API_KEY", "test-key")
cfg = tmp_path / "config.yaml"
cfg.write_text("config_version: 5\ntools:\n - name: image_search\n use: deerflow.community.serper.tools:image_search_tool\n")
result = doctor.check_image_search(cfg)
assert result.status == "ok"
assert "serper" in result.detail
def test_serper_without_key_warns(self, tmp_path, monkeypatch):
# Serper provider with no key anywhere: warn and mention SERPER_API_KEY in the fix.
monkeypatch.delenv("SERPER_API_KEY", raising=False)
cfg = tmp_path / "config.yaml"
cfg.write_text("config_version: 5\ntools:\n - name: image_search\n use: deerflow.community.serper.tools:image_search_tool\n")
result = doctor.check_image_search(cfg)
assert result.status == "warn"
assert "SERPER_API_KEY" in (result.fix or "")
def test_serper_inline_api_key_warns(self, tmp_path, monkeypatch):
# A literal api_key value in the config is reported as a warning.
monkeypatch.delenv("SERPER_API_KEY", raising=False)
cfg = tmp_path / "config.yaml"
cfg.write_text("config_version: 5\ntools:\n - name: image_search\n use: deerflow.community.serper.tools:image_search_tool\n api_key: inline-key\n")
result = doctor.check_image_search(cfg)
assert result.status == "warn"
assert "literal api_key set in config" in result.detail
assert "SERPER_API_KEY" in (result.fix or "")
def test_serper_config_env_ref_without_env_warns(self, tmp_path, monkeypatch):
# The config references $SERPER_API_KEY but that variable is unset.
monkeypatch.delenv("SERPER_API_KEY", raising=False)
cfg = tmp_path / "config.yaml"
cfg.write_text("config_version: 5\ntools:\n - name: image_search\n use: deerflow.community.serper.tools:image_search_tool\n api_key: $SERPER_API_KEY\n")
result = doctor.check_image_search(cfg)
assert result.status == "warn"
assert "SERPER_API_KEY" in (result.fix or "")
def test_infoquest_with_key_ok(self, tmp_path, monkeypatch):
# InfoQuest provider with INFOQUEST_API_KEY present in the environment.
monkeypatch.setenv("INFOQUEST_API_KEY", "test-key")
cfg = tmp_path / "config.yaml"
cfg.write_text("config_version: 5\ntools:\n - name: image_search\n use: deerflow.community.infoquest.tools:image_search_tool\n")
result = doctor.check_image_search(cfg)
assert result.status == "ok"
assert "infoquest" in result.detail
def test_no_image_search_tool_warns(self, tmp_path):
# An empty tools list yields a warning that carries a fix suggestion.
cfg = tmp_path / "config.yaml"
cfg.write_text("config_version: 5\ntools: []\n")
result = doctor.check_image_search(cfg)
assert result.status == "warn"
assert result.fix is not None
def test_invalid_provider_use_fails(self, tmp_path):
# A `use` path pointing at the `not_real` provider module is reported as a failure.
cfg = tmp_path / "config.yaml"
cfg.write_text("config_version: 5\ntools:\n - name: image_search\n use: deerflow.community.not_real.tools:image_search_tool\n")
result = doctor.check_image_search(cfg)
assert result.status == "fail"
# ---------------------------------------------------------------------------
# check_env_file
# ---------------------------------------------------------------------------
+909 -8
View File
@@ -12,9 +12,9 @@ def reset_api_key_warned():
"""Reset the module-level warning flag before each test."""
import deerflow.community.serper.tools as serper_mod
serper_mod._api_key_warned = False
serper_mod._api_key_warned = set()
yield
serper_mod._api_key_warned = False
serper_mod._api_key_warned = set()
@pytest.fixture
@@ -42,6 +42,13 @@ def _make_serper_response(organic: list) -> MagicMock:
return mock_resp
def _make_serper_images_response(images: list) -> MagicMock:
    """Build a fake Serper response whose ``json()`` yields ``{"images": images}``."""
    response = MagicMock()
    # Replace raise_for_status with an explicit mock so calling it is a no-op.
    response.raise_for_status = MagicMock()
    response.json.return_value = {"images": images}
    return response
class TestGetApiKey:
def test_returns_config_key_when_present(self):
with patch("deerflow.community.serper.tools.get_app_config") as mock:
@@ -51,7 +58,7 @@ class TestGetApiKey:
from deerflow.community.serper.tools import _get_api_key
assert _get_api_key() == "from-config"
assert _get_api_key("web_search") == "from-config"
def test_falls_back_to_env_when_config_key_empty(self):
with patch("deerflow.community.serper.tools.get_app_config") as mock:
@@ -61,7 +68,7 @@ class TestGetApiKey:
with patch.dict("os.environ", {"SERPER_API_KEY": "env-key"}):
from deerflow.community.serper.tools import _get_api_key
assert _get_api_key() == "env-key"
assert _get_api_key("web_search") == "env-key"
def test_falls_back_to_env_when_config_key_whitespace(self):
with patch("deerflow.community.serper.tools.get_app_config") as mock:
@@ -71,7 +78,7 @@ class TestGetApiKey:
with patch.dict("os.environ", {"SERPER_API_KEY": "env-key"}):
from deerflow.community.serper.tools import _get_api_key
assert _get_api_key() == "env-key"
assert _get_api_key("web_search") == "env-key"
def test_falls_back_to_env_when_config_key_null(self):
with patch("deerflow.community.serper.tools.get_app_config") as mock:
@@ -81,7 +88,7 @@ class TestGetApiKey:
with patch.dict("os.environ", {"SERPER_API_KEY": "env-key"}):
from deerflow.community.serper.tools import _get_api_key
assert _get_api_key() == "env-key"
assert _get_api_key("web_search") == "env-key"
def test_falls_back_to_env_when_no_config(self):
with patch("deerflow.community.serper.tools.get_app_config") as mock:
@@ -89,7 +96,7 @@ class TestGetApiKey:
with patch.dict("os.environ", {"SERPER_API_KEY": "env-only"}):
from deerflow.community.serper.tools import _get_api_key
assert _get_api_key() == "env-only"
assert _get_api_key("web_search") == "env-only"
def test_returns_none_when_no_key_anywhere(self):
with patch("deerflow.community.serper.tools.get_app_config") as mock:
@@ -100,7 +107,236 @@ class TestGetApiKey:
os.environ.pop("SERPER_API_KEY", None)
from deerflow.community.serper.tools import _get_api_key
assert _get_api_key() is None
assert _get_api_key("web_search") is None
def test_returns_none_when_env_key_whitespace(self):
with patch("deerflow.community.serper.tools.get_app_config") as mock:
mock.return_value.get_tool_config.return_value = None
with patch.dict("os.environ", {"SERPER_API_KEY": " "}):
from deerflow.community.serper.tools import _get_api_key
assert _get_api_key("web_search") is None
def test_reads_config_for_requested_tool_name(self):
with patch("deerflow.community.serper.tools.get_app_config") as mock:
tool_config = MagicMock()
tool_config.model_extra = {"api_key": "image-key"}
mock.return_value.get_tool_config.return_value = tool_config
from deerflow.community.serper.tools import _get_api_key
assert _get_api_key("image_search") == "image-key"
mock.return_value.get_tool_config.assert_called_with("image_search")
class TestCoerceMaxResults:
    """Checks ``_coerce_max_results`` for valid, oversized and unusable inputs."""

    @staticmethod
    def _coerce(*args, **kwargs):
        # Import lazily, inside the call, as every test in this module does.
        from deerflow.community.serper.tools import _coerce_max_results

        return _coerce_max_results(*args, **kwargs)

    def test_returns_value_when_valid_positive_int(self):
        assert self._coerce(3) == 3

    def test_returns_value_for_numeric_string(self):
        assert self._coerce("7") == 7

    def test_caps_value_at_default_maximum(self):
        assert self._coerce(999) == 10

    def test_respects_custom_maximum(self):
        assert self._coerce(999, max_allowed=3) == 3

    def test_returns_default_for_non_numeric_string(self):
        assert self._coerce("oops") == 5

    def test_returns_default_for_none(self):
        assert self._coerce(None) == 5

    def test_returns_default_for_non_coercible_object(self):
        assert self._coerce(object()) == 5

    def test_returns_default_for_zero(self):
        assert self._coerce(0) == 5

    def test_returns_default_for_negative(self):
        assert self._coerce(-3) == 5

    def test_respects_custom_default(self):
        assert self._coerce("bad", default=2) == 2
class TestMissingKeyError:
    """Checks the once-per-tool warning and the JSON body of ``_missing_key_error``."""

    def test_warns_once_per_tool_name(self, caplog):
        import logging

        import deerflow.community.serper.tools as serper_mod

        with caplog.at_level(logging.WARNING):
            for query in ("q1", "q2"):
                serper_mod._missing_key_error(query, "web_search")

        warning_records = [rec for rec in caplog.records if rec.levelno == logging.WARNING]
        assert len(warning_records) == 1
        assert "web_search" in warning_records[0].getMessage()

    def test_warns_separately_for_each_tool(self, caplog):
        import logging

        import deerflow.community.serper.tools as serper_mod

        with caplog.at_level(logging.WARNING):
            for query, tool_name in (("q1", "web_search"), ("q2", "image_search")):
                serper_mod._missing_key_error(query, tool_name)

        # Gather the distinct warning texts, then look for each tool name.
        messages = {rec.getMessage() for rec in caplog.records if rec.levelno == logging.WARNING}
        assert any("web_search" in text for text in messages)
        assert any("image_search" in text for text in messages)

    def test_returns_structured_error_json(self):
        import deerflow.community.serper.tools as serper_mod

        raw = serper_mod._missing_key_error("hello", "web_search")
        body = json.loads(raw)
        assert body["error"] == "SERPER_API_KEY is not configured"
        assert body["query"] == "hello"
class TestSafePublicUrl:
def test_https_public_hostname_passes(self):
from deerflow.community.serper.tools import _safe_public_url
assert _safe_public_url("https://example.com/i.jpg") == "https://example.com/i.jpg"
def test_public_ip_literal_passes(self):
from deerflow.community.serper.tools import _safe_public_url
assert _safe_public_url("https://8.8.8.8/i.jpg") == "https://8.8.8.8/i.jpg"
def test_localhost_is_filtered(self):
from deerflow.community.serper.tools import _safe_public_url
assert _safe_public_url("http://localhost/x.jpg") == ""
def test_localhost_subdomain_is_filtered(self):
from deerflow.community.serper.tools import _safe_public_url
assert _safe_public_url("http://foo.localhost/x.jpg") == ""
def test_trailing_dot_localhost_is_filtered(self):
from deerflow.community.serper.tools import _safe_public_url
# FQDN root label: localhost. still resolves to loopback.
assert _safe_public_url("http://localhost./x.jpg") == ""
def test_trailing_dot_loopback_ip_is_filtered(self):
from deerflow.community.serper.tools import _safe_public_url
assert _safe_public_url("http://127.0.0.1./x.jpg") == ""
def test_trailing_dot_private_ip_is_filtered(self):
from deerflow.community.serper.tools import _safe_public_url
assert _safe_public_url("http://10.0.0.1./x.jpg") == ""
def test_trailing_dot_public_host_passes(self):
from deerflow.community.serper.tools import _safe_public_url
# A trailing dot on a public host is harmless and must not be rejected.
assert _safe_public_url("https://example.com./i.jpg") == "https://example.com./i.jpg"
def test_private_ip_is_filtered(self):
from deerflow.community.serper.tools import _safe_public_url
assert _safe_public_url("http://10.0.0.1/x.jpg") == ""
def test_ipv4_mapped_ipv6_loopback_is_filtered(self):
from deerflow.community.serper.tools import _safe_public_url
assert _safe_public_url("http://[::ffff:127.0.0.1]/x.jpg") == ""
def test_non_http_scheme_is_filtered(self):
from deerflow.community.serper.tools import _safe_public_url
assert _safe_public_url("file:///etc/passwd") == ""
def test_non_string_is_filtered(self):
    """Passing ``None`` instead of a string yields the empty string."""
    from deerflow.community.serper.tools import _safe_public_url

    filtered = _safe_public_url(None)
    assert filtered == ""
def test_decimal_encoded_loopback_is_filtered(self):
    """A single-integer (decimal) encoding of 127.0.0.1 is filtered."""
    from deerflow.community.serper.tools import _safe_public_url

    # 2130706433 == 127.0.0.1
    filtered = _safe_public_url("http://2130706433/x.jpg")
    assert filtered == ""
def test_hex_encoded_loopback_is_filtered(self):
    """A hexadecimal encoding of 127.0.0.1 is filtered."""
    from deerflow.community.serper.tools import _safe_public_url

    # 0x7f000001 == 127.0.0.1
    filtered = _safe_public_url("http://0x7f000001/x.jpg")
    assert filtered == ""
def test_octal_encoded_loopback_is_filtered(self):
    """An octal-octet encoding of 127.0.0.1 is filtered."""
    from deerflow.community.serper.tools import _safe_public_url

    # 0177.0.0.1 == 127.0.0.1
    filtered = _safe_public_url("http://0177.0.0.1/x.jpg")
    assert filtered == ""
def test_decimal_encoded_private_ip_is_filtered(self):
    """A single-integer (decimal) encoding of 10.0.0.1 is filtered."""
    from deerflow.community.serper.tools import _safe_public_url

    # 167772161 == 10.0.0.1
    filtered = _safe_public_url("http://167772161/x.jpg")
    assert filtered == ""
def test_decimal_encoded_public_ip_passes(self):
    """A decimal encoding of the public address 8.8.8.8 is returned unchanged."""
    from deerflow.community.serper.tools import _safe_public_url

    # 134744072 == 8.8.8.8
    url = "http://134744072/i.jpg"
    assert _safe_public_url(url) == url
def test_domain_with_hex_chars_is_not_treated_as_ip(self):
    """A hostname made only of hex-looking letters (``cafe.com``) passes through."""
    from deerflow.community.serper.tools import _safe_public_url

    url = "https://cafe.com/i.jpg"
    assert _safe_public_url(url) == url
def test_out_of_range_octet_is_not_treated_as_ip(self):
    """A dotted host with an out-of-range leading octet passes through unchanged."""
    from deerflow.community.serper.tools import _safe_public_url

    # 999.1.1.1 is not a valid IPv4 literal; treat as a hostname, not blocked.
    url = "https://999.1.1.1/i.jpg"
    assert _safe_public_url(url) == url
def test_too_many_octets_is_not_treated_as_ip(self):
    """A dotted host with five numeric parts passes through unchanged."""
    from deerflow.community.serper.tools import _safe_public_url

    # More than 4 dotted parts cannot be an IPv4 literal; treat as hostname.
    url = "https://1.2.3.4.5/i.jpg"
    assert _safe_public_url(url) == url
def test_empty_octet_is_not_treated_as_ip(self):
    """A dotted host containing an empty part passes through unchanged."""
    from deerflow.community.serper.tools import _safe_public_url

    # Empty dotted part (e.g. trailing/leading dot) cannot decode to an IP.
    url = "https://1.2..3/i.jpg"
    assert _safe_public_url(url) == url
def test_trailing_octet_out_of_range_is_not_treated_as_ip(self):
    """A dotted host whose last part is out of range passes through unchanged."""
    from deerflow.community.serper.tools import _safe_public_url

    # Leading octets are valid but the trailing block exceeds its range.
    url = "https://1.2.3.999/i.jpg"
    assert _safe_public_url(url) == url
class TestWebSearchTool:
@@ -144,6 +380,47 @@ class TestWebSearchTool:
assert parsed["total_results"] == 3
assert len(parsed["results"]) == 3
def test_invalid_config_max_results_falls_back_to_default(self, mock_config_with_key):
    """A non-numeric configured ``max_results`` yields 5 results and a request with ``num == 5``."""
    tool_config = mock_config_with_key.return_value.get_tool_config.return_value
    tool_config.model_extra = {
        "api_key": "test-key",
        "max_results": "oops",
    }
    hits = [{"title": f"R{i}", "link": f"https://x.com/{i}", "snippet": f"S{i}"} for i in range(10)]
    response = _make_serper_response(hits)

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        post = client_cls.return_value.__enter__.return_value.post
        post.return_value = response
        from deerflow.community.serper.tools import web_search_tool

        raw = web_search_tool.invoke({"query": "test"})

    body = json.loads(raw)
    assert body["total_results"] == 5
    assert post.call_args.kwargs["json"]["num"] == 5
def test_config_max_results_is_capped(self, mock_config_with_key):
    """A configured ``max_results`` of 999 yields 10 results and a request with ``num == 10``."""
    tool_config = mock_config_with_key.return_value.get_tool_config.return_value
    tool_config.model_extra = {
        "api_key": "test-key",
        "max_results": 999,
    }
    hits = [{"title": f"R{i}", "link": f"https://x.com/{i}", "snippet": f"S{i}"} for i in range(20)]
    response = _make_serper_response(hits)

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        post = client_cls.return_value.__enter__.return_value.post
        post.return_value = response
        from deerflow.community.serper.tools import web_search_tool

        raw = web_search_tool.invoke({"query": "test"})

    body = json.loads(raw)
    assert body["total_results"] == 10
    assert len(body["results"]) == 10
    assert post.call_args.kwargs["json"]["num"] == 10
def test_max_results_parameter_accepted(self, mock_config_no_key):
"""Tool accepts max_results as a call parameter when config does not override it."""
organic = [{"title": f"R{i}", "link": f"https://x.com/{i}", "snippet": f"S{i}"} for i in range(10)]
@@ -254,6 +531,23 @@ class TestWebSearchTool:
assert "error" in parsed
def test_http_status_error_from_response_returns_structured_error(self, mock_config_with_key):
    """An ``HTTPStatusError`` raised by ``raise_for_status`` surfaces as an error mentioning 403."""
    forbidden = MagicMock()
    forbidden.status_code = 403
    forbidden.text = "Forbidden"
    # The failure originates from the response object, not from the POST call itself.
    forbidden.raise_for_status.side_effect = httpx.HTTPStatusError("403", request=MagicMock(), response=forbidden)

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        client_cls.return_value.__enter__.return_value.post.return_value = forbidden
        from deerflow.community.serper.tools import web_search_tool

        raw = web_search_tool.invoke({"query": "test"})

    body = json.loads(raw)
    assert "error" in body
    assert "403" in body["error"]
def test_sends_correct_headers_and_payload(self, mock_config_with_key):
organic = [{"title": "T", "link": "https://x.com", "snippet": "S"}]
mock_resp = _make_serper_response(organic)
@@ -306,3 +600,610 @@ class TestWebSearchTool:
parsed = json.loads(result)
assert parsed["results"][0] == {"title": "", "url": "", "content": ""}
def test_malformed_json_response_returns_error(self, mock_config_with_key):
    """A response whose ``json()`` raises ``JSONDecodeError`` yields an error payload."""
    broken = MagicMock()
    broken.json.side_effect = json.JSONDecodeError(" Expecting value", "doc", 0)

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        client_cls.return_value.__enter__.return_value.post.return_value = broken
        from deerflow.community.serper.tools import web_search_tool

        raw = web_search_tool.invoke({"query": "test"})

    body = json.loads(raw)
    assert "error" in body
def test_non_dict_json_response_returns_error(self, mock_config_with_key):
    """A valid but non-dict payload (e.g. a list) must not crash the tool."""
    listy = MagicMock()
    listy.json.return_value = ["unexpected", "list"]
    listy.raise_for_status = MagicMock()

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        client_cls.return_value.__enter__.return_value.post.return_value = listy
        from deerflow.community.serper.tools import web_search_tool

        raw = web_search_tool.invoke({"query": "test"})

    body = json.loads(raw)
    assert "error" in body
    assert body["query"] == "test"
def test_non_list_organic_returns_error(self, mock_config_with_key):
    """An ``organic`` field that is a dict instead of a list is reported as an unexpected format."""
    odd = MagicMock()
    odd.json.return_value = {"organic": {"unexpected": "dict"}}
    odd.raise_for_status = MagicMock()

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        client_cls.return_value.__enter__.return_value.post.return_value = odd
        from deerflow.community.serper.tools import web_search_tool

        raw = web_search_tool.invoke({"query": "test"})

    body = json.loads(raw)
    assert body["error"] == "Serper returned an unexpected response format"
def test_null_organic_field_is_treated_as_no_results(self, mock_config_with_key):
    """A null-typed field (some APIs use it for "no results") is not a format error."""
    nulled = MagicMock()
    nulled.json.return_value = {"organic": None}
    nulled.raise_for_status = MagicMock()

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        client_cls.return_value.__enter__.return_value.post.return_value = nulled
        from deerflow.community.serper.tools import web_search_tool

        raw = web_search_tool.invoke({"query": "test"})

    body = json.loads(raw)
    assert body["error"] == "No results found"
def test_non_dict_organic_items_are_ignored(self, mock_config_with_key):
    """A non-dict entry in ``organic`` is dropped; only the dict entry is returned."""
    mixed = ["bad", {"title": "T", "link": "https://x.com", "snippet": "S"}]
    response = _make_serper_response(mixed)

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        client_cls.return_value.__enter__.return_value.post.return_value = response
        from deerflow.community.serper.tools import web_search_tool

        raw = web_search_tool.invoke({"query": "test"})

    body = json.loads(raw)
    assert body["total_results"] == 1
    assert body["results"][0]["title"] == "T"
def test_timeout_returns_error(self, mock_config_with_key):
    """An ``httpx.TimeoutException`` from the POST yields an error mentioning "timed out"."""
    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        post = client_cls.return_value.__enter__.return_value.post
        post.side_effect = httpx.TimeoutException("Read timed out")
        from deerflow.community.serper.tools import web_search_tool

        raw = web_search_tool.invoke({"query": "test"})

    body = json.loads(raw)
    assert "error" in body
    assert "timed out" in body["error"].lower()
def test_long_query_is_truncated(self, mock_config_with_key):
    """A 1000-character query is sent to Serper as its first 500 characters."""
    response = _make_serper_response([{"title": "T", "link": "https://x.com", "snippet": "S"}])

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        post = client_cls.return_value.__enter__.return_value.post
        post.return_value = response
        from deerflow.community.serper.tools import web_search_tool

        web_search_tool.invoke({"query": "a" * 1000})

    sent = post.call_args.kwargs["json"]
    assert sent["q"] == "a" * 500
def test_query_is_stripped(self, mock_config_with_key):
    """Leading and trailing whitespace is removed from the query before it is sent."""
    response = _make_serper_response([{"title": "T", "link": "https://x.com", "snippet": "S"}])

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        post = client_cls.return_value.__enter__.return_value.post
        post.return_value = response
        from deerflow.community.serper.tools import web_search_tool

        web_search_tool.invoke({"query": " hello world "})

    sent = post.call_args.kwargs["json"]
    assert sent["q"] == "hello world"
class TestImageSearchTool:
    """Tests for the Serper-backed ``image_search_tool``.

    Every test patches ``httpx.Client`` as seen from
    ``deerflow.community.serper.tools`` so no network request is made, invokes
    the tool, and inspects either the JSON string it returns or the arguments
    of the mocked POST call.
    """

    def test_basic_search_returns_normalized_results(self, mock_config_with_key):
        """Serper ``imageUrl``/``thumbnailUrl`` fields are exposed as ``image_url``/``thumbnail_url``."""
        images = [
            {
                "title": "Cat 1",
                "imageUrl": "https://example.com/cat1.jpg",
                "thumbnailUrl": "https://example.com/cat1_thumb.jpg",
            },
            {
                "title": "Cat 2",
                "imageUrl": "https://example.com/cat2.jpg",
                "thumbnailUrl": "https://example.com/cat2_thumb.jpg",
            },
        ]
        mock_resp = _make_serper_images_response(images)
        with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp
            from deerflow.community.serper.tools import image_search_tool

            result = image_search_tool.invoke({"query": "cat photo"})
        parsed = json.loads(result)
        assert parsed["query"] == "cat photo"
        assert parsed["total_results"] == 2
        assert parsed["results"][0]["title"] == "Cat 1"
        assert parsed["results"][0]["image_url"] == "https://example.com/cat1.jpg"
        assert parsed["results"][0]["thumbnail_url"] == "https://example.com/cat1_thumb.jpg"
        assert parsed["usage_hint"] == "Use the 'image_url' values as reference images in image generation. Download them first if needed."

    def test_sends_correct_headers_and_payload_to_images_endpoint(self, mock_config_with_key):
        """The request targets the Serper images endpoint with the API key header and default ``num``."""
        images = [{"title": "T", "imageUrl": "https://x.com/i.jpg", "thumbnailUrl": "https://x.com/t.jpg"}]
        mock_resp = _make_serper_images_response(images)
        with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_post = mock_client_cls.return_value.__enter__.return_value.post
            mock_post.return_value = mock_resp
            from deerflow.community.serper.tools import image_search_tool

            image_search_tool.invoke({"query": "hello world"})
        call_args = mock_post.call_args
        endpoint = call_args.args[0]
        headers = call_args.kwargs["headers"]
        payload = call_args.kwargs["json"]
        assert endpoint == "https://google.serper.dev/images"
        assert headers["X-API-KEY"] == "test-serper-key"
        assert payload["q"] == "hello world"
        assert payload["num"] == 5

    def test_image_url_falls_back_to_thumbnail(self, mock_config_with_key):
        """When ``imageUrl`` is absent, ``image_url`` takes the thumbnail URL."""
        images = [{"title": "Only thumb", "thumbnailUrl": "https://x.com/thumb.jpg"}]
        mock_resp = _make_serper_images_response(images)
        with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp
            from deerflow.community.serper.tools import image_search_tool

            result = image_search_tool.invoke({"query": "test"})
        parsed = json.loads(result)
        assert parsed["results"][0]["image_url"] == "https://x.com/thumb.jpg"
        assert parsed["results"][0]["thumbnail_url"] == "https://x.com/thumb.jpg"

    def test_thumbnail_url_falls_back_to_image(self, mock_config_with_key):
        """When ``thumbnailUrl`` is absent, ``thumbnail_url`` takes the full image URL."""
        images = [{"title": "Only image", "imageUrl": "https://x.com/full.jpg"}]
        mock_resp = _make_serper_images_response(images)
        with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp
            from deerflow.community.serper.tools import image_search_tool

            result = image_search_tool.invoke({"query": "test"})
        parsed = json.loads(result)
        assert parsed["results"][0]["image_url"] == "https://x.com/full.jpg"
        assert parsed["results"][0]["thumbnail_url"] == "https://x.com/full.jpg"

    def test_filtered_image_url_does_not_collapse_onto_thumbnail(self, mock_config_with_key):
        """A present-but-unsafe imageUrl must not be replaced by the safe thumbnail."""
        images = [{"title": "T", "imageUrl": "http://10.0.0.1/full.jpg", "thumbnailUrl": "https://example.com/t.jpg"}]
        mock_resp = _make_serper_images_response(images)
        with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp
            from deerflow.community.serper.tools import image_search_tool

            result = image_search_tool.invoke({"query": "test"})
        parsed = json.loads(result)
        # The high-res field stays empty rather than masquerading as the preview.
        assert parsed["results"][0]["image_url"] == ""
        assert parsed["results"][0]["thumbnail_url"] == "https://example.com/t.jpg"

    def test_filtered_thumbnail_does_not_collapse_onto_image(self, mock_config_with_key):
        """A present-but-unsafe thumbnailUrl must not be replaced by the safe image."""
        images = [{"title": "T", "imageUrl": "https://example.com/full.jpg", "thumbnailUrl": "http://127.0.0.1/t.jpg"}]
        mock_resp = _make_serper_images_response(images)
        with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp
            from deerflow.community.serper.tools import image_search_tool

            result = image_search_tool.invoke({"query": "test"})
        parsed = json.loads(result)
        assert parsed["results"][0]["image_url"] == "https://example.com/full.jpg"
        assert parsed["results"][0]["thumbnail_url"] == ""

    def test_respects_max_results_from_config(self, mock_config_with_key):
        """A configured ``max_results`` of 3 limits the output to 3 results."""
        mock_config_with_key.return_value.get_tool_config.return_value.model_extra = {
            "api_key": "test-key",
            "max_results": 3,
        }
        images = [{"title": f"I{i}", "imageUrl": f"https://x.com/{i}.jpg"} for i in range(10)]
        mock_resp = _make_serper_images_response(images)
        with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp
            from deerflow.community.serper.tools import image_search_tool

            result = image_search_tool.invoke({"query": "test"})
        parsed = json.loads(result)
        assert parsed["total_results"] == 3
        assert len(parsed["results"]) == 3

    def test_config_max_results_is_capped(self, mock_config_with_key):
        """A configured ``max_results`` of 999 yields 10 results and a request with ``num == 10``."""
        mock_config_with_key.return_value.get_tool_config.return_value.model_extra = {
            "api_key": "test-key",
            "max_results": 999,
        }
        images = [{"title": f"I{i}", "imageUrl": f"https://x.com/{i}.jpg"} for i in range(20)]
        mock_resp = _make_serper_images_response(images)
        with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_post = mock_client_cls.return_value.__enter__.return_value.post
            mock_post.return_value = mock_resp
            from deerflow.community.serper.tools import image_search_tool

            result = image_search_tool.invoke({"query": "test"})
        parsed = json.loads(result)
        assert parsed["total_results"] == 10
        assert len(parsed["results"]) == 10
        assert mock_post.call_args.kwargs["json"]["num"] == 10

    def test_empty_images_returns_error_json(self, mock_config_with_key):
        """An empty ``images`` list produces a "No images found" error that echoes the query."""
        mock_resp = _make_serper_images_response([])
        with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp
            from deerflow.community.serper.tools import image_search_tool

            result = image_search_tool.invoke({"query": "no results"})
        parsed = json.loads(result)
        assert "error" in parsed
        assert parsed["error"] == "No images found"
        assert parsed["query"] == "no results"

    def test_missing_api_key_returns_error_json(self, mock_config_no_key):
        """With no key in config or environment, the error message names ``SERPER_API_KEY``."""
        # clear=True empties os.environ for the duration of the block, so
        # SERPER_API_KEY is guaranteed absent without removing it explicitly.
        with patch.dict("os.environ", {}, clear=True):
            from deerflow.community.serper.tools import image_search_tool

            result = image_search_tool.invoke({"query": "test"})
        parsed = json.loads(result)
        assert "error" in parsed
        assert "SERPER_API_KEY" in parsed["error"]

    def test_http_error_returns_structured_error(self, mock_config_with_key):
        """An ``HTTPStatusError`` raised by the POST call surfaces as an error mentioning 403."""
        mock_error_response = MagicMock()
        mock_error_response.status_code = 403
        mock_error_response.text = "Forbidden"
        with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_client_cls.return_value.__enter__.return_value.post.side_effect = httpx.HTTPStatusError("403", request=MagicMock(), response=mock_error_response)
            from deerflow.community.serper.tools import image_search_tool

            result = image_search_tool.invoke({"query": "test"})
        parsed = json.loads(result)
        assert "error" in parsed
        assert "403" in parsed["error"]

    def test_network_exception_returns_error_json(self, mock_config_with_key):
        """A generic exception raised by the POST call is converted into an error payload."""
        with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_client_cls.return_value.__enter__.return_value.post.side_effect = Exception("timeout")
            from deerflow.community.serper.tools import image_search_tool

            result = image_search_tool.invoke({"query": "test"})
        parsed = json.loads(result)
        assert "error" in parsed

    def test_uses_env_key_when_config_absent(self):
        """When the tool config is ``None``, the key from ``SERPER_API_KEY`` is sent in the header."""
        with patch("deerflow.community.serper.tools.get_app_config") as mock:
            mock.return_value.get_tool_config.return_value = None
            with patch.dict("os.environ", {"SERPER_API_KEY": "env-only-key"}):
                images = [{"title": "T", "imageUrl": "https://x.com/i.jpg"}]
                mock_resp = _make_serper_images_response(images)
                with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
                    mock_post = mock_client_cls.return_value.__enter__.return_value.post
                    mock_post.return_value = mock_resp
                    from deerflow.community.serper.tools import image_search_tool

                    image_search_tool.invoke({"query": "env key test"})
                    headers = mock_post.call_args.kwargs["headers"]
        assert headers["X-API-KEY"] == "env-only-key"

    def test_max_results_parameter_accepted(self, mock_config_no_key):
        """Tool accepts max_results as a call parameter when config does not override it."""
        images = [{"title": f"I{i}", "imageUrl": f"https://x.com/{i}.jpg"} for i in range(10)]
        mock_resp = _make_serper_images_response(images)
        with patch.dict("os.environ", {"SERPER_API_KEY": "env-key"}), patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp
            from deerflow.community.serper.tools import image_search_tool

            result = image_search_tool.invoke({"query": "test", "max_results": 2})
        parsed = json.loads(result)
        assert parsed["total_results"] == 2

    def test_config_max_results_overrides_parameter(self):
        """Config max_results overrides the parameter passed at call time."""
        with patch("deerflow.community.serper.tools.get_app_config") as mock:
            tool_config = MagicMock()
            tool_config.model_extra = {"api_key": "test-key", "max_results": 3}
            mock.return_value.get_tool_config.return_value = tool_config
            images = [{"title": f"I{i}", "imageUrl": f"https://x.com/{i}.jpg"} for i in range(10)]
            mock_resp = _make_serper_images_response(images)
            with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
                mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp
                from deerflow.community.serper.tools import image_search_tool

                result = image_search_tool.invoke({"query": "test", "max_results": 8})
        parsed = json.loads(result)
        assert parsed["total_results"] == 3

    def test_missing_api_key_logs_warning_once(self, mock_config_no_key, caplog):
        """Two invocations without an API key leave exactly one WARNING record in ``caplog``."""
        import logging

        # clear=True already guarantees SERPER_API_KEY is absent from the environment.
        with patch.dict("os.environ", {}, clear=True):
            from deerflow.community.serper.tools import image_search_tool

            with caplog.at_level(logging.WARNING, logger="deerflow.community.serper.tools"):
                image_search_tool.invoke({"query": "q1"})
                image_search_tool.invoke({"query": "q2"})
        warnings = [r for r in caplog.records if r.levelno == logging.WARNING]
        assert len(warnings) == 1

    def test_malformed_json_response_returns_error(self, mock_config_with_key):
        """A response whose ``json()`` raises ``JSONDecodeError`` yields an error payload."""
        mock_resp = MagicMock()
        mock_resp.json.side_effect = json.JSONDecodeError(" Expecting value", "doc", 0)
        with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp
            from deerflow.community.serper.tools import image_search_tool

            result = image_search_tool.invoke({"query": "test"})
        parsed = json.loads(result)
        assert "error" in parsed

    def test_non_dict_json_response_returns_error(self, mock_config_with_key):
        """A valid but non-dict payload (e.g. a list) must not crash the tool."""
        mock_resp = MagicMock()
        mock_resp.json.return_value = ["unexpected", "list"]
        mock_resp.raise_for_status = MagicMock()
        with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp
            from deerflow.community.serper.tools import image_search_tool

            result = image_search_tool.invoke({"query": "test"})
        parsed = json.loads(result)
        assert "error" in parsed
        assert parsed["query"] == "test"

    def test_non_list_images_returns_error(self, mock_config_with_key):
        """An ``images`` field that is a dict instead of a list is reported as an unexpected format."""
        mock_resp = MagicMock()
        mock_resp.json.return_value = {"images": {"unexpected": "dict"}}
        mock_resp.raise_for_status = MagicMock()
        with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp
            from deerflow.community.serper.tools import image_search_tool

            result = image_search_tool.invoke({"query": "test"})
        parsed = json.loads(result)
        assert parsed["error"] == "Serper returned an unexpected response format"

    def test_null_images_field_is_treated_as_no_results(self, mock_config_with_key):
        """A null-typed images field is "no images", not a malformed payload."""
        mock_resp = MagicMock()
        mock_resp.json.return_value = {"images": None}
        mock_resp.raise_for_status = MagicMock()
        with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp
            from deerflow.community.serper.tools import image_search_tool

            result = image_search_tool.invoke({"query": "test"})
        parsed = json.loads(result)
        assert parsed["error"] == "No images found"

    def test_non_dict_image_items_are_ignored(self, mock_config_with_key):
        """A non-dict entry in ``images`` is dropped; only the dict entry is returned."""
        images = ["bad", {"title": "T", "imageUrl": "https://x.com/i.jpg"}]
        mock_resp = _make_serper_images_response(images)
        with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp
            from deerflow.community.serper.tools import image_search_tool

            result = image_search_tool.invoke({"query": "test"})
        parsed = json.loads(result)
        assert parsed["total_results"] == 1
        assert parsed["results"][0]["image_url"] == "https://x.com/i.jpg"

    def test_timeout_returns_error(self, mock_config_with_key):
        """An ``httpx.TimeoutException`` from the POST yields an error mentioning "timed out"."""
        with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_client_cls.return_value.__enter__.return_value.post.side_effect = httpx.TimeoutException("Read timed out")
            from deerflow.community.serper.tools import image_search_tool

            result = image_search_tool.invoke({"query": "test"})
        parsed = json.loads(result)
        assert "error" in parsed
        assert "timed out" in parsed["error"].lower()

    def test_long_query_is_truncated(self, mock_config_with_key):
        """A 1000-character query is sent to Serper as its first 500 characters."""
        images = [{"title": "T", "imageUrl": "https://x.com/i.jpg"}]
        mock_resp = _make_serper_images_response(images)
        with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_post = mock_client_cls.return_value.__enter__.return_value.post
            mock_post.return_value = mock_resp
            from deerflow.community.serper.tools import image_search_tool

            long_query = "a" * 1000
            image_search_tool.invoke({"query": long_query})
        payload = mock_post.call_args.kwargs["json"]
        assert payload["q"] == "a" * 500

    def test_query_is_stripped(self, mock_config_with_key):
        """Leading and trailing whitespace is removed from the query before it is sent."""
        images = [{"title": "T", "imageUrl": "https://x.com/i.jpg"}]
        mock_resp = _make_serper_images_response(images)
        with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_post = mock_client_cls.return_value.__enter__.return_value.post
            mock_post.return_value = mock_resp
            from deerflow.community.serper.tools import image_search_tool

            image_search_tool.invoke({"query": " cat photo "})
        payload = mock_post.call_args.kwargs["json"]
        assert payload["q"] == "cat photo"

    def test_partial_fields_in_image_result_returns_error(self, mock_config_with_key):
        """Missing image URLs should not be reported as usable results."""
        images = [{}]
        mock_resp = _make_serper_images_response(images)
        with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp
            from deerflow.community.serper.tools import image_search_tool

            result = image_search_tool.invoke({"query": "test"})
        parsed = json.loads(result)
        assert parsed["error"] == "No safe image URLs found"
        assert parsed["query"] == "test"

    def test_unsafe_image_urls_are_filtered(self, mock_config_with_key):
        """Entries whose URLs are all unsafe are dropped; the one safe entry is returned intact."""
        images = [
            {"title": "Local", "imageUrl": "file:///etc/passwd", "thumbnailUrl": "http://127.0.0.1/thumb.jpg"},
            {"title": "Data", "imageUrl": "data:image/png;base64,abc", "thumbnailUrl": "http://10.0.0.1/thumb.jpg"},
            {"title": "Safe", "imageUrl": "https://example.com/i.jpg", "thumbnailUrl": "http://example.com/t.jpg"},
        ]
        mock_resp = _make_serper_images_response(images)
        with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp
            from deerflow.community.serper.tools import image_search_tool

            result = image_search_tool.invoke({"query": "test"})
        parsed = json.loads(result)
        assert parsed["total_results"] == 1
        assert parsed["results"][0]["title"] == "Safe"
        assert parsed["results"][0]["image_url"] == "https://example.com/i.jpg"
        assert parsed["results"][0]["thumbnail_url"] == "http://example.com/t.jpg"

    def test_all_unsafe_image_urls_return_error(self, mock_config_with_key):
        """When every entry has only unsafe URLs, a "No safe image URLs found" error is returned."""
        images = [
            {"title": "Local", "imageUrl": "file:///etc/passwd", "thumbnailUrl": "http://127.0.0.1/thumb.jpg"},
            {"title": "Private", "imageUrl": "http://10.0.0.1/image.jpg", "thumbnailUrl": "data:image/png;base64,abc"},
        ]
        mock_resp = _make_serper_images_response(images)
        with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp
            from deerflow.community.serper.tools import image_search_tool

            result = image_search_tool.invoke({"query": "test"})
        parsed = json.loads(result)
        assert parsed["error"] == "No safe image URLs found"
        assert parsed["query"] == "test"

    def test_unsafe_image_urls_do_not_consume_result_limit(self, mock_config_with_key):
        """With ``max_results`` of 1, a leading unsafe entry does not crowd out the safe one."""
        mock_config_with_key.return_value.get_tool_config.return_value.model_extra = {
            "api_key": "test-key",
            "max_results": 1,
        }
        images = [
            {"title": "Unsafe", "imageUrl": "file:///etc/passwd", "thumbnailUrl": "http://127.0.0.1/thumb.jpg"},
            {"title": "Safe", "imageUrl": "https://example.com/i.jpg", "thumbnailUrl": "https://example.com/t.jpg"},
        ]
        mock_resp = _make_serper_images_response(images)
        with patch("deerflow.community.serper.tools.httpx.Client") as mock_client_cls:
            mock_client_cls.return_value.__enter__.return_value.post.return_value = mock_resp
            from deerflow.community.serper.tools import image_search_tool

            result = image_search_tool.invoke({"query": "test"})
        parsed = json.loads(result)
        assert parsed["total_results"] == 1
        assert parsed["results"][0]["title"] == "Safe"
def test_package_exports_image_search_tool():
    """The package-level ``image_search_tool`` is the same object as the one in ``tools``."""
    from deerflow.community.serper import image_search_tool as exported
    from deerflow.community.serper.tools import image_search_tool as defined

    assert exported is defined
+14 -4
View File
@@ -522,13 +522,13 @@ tools:
# Web search tool (uses Serper - Google Search API, requires SERPER_API_KEY)
# Serper provides real-time Google Search results. Sign up at https://serper.dev
# Note: set SERPER_API_KEY in your environment before starting the app, or
# uncomment and fill in api_key below (the $VAR syntax is resolved at startup).
# Note: set SERPER_API_KEY in your environment before starting the app.
# Avoid putting literal API keys in config.yaml; use the $VAR form instead.
# - name: web_search
# group: web
# use: deerflow.community.serper.tools:web_search_tool
# max_results: 5
# # api_key: $SERPER_API_KEY # Optional if SERPER_API_KEY env var is set
# max_results: 5 # capped at 10 by the Serper provider
# # api_key: $SERPER_API_KEY # Optional explicit env-var reference
# Web search tool (uses Brave Search API, requires BRAVE_SEARCH_API_KEY)
# Brave Search returns results from an independent index. Sign up at
@@ -637,6 +637,16 @@ tools:
# # Image size filter. Options: "l" (large), "m" (medium), "i" (icon).
# image_size: "i"
# Image search tool (uses Serper - Google Images API, requires SERPER_API_KEY)
# Serper provides real-time Google Images results. Sign up at https://serper.dev
# Note: set SERPER_API_KEY in your environment before starting the app.
# Avoid putting literal API keys in config.yaml; use the $VAR form instead.
# - name: image_search
# group: web
# use: deerflow.community.serper.tools:image_search_tool
# max_results: 5 # capped at 10 by the Serper provider
# # api_key: $SERPER_API_KEY # Optional explicit env-var reference
# File operations tools
- name: ls
group: file:read
+21 -3
View File
@@ -212,13 +212,31 @@ tools:
### Image search
<Tabs items={["DuckDuckGo (default)", "InfoQuest", "Serper"]}>
<Tabs.Tab>
```yaml
tools:
- use: deerflow.community.image_search.tools:image_search_tool
# Or use InfoQuest:
# - use: deerflow.community.infoquest.tools:image_search_tool
# api_key: $INFOQUEST_API_KEY
```
No API key required. Default configuration, good for development and general use.
</Tabs.Tab>
<Tabs.Tab>
```yaml
tools:
- use: deerflow.community.infoquest.tools:image_search_tool
api_key: $INFOQUEST_API_KEY
```
Requires an InfoQuest API key.
</Tabs.Tab>
<Tabs.Tab>
```yaml
tools:
- use: deerflow.community.serper.tools:image_search_tool
api_key: $SERPER_API_KEY
```
Google Images results via Serper. Requires a [Serper](https://serper.dev) API key. Shares the same `SERPER_API_KEY` as the Serper `web_search` tool.
</Tabs.Tab>
</Tabs>
## Tool groups
+21
View File
@@ -191,10 +191,31 @@ tools:
### 图像搜索
<Tabs items={["DuckDuckGo(默认)", "InfoQuest", "Serper"]}>
<Tabs.Tab>
```yaml
tools:
- use: deerflow.community.image_search.tools:image_search_tool
```
无需 API Key。默认配置,适合开发和通用用途。
</Tabs.Tab>
<Tabs.Tab>
```yaml
tools:
- use: deerflow.community.infoquest.tools:image_search_tool
api_key: $INFOQUEST_API_KEY
```
需要 InfoQuest API Key。
</Tabs.Tab>
<Tabs.Tab>
```yaml
tools:
- use: deerflow.community.serper.tools:image_search_tool
api_key: $SERPER_API_KEY
```
通过 Serper 获取 Google 图片结果。需要 [Serper](https://serper.dev) API Key,与 Serper `web_search` 工具复用同一个 `SERPER_API_KEY`。
</Tabs.Tab>
</Tabs>
## 工具组
+51 -32
View File
@@ -103,6 +103,7 @@ def _split_use_path(use: str) -> tuple[str, str] | None:
# Check result container
# ---------------------------------------------------------------------------
class CheckResult:
def __init__(
self,
@@ -129,6 +130,7 @@ class CheckResult:
# Individual checks
# ---------------------------------------------------------------------------
def check_python() -> CheckResult:
v = sys.version_info
version_str = f"{v.major}.{v.minor}.{v.micro}"
@@ -198,11 +200,7 @@ def check_nginx() -> CheckResult:
return CheckResult(
"nginx",
"fail",
fix=(
"macOS: brew install nginx\n"
"Ubuntu: sudo apt install nginx\n"
"Windows: use WSL or Docker mode"
),
fix=("macOS: brew install nginx\nUbuntu: sudo apt install nginx\nWindows: use WSL or Docker mode"),
)
@@ -404,11 +402,7 @@ def check_llm_auth(config_path: Path) -> list[CheckResult]:
)
if use == "deerflow.models.claude_provider:ClaudeChatModel":
credential_paths = [
Path(os.environ["CLAUDE_CODE_CREDENTIALS_PATH"]).expanduser()
for env_name in ("CLAUDE_CODE_CREDENTIALS_PATH",)
if os.environ.get(env_name)
]
credential_paths = [Path(os.environ["CLAUDE_CODE_CREDENTIALS_PATH"]).expanduser() for env_name in ("CLAUDE_CODE_CREDENTIALS_PATH",) if os.environ.get(env_name)]
credential_paths.append(Path("~/.claude/.credentials.json").expanduser())
has_oauth_env = any(
os.environ.get(name)
@@ -428,10 +422,7 @@ def check_llm_auth(config_path: Path) -> list[CheckResult]:
CheckResult(
f"Claude auth available (model: {model_name})",
"fail",
fix=(
"Set ANTHROPIC_API_KEY / CLAUDE_CODE_OAUTH_TOKEN, "
"or place credentials at ~/.claude/.credentials.json"
),
fix=("Set ANTHROPIC_API_KEY / CLAUDE_CODE_OAUTH_TOKEN, or place credentials at ~/.claude/.credentials.json"),
)
)
except Exception as exc:
@@ -458,7 +449,6 @@ def check_web_tool(config_path: Path, *, tool_name: str, label: str) -> CheckRes
data = _load_yaml_file(config_path)
tool_entries = [t for t in data.get("tools", []) if t.get("name") == tool_name]
tool_uses = [t.get("use", "") for t in tool_entries]
if not tool_entries:
return CheckResult(
label,
@@ -470,6 +460,7 @@ def check_web_tool(config_path: Path, *, tool_name: str, label: str) -> CheckRes
free_providers = {
"web_search": {"ddg_search": "DuckDuckGo (no key needed)"},
"web_fetch": {"jina_ai": "Jina AI Reader (no key needed)"},
"image_search": {"deerflow.community.image_search.tools": "DuckDuckGo Images (no key needed)"},
}
key_providers = {
"web_search": {
@@ -478,35 +469,57 @@ def check_web_tool(config_path: Path, *, tool_name: str, label: str) -> CheckRes
"exa": "EXA_API_KEY",
"firecrawl": "FIRECRAWL_API_KEY",
"brave": "BRAVE_SEARCH_API_KEY",
"serper": "SERPER_API_KEY",
},
"web_fetch": {
"infoquest": "INFOQUEST_API_KEY",
"exa": "EXA_API_KEY",
"firecrawl": "FIRECRAWL_API_KEY",
},
"image_search": {
"infoquest": "INFOQUEST_API_KEY",
"serper": "SERPER_API_KEY",
},
}
for tool_entry in tool_entries:
use = tool_entry.get("use", "")
def _configured_key_detail(tool: dict, default_var: str) -> tuple[Status, str] | None:
    """Classify how the API key for a single tool entry is supplied.

    Args:
        tool: One tool mapping from the config; only its ``api_key`` is read.
        default_var: Environment variable consulted when the entry itself
            does not yield a usable key.

    Returns:
        ``("ok", detail)`` when a non-blank key is found in the environment,
        ``("warn", detail)`` when ``api_key`` holds a literal value, or
        ``None`` when no key is found at all.
    """
    raw = tool.get("api_key")
    configured = raw.strip() if isinstance(raw, str) else ""

    if configured:
        if not configured.startswith("$"):
            # Anything that is not a ``$VAR`` reference is a literal key.
            return ("warn", "literal api_key set in config")
        referenced = configured[1:]
        if (os.environ.get(referenced) or "").strip():
            return ("ok", f"{referenced} set from config")
        # The referenced variable is unset or blank; fall through to the
        # default variable, which tools use as a runtime fallback.

    if (os.environ.get(default_var) or "").strip():
        return ("ok", f"{default_var} set")
    return None
for tool in tool_entries:
use = tool.get("use", "")
for provider, detail in free_providers.get(tool_name, {}).items():
if provider in use:
return CheckResult(label, "ok", detail)
for tool_entry in tool_entries:
use = tool_entry.get("use", "")
for tool in tool_entries:
use = tool.get("use", "")
for provider, var in key_providers.get(tool_name, {}).items():
if provider in use:
configured_key = tool_entry.get("api_key")
if isinstance(configured_key, str) and configured_key.strip():
if configured_key.startswith("$"):
ref_var = configured_key[1:]
if os.environ.get(ref_var):
return CheckResult(label, "ok", f"{provider} ({ref_var} set via api_key)")
else:
return CheckResult(label, "ok", f"{provider} (api_key configured)")
val = os.environ.get(var)
if val:
return CheckResult(label, "ok", f"{provider} ({var} set)")
key_status = _configured_key_detail(tool, var)
if key_status:
status, detail = key_status
if status == "warn":
return CheckResult(
label,
"warn",
f"{provider} ({detail})",
fix=f"Move the API key to .env as {var}=<your-key> and reference it as ${var}",
)
return CheckResult(label, "ok", f"{provider} ({detail})")
return CheckResult(
label,
"warn",
@@ -514,7 +527,8 @@ def check_web_tool(config_path: Path, *, tool_name: str, label: str) -> CheckRes
fix=f"Add {var}=<your-key> to .env, or run 'make setup'",
)
for use in tool_uses:
for tool in tool_entries:
use = tool.get("use", "")
split = _split_use_path(use)
if split is None:
return CheckResult(
@@ -544,6 +558,10 @@ def check_web_fetch(config_path: Path) -> CheckResult:
return check_web_tool(config_path, tool_name="web_fetch", label="web fetch configured")
def check_image_search(config_path: Path) -> CheckResult:
    """Run the shared web-tool check for the ``image_search`` tool in *config_path*."""
    return check_web_tool(
        config_path,
        tool_name="image_search",
        label="image search configured",
    )
def check_frontend_env(project_root: Path) -> CheckResult:
env_path = project_root / "frontend" / ".env"
if env_path.exists():
@@ -641,6 +659,7 @@ def check_env_file(project_root: Path) -> CheckResult:
# Main
# ---------------------------------------------------------------------------
def main() -> int:
project_root = Path(__file__).resolve().parents[1]
config_path = project_root / "config.yaml"
@@ -691,7 +710,7 @@ def main() -> int:
sections.append(("LLM Provider", llm_checks))
# ── Web Capabilities ─────────────────────────────────────────────────────
search_checks = [check_web_search(config_path), check_web_fetch(config_path)]
search_checks = [check_web_search(config_path), check_web_fetch(config_path), check_image_search(config_path)]
sections.append(("Web Capabilities", search_checks))
# ── Sandbox ──────────────────────────────────────────────────────────────