mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-18 05:25:57 +00:00
feat(community): add Serper Google Images provider for image_search (#3575)
* feat(community): add Serper Google Images provider for image_search

  Add a Serper-backed `image_search` tool alongside the existing Serper `web_search` provider, so users with a SERPER_API_KEY can pull Google Images results as reference images for downstream image generation.

  - Share request/response handling between web_search and image_search via `_serper_post` / `_response_items`, with bounded `max_results` (capped at 10) and query normalization.
  - Add a best-effort SSRF guard (`_safe_public_url`) that rejects non-http(s), localhost and private/non-global IP image URLs; filtered entries are dropped and never consume the result limit.
  - doctor: flag literal `api_key` values in config as a warning and steer users toward `.env` + `$SERPER_API_KEY`.
  - Docs/config: document the Serper image_search provider and SERPER_API_KEY, and discourage committing literal keys to config.yaml.
  - Tests: cover the provider end-to-end (100% line coverage on tools.py) and the doctor literal-key warning path.

* fix(community): block obfuscated IPv4 literals in Serper image SSRF guard

  The image_search SSRF guard only rejected dotted-decimal IP literals; encoded forms such as decimal (http://2130706433/), hex (0x7f000001) and octal (0177.0.0.1) raised ValueError in ip_address() and were allowed through, even though many HTTP clients resolve them to private addresses like 127.0.0.1.

  Add _decode_ipv4() to permissively decode these inet_aton-style encodings and apply the same is_global check; hostnames that do not decode to an IP (e.g. cafe.com) are still treated as hosts and left to fetch-time re-validation.

  Addresses PR review feedback. Tests cover decimal/hex/octal loopback and private encodings plus non-IP edge cases; tools.py stays at 100% line coverage.

* test(community): cover IPv4-mapped IPv6 URL filtering

* fix(community): address Serper image search review feedback

  - Block trailing-dot hostname SSRF bypass (localhost./127.0.0.1.) in _safe_public_url by stripping the FQDN root label before checks.
  - Keep a filtered image/thumbnail URL empty instead of collapsing onto its counterpart, preserving the high-res/preview contract.
  - Evaluate the SSRF guard once per field rather than twice.
  - Treat a null-typed organic/images field as "no results" rather than a malformed payload.
  - doctor.py: when a config $VAR is unset, fall through to the default env var before reporting it as not set.
This commit is contained in:
@@ -236,6 +236,7 @@ tools:
|
||||
**Built-in Tools**:
|
||||
- `web_search` - Search the web (DuckDuckGo, Tavily, Brave, Exa, InfoQuest, Firecrawl, Serper)
|
||||
- `web_fetch` - Fetch web pages (Jina AI, Exa, InfoQuest, Firecrawl)
|
||||
- `image_search` - Search for reference images (DuckDuckGo, InfoQuest, Serper)
|
||||
- `ls` - List directory contents
|
||||
- `read_file` - Read file contents
|
||||
- `write_file` - Write file contents
|
||||
@@ -414,6 +415,7 @@ models:
|
||||
- `NOVITA_API_KEY` - Novita API key (OpenAI-compatible endpoint)
|
||||
- `TAVILY_API_KEY` - Tavily search API key
|
||||
- `BRAVE_SEARCH_API_KEY` - Brave Search API key
|
||||
- `SERPER_API_KEY` - Serper (Google Search/Images API) key for `web_search` and `image_search`
|
||||
- `DEER_FLOW_PROJECT_ROOT` - Project root for relative runtime paths
|
||||
- `DEER_FLOW_CONFIG_PATH` - Custom config file path
|
||||
- `DEER_FLOW_EXTENSIONS_CONFIG_PATH` - Custom extensions config file path
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
from .tools import web_search_tool
|
||||
from .tools import image_search_tool, web_search_tool
|
||||
|
||||
__all__ = ["web_search_tool"]
|
||||
__all__ = ["image_search_tool", "web_search_tool"]
|
||||
|
||||
@@ -1,13 +1,15 @@
|
||||
"""
|
||||
Web Search Tool - Search the web using Serper (Google Search API).
|
||||
Web and image search tools powered by Serper (Google Search API).
|
||||
|
||||
Serper provides real-time Google Search results via a JSON API.
|
||||
An API key is required. Sign up at https://serper.dev to get one.
|
||||
Serper provides real-time Google Search and Google Images results via a JSON
|
||||
API. An API key is required. Sign up at https://serper.dev to get one.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from ipaddress import IPv4Address, ip_address
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import httpx
|
||||
from langchain.tools import tool
|
||||
@@ -16,43 +18,168 @@ from deerflow.config import get_app_config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_SERPER_ENDPOINT = "https://google.serper.dev/search"
|
||||
_api_key_warned = False
|
||||
_SERPER_SEARCH_ENDPOINT = "https://google.serper.dev/search"
|
||||
_SERPER_IMAGES_ENDPOINT = "https://google.serper.dev/images"
|
||||
_SERPER_MAX_RESULTS = 10
|
||||
_api_key_warned: set[str] = set()
|
||||
|
||||
|
||||
def _get_api_key() -> str | None:
|
||||
config = get_app_config().get_tool_config("web_search")
|
||||
def _get_api_key(tool_name: str) -> str | None:
|
||||
config = get_app_config().get_tool_config(tool_name)
|
||||
if config is not None:
|
||||
api_key = config.model_extra.get("api_key")
|
||||
if isinstance(api_key, str) and api_key.strip():
|
||||
return api_key
|
||||
return os.getenv("SERPER_API_KEY")
|
||||
return api_key.strip()
|
||||
env_key = os.getenv("SERPER_API_KEY")
|
||||
if isinstance(env_key, str) and env_key.strip():
|
||||
return env_key.strip()
|
||||
return None
|
||||
|
||||
|
||||
@tool("web_search", parse_docstring=True)
|
||||
def web_search_tool(query: str, max_results: int = 5) -> str:
|
||||
"""Search the web for information using Google Search via Serper.
|
||||
def _coerce_max_results(value: object, default: int = 5, max_allowed: int = _SERPER_MAX_RESULTS) -> int:
    """Coerce config/parameter input into a bounded positive result count.

    Args:
        value: Raw count from the tool call or config; anything ``int()`` accepts.
        default: Count returned when ``value`` cannot be converted or is not positive.
        max_allowed: Upper bound applied to a successfully converted count.

    Returns:
        ``default`` for unusable input, otherwise ``min(int(value), max_allowed)``.
    """
    try:
        count = int(value)
    except (TypeError, ValueError, OverflowError):
        # int() raises TypeError for unsupported objects, ValueError for
        # non-numeric strings and NaN, and OverflowError for float infinities
        # (e.g. a YAML ``.inf``); all of them mean "use the default".
        return default
    if count <= 0:
        return default
    return min(count, max_allowed)
|
||||
|
||||
Args:
|
||||
query: Search keywords describing what you want to find. Be specific for better results.
|
||||
max_results: Maximum number of search results to return. Default is 5.
|
||||
|
||||
def _missing_key_error(query: str, tool_name: str) -> str:
    """Build the JSON error returned when no Serper API key is available.

    The first call for a given ``tool_name`` also logs a warning and records
    the name in ``_api_key_warned``; later calls for that name only return the
    error payload.
    """
    first_time = tool_name not in _api_key_warned
    if first_time:
        _api_key_warned.add(tool_name)
        logger.warning("Serper API key is not set for '%s'. Set SERPER_API_KEY in your environment or provide api_key in config.yaml. Sign up at https://serper.dev", tool_name)
    payload = {"error": "SERPER_API_KEY is not configured", "query": query}
    return json.dumps(payload, ensure_ascii=False)
|
||||
|
||||
|
||||
def _unexpected_format_error(query: str) -> str:
|
||||
return json.dumps(
|
||||
{"error": "Serper returned an unexpected response format", "query": query},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
|
||||
|
||||
def _response_items(data: dict, field: str, query: str) -> tuple[list[dict] | None, str | None]:
|
||||
items = data.get(field)
|
||||
# Treat a missing or null field as "no results" (some APIs return
|
||||
# ``{"organic": null}`` to signal that) rather than a malformed payload.
|
||||
if items is None:
|
||||
return [], None
|
||||
if not isinstance(items, list):
|
||||
logger.error("Serper returned unexpected '%s' payload type: %s", field, type(items).__name__)
|
||||
return None, _unexpected_format_error(query)
|
||||
return [item for item in items if isinstance(item, dict)], None
|
||||
|
||||
|
||||
def _clean_query(query: str) -> str:
|
||||
"""Normalize a raw query into the value actually sent to Serper."""
|
||||
query = query.strip()
|
||||
if len(query) > 500:
|
||||
query = query[:500]
|
||||
return query
|
||||
|
||||
|
||||
def _decode_ipv4(host: str) -> IPv4Address | None:
|
||||
"""Decode obfuscated IPv4 literals that ``ip_address`` rejects.
|
||||
|
||||
Mirrors the permissive ``inet_aton`` parsing many HTTP clients use, so that
|
||||
integer (``2130706433``), hex (``0x7f000001``) and octal (``0177.0.0.1``)
|
||||
encodings of an address are recognized. Returns an ``IPv4Address`` when the
|
||||
host decodes to one, otherwise ``None`` (e.g. real domains like
|
||||
``cafe.com`` fail to decode and are left for the caller to treat as a host).
|
||||
"""
|
||||
global _api_key_warned
|
||||
parts = host.split(".")
|
||||
if not 1 <= len(parts) <= 4:
|
||||
return None
|
||||
|
||||
config = get_app_config().get_tool_config("web_search")
|
||||
if config is not None and "max_results" in config.model_extra:
|
||||
max_results = config.model_extra.get("max_results", max_results)
|
||||
values: list[int] = []
|
||||
for part in parts:
|
||||
if not part:
|
||||
return None
|
||||
try:
|
||||
if part.startswith(("0x", "0X")):
|
||||
values.append(int(part, 16))
|
||||
elif part.startswith("0") and len(part) > 1:
|
||||
values.append(int(part, 8))
|
||||
else:
|
||||
values.append(int(part, 10))
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
api_key = _get_api_key()
|
||||
if not api_key:
|
||||
if not _api_key_warned:
|
||||
_api_key_warned = True
|
||||
logger.warning("Serper API key is not set. Set SERPER_API_KEY in your environment or provide api_key in config.yaml. Sign up at https://serper.dev")
|
||||
return json.dumps(
|
||||
{"error": "SERPER_API_KEY is not configured", "query": query},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
*leading, last = values
|
||||
for value in leading:
|
||||
if not 0 <= value <= 0xFF:
|
||||
return None
|
||||
max_last = (1 << (8 * (4 - len(leading)))) - 1
|
||||
if not 0 <= last <= max_last:
|
||||
return None
|
||||
|
||||
result = 0
|
||||
for value in leading:
|
||||
result = (result << 8) | value
|
||||
result = (result << (8 * (4 - len(leading)))) | last
|
||||
return ip_address(result)
|
||||
|
||||
|
||||
def _is_url_present(value: object) -> bool:
|
||||
"""Return ``True`` when *value* is a non-empty URL string.
|
||||
|
||||
Used to distinguish a field that was *absent* (eligible for cross-field
|
||||
fallback) from one that was *present but filtered* by the SSRF guard (which
|
||||
must stay empty rather than collapse onto its counterpart).
|
||||
"""
|
||||
return isinstance(value, str) and bool(value.strip())
|
||||
|
||||
|
||||
def _safe_public_url(value: object) -> str:
|
||||
"""Return ``value`` only if it is a safe, public http(s) URL, else "".
|
||||
|
||||
This is a best-effort SSRF guard that rejects non-http(s) schemes,
|
||||
``localhost``, and private/non-global IP literals (including obfuscated
|
||||
decimal/hex/octal encodings). It only inspects the URL string and cannot
|
||||
catch public hostnames that resolve to internal IPs (e.g. DNS rebinding);
|
||||
any consumer that actually downloads these URLs must re-validate the
|
||||
resolved IP at fetch time.
|
||||
"""
|
||||
if not isinstance(value, str):
|
||||
return ""
|
||||
url = value.strip()
|
||||
parsed = urlparse(url)
|
||||
if parsed.scheme not in {"http", "https"} or not parsed.netloc or not parsed.hostname:
|
||||
return ""
|
||||
|
||||
# Strip a single trailing dot (FQDN root label). ``localhost.`` and
|
||||
# ``127.0.0.1.`` resolve to loopback on common resolvers but would
|
||||
# otherwise slip past the localhost/IP checks below.
|
||||
host = parsed.hostname.lower().rstrip(".")
|
||||
if not host:
|
||||
return ""
|
||||
if host == "localhost" or host.endswith(".localhost"):
|
||||
return ""
|
||||
|
||||
try:
|
||||
ip = ip_address(host)
|
||||
except ValueError:
|
||||
ip = _decode_ipv4(host)
|
||||
if ip is None:
|
||||
return url
|
||||
return url if ip.is_global else ""
|
||||
|
||||
|
||||
def _serper_post(endpoint: str, api_key: str, query: str, max_results: int) -> tuple[dict | None, str | None]:
|
||||
"""Send a POST request to a Serper endpoint.
|
||||
|
||||
``query`` is expected to already be normalized via :func:`_clean_query`.
|
||||
|
||||
Returns a ``(data, error_json)`` tuple: on success ``data`` is the parsed
|
||||
JSON response and ``error_json`` is ``None``; on failure ``data`` is ``None``
|
||||
and ``error_json`` is a serialized structured error ready to return.
|
||||
"""
|
||||
headers = {
|
||||
"X-API-KEY": api_key,
|
||||
"Content-Type": "application/json",
|
||||
@@ -61,23 +188,56 @@ def web_search_tool(query: str, max_results: int = 5) -> str:
|
||||
|
||||
try:
|
||||
with httpx.Client(timeout=30) as client:
|
||||
response = client.post(_SERPER_ENDPOINT, headers=headers, json=payload)
|
||||
response = client.post(endpoint, headers=headers, json=payload)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
if not isinstance(data, dict):
|
||||
logger.error("Serper returned an unexpected payload type: %s", type(data).__name__)
|
||||
return None, _unexpected_format_error(query)
|
||||
return data, None
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(f"Serper API returned HTTP {e.response.status_code}: {e.response.text}")
|
||||
return json.dumps(
|
||||
resp_text = (e.response.text or "")[:500]
|
||||
logger.error("Serper API returned HTTP %s: %s", e.response.status_code, resp_text)
|
||||
return None, json.dumps(
|
||||
{"error": f"Serper API error: HTTP {e.response.status_code}", "query": query},
|
||||
ensure_ascii=False,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"Serper search failed: {type(e).__name__}: {e}")
|
||||
return json.dumps({"error": str(e), "query": query}, ensure_ascii=False)
|
||||
logger.error("Serper request failed: %s: %s", type(e).__name__, str(e)[:500])
|
||||
return None, json.dumps({"error": str(e)[:500], "query": query}, ensure_ascii=False)
|
||||
|
||||
organic = data.get("organic", [])
|
||||
|
||||
@tool("web_search", parse_docstring=True)
|
||||
def web_search_tool(query: str, max_results: int = 5) -> str:
|
||||
"""Search the web for information using Google Search via Serper.
|
||||
|
||||
Args:
|
||||
query: Search keywords describing what you want to find. Be specific for better results.
|
||||
max_results: Maximum number of search results to return. Default is 5, capped at 10.
|
||||
"""
|
||||
config = get_app_config().get_tool_config("web_search")
|
||||
if config is not None and "max_results" in config.model_extra:
|
||||
max_results = config.model_extra.get("max_results", max_results)
|
||||
max_results = _coerce_max_results(max_results)
|
||||
query = _clean_query(query)
|
||||
|
||||
api_key = _get_api_key("web_search")
|
||||
if not api_key:
|
||||
return _missing_key_error(query, "web_search")
|
||||
|
||||
data, error_json = _serper_post(_SERPER_SEARCH_ENDPOINT, api_key, query, max_results)
|
||||
if error_json is not None:
|
||||
return error_json
|
||||
|
||||
organic, error_json = _response_items(data, "organic", query)
|
||||
if error_json is not None:
|
||||
return error_json
|
||||
if not organic:
|
||||
return json.dumps({"error": "No results found", "query": query}, ensure_ascii=False)
|
||||
|
||||
# Search result links are returned verbatim (not passed through
|
||||
# _safe_public_url): they are surfaced as citations for the model to read,
|
||||
# not fetched/downloaded by this tool, unlike image_search image URLs.
|
||||
normalized_results = [
|
||||
{
|
||||
"title": r.get("title", ""),
|
||||
@@ -93,3 +253,71 @@ def web_search_tool(query: str, max_results: int = 5) -> str:
|
||||
"results": normalized_results,
|
||||
}
|
||||
return json.dumps(output, indent=2, ensure_ascii=False)
|
||||
|
||||
|
||||
@tool("image_search", parse_docstring=True)
def image_search_tool(query: str, max_results: int = 5) -> str:
    """Search for images online using Google Images via Serper. Use this tool BEFORE image generation to find reference images for characters, portraits, objects, scenes, or any content requiring visual accuracy.

    The returned image URLs can be used as reference images in image generation to significantly improve quality.

    Args:
        query: Search keywords describing the images you want to find. Be specific for better results (e.g., "Japanese woman street photography 1990s" instead of just "woman").
        max_results: Maximum number of images to return. Default is 5, capped at 10.
    """
    # A configured max_results takes precedence over the caller-supplied value.
    tool_config = get_app_config().get_tool_config("image_search")
    if tool_config is not None and "max_results" in tool_config.model_extra:
        max_results = tool_config.model_extra.get("max_results", max_results)
    max_results = _coerce_max_results(max_results)
    query = _clean_query(query)

    api_key = _get_api_key("image_search")
    if not api_key:
        return _missing_key_error(query, "image_search")

    data, error_json = _serper_post(_SERPER_IMAGES_ENDPOINT, api_key, query, max_results)
    if error_json is not None:
        return error_json

    images, error_json = _response_items(data, "images", query)
    if error_json is not None:
        return error_json
    if not images:
        return json.dumps({"error": "No images found", "query": query}, ensure_ascii=False)

    collected = []
    for entry in images:
        raw_image = entry.get("imageUrl")
        raw_thumb = entry.get("thumbnailUrl")
        # Run the SSRF guard once per field and reuse the results below.
        safe_image = _safe_public_url(raw_image)
        safe_thumb = _safe_public_url(raw_thumb)

        # Borrow the counterpart only when this field was *absent*. A field
        # that was present but failed the SSRF filter stays empty, so a dropped
        # high-res URL never masquerades as the preview (and vice versa).
        image_url = safe_image
        if not image_url and not _is_url_present(raw_image):
            image_url = safe_thumb
        thumbnail_url = safe_thumb
        if not thumbnail_url and not _is_url_present(raw_thumb):
            thumbnail_url = safe_image

        if not (image_url or thumbnail_url):
            # Nothing usable: skip without consuming the result limit.
            continue
        collected.append(
            {
                "title": entry.get("title", ""),
                "image_url": image_url,
                "thumbnail_url": thumbnail_url,
            }
        )
        if len(collected) >= max_results:
            break

    if not collected:
        return json.dumps({"error": "No safe image URLs found", "query": query}, ensure_ascii=False)

    output = {
        "query": query,
        "total_results": len(collected),
        "results": collected,
        "usage_hint": "Use the 'image_url' values as reference images in image generation. Download them first if needed.",
    }
    return json.dumps(output, indent=2, ensure_ascii=False)
|
||||
|
||||
@@ -214,13 +214,14 @@ class TestCheckWebSearch:
|
||||
assert result.fix is not None
|
||||
assert "BRAVE_SEARCH_API_KEY" in result.fix
|
||||
|
||||
def test_brave_with_inline_api_key_ok(self, tmp_path, monkeypatch):
|
||||
def test_brave_with_inline_api_key_warns(self, tmp_path, monkeypatch):
|
||||
monkeypatch.delenv("BRAVE_SEARCH_API_KEY", raising=False)
|
||||
cfg = tmp_path / "config.yaml"
|
||||
cfg.write_text('config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.brave.tools:web_search_tool\n api_key: "inline-key"\n')
|
||||
result = doctor.check_web_search(cfg)
|
||||
assert result.status == "ok"
|
||||
assert "api_key configured" in result.detail
|
||||
assert result.status == "warn"
|
||||
assert "literal api_key set in config" in result.detail
|
||||
assert "BRAVE_SEARCH_API_KEY" in (result.fix or "")
|
||||
|
||||
def test_brave_with_api_key_env_ref_ok(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "bsa-test")
|
||||
@@ -228,7 +229,61 @@ class TestCheckWebSearch:
|
||||
cfg.write_text("config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.brave.tools:web_search_tool\n api_key: $BRAVE_SEARCH_API_KEY\n")
|
||||
result = doctor.check_web_search(cfg)
|
||||
assert result.status == "ok"
|
||||
assert "api_key" in result.detail
|
||||
assert "BRAVE_SEARCH_API_KEY set from config" in result.detail
|
||||
|
||||
def test_serper_with_key_ok(self, tmp_path, monkeypatch):
    """Serper web_search is reported ok when SERPER_API_KEY is set in the environment."""
    monkeypatch.setenv("SERPER_API_KEY", "test-key")
    cfg = tmp_path / "config.yaml"
    # ``use`` must be indented under the list item to be a key of that tool entry.
    cfg.write_text("config_version: 5\ntools:\n  - name: web_search\n    use: deerflow.community.serper.tools:web_search_tool\n")
    result = doctor.check_web_search(cfg)
    assert result.status == "ok"
    assert "serper" in result.detail
|
||||
|
||||
def test_serper_without_key_warns(self, tmp_path, monkeypatch):
    """Serper web_search warns and names SERPER_API_KEY in the fix when no key is set."""
    monkeypatch.delenv("SERPER_API_KEY", raising=False)
    cfg = tmp_path / "config.yaml"
    # ``use`` must be indented under the list item to be a key of that tool entry.
    cfg.write_text("config_version: 5\ntools:\n  - name: web_search\n    use: deerflow.community.serper.tools:web_search_tool\n")
    result = doctor.check_web_search(cfg)
    assert result.status == "warn"
    assert "SERPER_API_KEY" in (result.fix or "")
|
||||
|
||||
def test_serper_inline_api_key_warns(self, tmp_path, monkeypatch):
    """A literal api_key in the Serper web_search config produces a warning."""
    monkeypatch.delenv("SERPER_API_KEY", raising=False)
    cfg = tmp_path / "config.yaml"
    # ``use`` and ``api_key`` must be indented under the list item to be keys
    # of that tool entry.
    cfg.write_text("config_version: 5\ntools:\n  - name: web_search\n    use: deerflow.community.serper.tools:web_search_tool\n    api_key: inline-key\n")
    result = doctor.check_web_search(cfg)
    assert result.status == "warn"
    assert "literal api_key set in config" in result.detail
    assert "SERPER_API_KEY" in (result.fix or "")
|
||||
|
||||
def test_serper_config_env_ref_ok(self, tmp_path, monkeypatch):
    """An api_key given as $SERPER_API_KEY is ok when that variable is set."""
    monkeypatch.setenv("SERPER_API_KEY", "test-key")
    cfg = tmp_path / "config.yaml"
    # ``use`` and ``api_key`` must be indented under the list item to be keys
    # of that tool entry.
    cfg.write_text("config_version: 5\ntools:\n  - name: web_search\n    use: deerflow.community.serper.tools:web_search_tool\n    api_key: $SERPER_API_KEY\n")
    result = doctor.check_web_search(cfg)
    assert result.status == "ok"
    assert "SERPER_API_KEY set from config" in result.detail
|
||||
|
||||
def test_serper_unresolved_env_ref_falls_back_to_default_var(self, tmp_path, monkeypatch):
    """An unset custom $VAR is still ok when the default SERPER_API_KEY is set."""
    # The referenced $VAR is unset, but the default SERPER_API_KEY is set,
    # which the tool uses as a runtime fallback; report ok rather than warn.
    monkeypatch.delenv("MY_CUSTOM_SERPER_KEY", raising=False)
    monkeypatch.setenv("SERPER_API_KEY", "test-key")
    cfg = tmp_path / "config.yaml"
    # ``use`` and ``api_key`` must be indented under the list item to be keys
    # of that tool entry.
    cfg.write_text("config_version: 5\ntools:\n  - name: web_search\n    use: deerflow.community.serper.tools:web_search_tool\n    api_key: $MY_CUSTOM_SERPER_KEY\n")
    result = doctor.check_web_search(cfg)
    assert result.status == "ok"
    assert "SERPER_API_KEY set" in result.detail
|
||||
|
||||
def test_serper_unresolved_env_ref_without_default_warns(self, tmp_path, monkeypatch):
    """An unset custom $VAR warns when the default SERPER_API_KEY is unset too."""
    # Neither the referenced $VAR nor the default SERPER_API_KEY is set.
    monkeypatch.delenv("MY_CUSTOM_SERPER_KEY", raising=False)
    monkeypatch.delenv("SERPER_API_KEY", raising=False)
    cfg = tmp_path / "config.yaml"
    # ``use`` and ``api_key`` must be indented under the list item to be keys
    # of that tool entry.
    cfg.write_text("config_version: 5\ntools:\n  - name: web_search\n    use: deerflow.community.serper.tools:web_search_tool\n    api_key: $MY_CUSTOM_SERPER_KEY\n")
    result = doctor.check_web_search(cfg)
    assert result.status == "warn"
    assert "SERPER_API_KEY" in (result.fix or "")
|
||||
|
||||
def test_no_search_tool_warns(self, tmp_path):
|
||||
cfg = tmp_path / "config.yaml"
|
||||
@@ -284,6 +339,74 @@ class TestCheckWebFetch:
|
||||
assert result.status == "fail"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# check_image_search
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestCheckImageSearch:
    """Tests for ``doctor.check_image_search`` across the image_search providers."""

    @staticmethod
    def _write_config(tmp_path, text):
        """Write *text* to ``config.yaml`` under *tmp_path* and return its path."""
        cfg = tmp_path / "config.yaml"
        cfg.write_text(text)
        return cfg

    # In every YAML literal below, ``use`` / ``api_key`` are indented under the
    # ``- name:`` list item so that they are keys of that tool entry.

    def test_ddg_always_ok(self, tmp_path):
        """The built-in image_search provider needs no key and reports DuckDuckGo."""
        cfg = self._write_config(tmp_path, "config_version: 5\ntools:\n  - name: image_search\n    use: deerflow.community.image_search.tools:image_search_tool\n")
        result = doctor.check_image_search(cfg)
        assert result.status == "ok"
        assert "DuckDuckGo" in result.detail

    def test_serper_with_key_ok(self, tmp_path, monkeypatch):
        """Serper image_search is ok when SERPER_API_KEY is set in the environment."""
        monkeypatch.setenv("SERPER_API_KEY", "test-key")
        cfg = self._write_config(tmp_path, "config_version: 5\ntools:\n  - name: image_search\n    use: deerflow.community.serper.tools:image_search_tool\n")
        result = doctor.check_image_search(cfg)
        assert result.status == "ok"
        assert "serper" in result.detail

    def test_serper_without_key_warns(self, tmp_path, monkeypatch):
        """Serper image_search warns and names SERPER_API_KEY when no key is set."""
        monkeypatch.delenv("SERPER_API_KEY", raising=False)
        cfg = self._write_config(tmp_path, "config_version: 5\ntools:\n  - name: image_search\n    use: deerflow.community.serper.tools:image_search_tool\n")
        result = doctor.check_image_search(cfg)
        assert result.status == "warn"
        assert "SERPER_API_KEY" in (result.fix or "")

    def test_serper_inline_api_key_warns(self, tmp_path, monkeypatch):
        """A literal api_key in the Serper image_search config produces a warning."""
        monkeypatch.delenv("SERPER_API_KEY", raising=False)
        cfg = self._write_config(tmp_path, "config_version: 5\ntools:\n  - name: image_search\n    use: deerflow.community.serper.tools:image_search_tool\n    api_key: inline-key\n")
        result = doctor.check_image_search(cfg)
        assert result.status == "warn"
        assert "literal api_key set in config" in result.detail
        assert "SERPER_API_KEY" in (result.fix or "")

    def test_serper_config_env_ref_without_env_warns(self, tmp_path, monkeypatch):
        """An api_key of $SERPER_API_KEY warns when that variable is not set."""
        monkeypatch.delenv("SERPER_API_KEY", raising=False)
        cfg = self._write_config(tmp_path, "config_version: 5\ntools:\n  - name: image_search\n    use: deerflow.community.serper.tools:image_search_tool\n    api_key: $SERPER_API_KEY\n")
        result = doctor.check_image_search(cfg)
        assert result.status == "warn"
        assert "SERPER_API_KEY" in (result.fix or "")

    def test_infoquest_with_key_ok(self, tmp_path, monkeypatch):
        """InfoQuest image_search is ok when INFOQUEST_API_KEY is set."""
        monkeypatch.setenv("INFOQUEST_API_KEY", "test-key")
        cfg = self._write_config(tmp_path, "config_version: 5\ntools:\n  - name: image_search\n    use: deerflow.community.infoquest.tools:image_search_tool\n")
        result = doctor.check_image_search(cfg)
        assert result.status == "ok"
        assert "infoquest" in result.detail

    def test_no_image_search_tool_warns(self, tmp_path):
        """An empty tools list warns and offers a fix."""
        cfg = self._write_config(tmp_path, "config_version: 5\ntools: []\n")
        result = doctor.check_image_search(cfg)
        assert result.status == "warn"
        assert result.fix is not None

    def test_invalid_provider_use_fails(self, tmp_path):
        """A ``use`` path pointing at an unknown provider is reported as a failure."""
        cfg = self._write_config(tmp_path, "config_version: 5\ntools:\n  - name: image_search\n    use: deerflow.community.not_real.tools:image_search_tool\n")
        result = doctor.check_image_search(cfg)
        assert result.status == "fail"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# check_env_file
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -12,9 +12,9 @@ def reset_api_key_warned():
|
||||
"""Reset the module-level warning flag before each test."""
|
||||
import deerflow.community.serper.tools as serper_mod
|
||||
|
||||
serper_mod._api_key_warned = False
|
||||
serper_mod._api_key_warned = set()
|
||||
yield
|
||||
serper_mod._api_key_warned = False
|
||||
serper_mod._api_key_warned = set()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -42,6 +42,13 @@ def _make_serper_response(organic: list) -> MagicMock:
|
||||
return mock_resp
|
||||
|
||||
|
||||
def _make_serper_images_response(images: list) -> MagicMock:
|
||||
mock_resp = MagicMock()
|
||||
mock_resp.json.return_value = {"images": images}
|
||||
mock_resp.raise_for_status = MagicMock()
|
||||
return mock_resp
|
||||
|
||||
|
||||
class TestGetApiKey:
|
||||
def test_returns_config_key_when_present(self):
|
||||
with patch("deerflow.community.serper.tools.get_app_config") as mock:
|
||||
@@ -51,7 +58,7 @@ class TestGetApiKey:
|
||||
|
||||
from deerflow.community.serper.tools import _get_api_key
|
||||
|
||||
assert _get_api_key() == "from-config"
|
||||
assert _get_api_key("web_search") == "from-config"
|
||||
|
||||
def test_falls_back_to_env_when_config_key_empty(self):
|
||||
with patch("deerflow.community.serper.tools.get_app_config") as mock:
|
||||
@@ -61,7 +68,7 @@ class TestGetApiKey:
|
||||
with patch.dict("os.environ", {"SERPER_API_KEY": "env-key"}):
|
||||
from deerflow.community.serper.tools import _get_api_key
|
||||
|
||||
assert _get_api_key() == "env-key"
|
||||
assert _get_api_key("web_search") == "env-key"
|
||||
|
||||
def test_falls_back_to_env_when_config_key_whitespace(self):
|
||||
with patch("deerflow.community.serper.tools.get_app_config") as mock:
|
||||
@@ -71,7 +78,7 @@ class TestGetApiKey:
|
||||
with patch.dict("os.environ", {"SERPER_API_KEY": "env-key"}):
|
||||
from deerflow.community.serper.tools import _get_api_key
|
||||
|
||||
assert _get_api_key() == "env-key"
|
||||
assert _get_api_key("web_search") == "env-key"
|
||||
|
||||
def test_falls_back_to_env_when_config_key_null(self):
|
||||
with patch("deerflow.community.serper.tools.get_app_config") as mock:
|
||||
@@ -81,7 +88,7 @@ class TestGetApiKey:
|
||||
with patch.dict("os.environ", {"SERPER_API_KEY": "env-key"}):
|
||||
from deerflow.community.serper.tools import _get_api_key
|
||||
|
||||
assert _get_api_key() == "env-key"
|
||||
assert _get_api_key("web_search") == "env-key"
|
||||
|
||||
def test_falls_back_to_env_when_no_config(self):
|
||||
with patch("deerflow.community.serper.tools.get_app_config") as mock:
|
||||
@@ -89,7 +96,7 @@ class TestGetApiKey:
|
||||
with patch.dict("os.environ", {"SERPER_API_KEY": "env-only"}):
|
||||
from deerflow.community.serper.tools import _get_api_key
|
||||
|
||||
assert _get_api_key() == "env-only"
|
||||
assert _get_api_key("web_search") == "env-only"
|
||||
|
||||
def test_returns_none_when_no_key_anywhere(self):
|
||||
with patch("deerflow.community.serper.tools.get_app_config") as mock:
|
||||
@@ -100,7 +107,236 @@ class TestGetApiKey:
|
||||
os.environ.pop("SERPER_API_KEY", None)
|
||||
from deerflow.community.serper.tools import _get_api_key
|
||||
|
||||
assert _get_api_key() is None
|
||||
assert _get_api_key("web_search") is None
|
||||
|
||||
def test_returns_none_when_env_key_whitespace(self):
    """A whitespace-only SERPER_API_KEY with no tool config counts as no key."""
    blank_env = {"SERPER_API_KEY": " "}
    with patch("deerflow.community.serper.tools.get_app_config") as app_config, patch.dict("os.environ", blank_env):
        app_config.return_value.get_tool_config.return_value = None

        from deerflow.community.serper.tools import _get_api_key

        resolved = _get_api_key("web_search")
        assert resolved is None
|
||||
|
||||
def test_reads_config_for_requested_tool_name(self):
    """The key is looked up under the tool name passed in, not a fixed one."""
    with patch("deerflow.community.serper.tools.get_app_config") as app_config:
        get_tool_config = app_config.return_value.get_tool_config
        get_tool_config.return_value = MagicMock(model_extra={"api_key": "image-key"})

        from deerflow.community.serper.tools import _get_api_key

        resolved = _get_api_key("image_search")
        assert resolved == "image-key"
        get_tool_config.assert_called_with("image_search")
|
||||
|
||||
|
||||
class TestCoerceMaxResults:
    """Tests for ``_coerce_max_results``: valid counts, capping, and fallbacks."""

    @staticmethod
    def _coerce(*args, **kwargs):
        """Import ``_coerce_max_results`` at call time and forward the arguments."""
        from deerflow.community.serper.tools import _coerce_max_results

        return _coerce_max_results(*args, **kwargs)

    def test_returns_value_when_valid_positive_int(self):
        assert self._coerce(3) == 3

    def test_returns_value_for_numeric_string(self):
        assert self._coerce("7") == 7

    def test_caps_value_at_default_maximum(self):
        assert self._coerce(999) == 10

    def test_respects_custom_maximum(self):
        assert self._coerce(999, max_allowed=3) == 3

    def test_returns_default_for_non_numeric_string(self):
        assert self._coerce("oops") == 5

    def test_returns_default_for_none(self):
        assert self._coerce(None) == 5

    def test_returns_default_for_non_coercible_object(self):
        assert self._coerce(object()) == 5

    def test_returns_default_for_zero(self):
        assert self._coerce(0) == 5

    def test_returns_default_for_negative(self):
        assert self._coerce(-3) == 5

    def test_respects_custom_default(self):
        assert self._coerce("bad", default=2) == 2
|
||||
|
||||
|
||||
class TestMissingKeyError:
    """Checks ``_missing_key_error``: warning frequency and the returned JSON payload."""

    def test_warns_once_per_tool_name(self, caplog):
        """Two calls for the same tool name leave exactly one WARNING record."""
        import logging

        import deerflow.community.serper.tools as serper_mod

        # NOTE(review): presumably the module's warn-once state is reset between tests — confirm.
        with caplog.at_level(logging.WARNING):
            for query in ("q1", "q2"):
                serper_mod._missing_key_error(query, "web_search")

        warning_records = [record for record in caplog.records if record.levelno == logging.WARNING]
        assert len(warning_records) == 1
        assert "web_search" in warning_records[0].getMessage()

    def test_warns_separately_for_each_tool(self, caplog):
        """Calls for two different tool names each produce a warning naming that tool."""
        import logging

        import deerflow.community.serper.tools as serper_mod

        with caplog.at_level(logging.WARNING):
            for query, tool_name in (("q1", "web_search"), ("q2", "image_search")):
                serper_mod._missing_key_error(query, tool_name)

        messages = {record.getMessage() for record in caplog.records if record.levelno == logging.WARNING}
        for tool_name in ("web_search", "image_search"):
            assert any(tool_name in message for message in messages)

    def test_returns_structured_error_json(self):
        """The return value is JSON carrying the fixed error text and the original query."""
        import deerflow.community.serper.tools as serper_mod

        raw = serper_mod._missing_key_error("hello", "web_search")
        body = json.loads(raw)

        assert body["error"] == "SERPER_API_KEY is not configured"
        assert body["query"] == "hello"
|
||||
|
||||
|
||||
class TestSafePublicUrl:
    """Checks ``_safe_public_url``: accepted URLs come back unchanged, rejected ones as ``""``."""

    @staticmethod
    def _assert_passes(url):
        """Assert that ``_safe_public_url`` returns *url* unchanged."""
        from deerflow.community.serper.tools import _safe_public_url

        assert _safe_public_url(url) == url

    @staticmethod
    def _assert_filtered(url):
        """Assert that ``_safe_public_url`` returns the empty string for *url*."""
        from deerflow.community.serper.tools import _safe_public_url

        assert _safe_public_url(url) == ""

    def test_https_public_hostname_passes(self):
        """An https URL on a public hostname is kept."""
        self._assert_passes("https://example.com/i.jpg")

    def test_public_ip_literal_passes(self):
        """A public dotted-decimal IP literal is kept."""
        self._assert_passes("https://8.8.8.8/i.jpg")

    def test_localhost_is_filtered(self):
        """``localhost`` is rejected."""
        self._assert_filtered("http://localhost/x.jpg")

    def test_localhost_subdomain_is_filtered(self):
        """A subdomain of ``localhost`` is rejected."""
        self._assert_filtered("http://foo.localhost/x.jpg")

    def test_trailing_dot_localhost_is_filtered(self):
        """``localhost.`` is rejected."""
        # FQDN root label: localhost. still resolves to loopback.
        self._assert_filtered("http://localhost./x.jpg")

    def test_trailing_dot_loopback_ip_is_filtered(self):
        """A loopback IP with a trailing dot is rejected."""
        self._assert_filtered("http://127.0.0.1./x.jpg")

    def test_trailing_dot_private_ip_is_filtered(self):
        """A private IP with a trailing dot is rejected."""
        self._assert_filtered("http://10.0.0.1./x.jpg")

    def test_trailing_dot_public_host_passes(self):
        """A public hostname with a trailing dot is kept, dot included."""
        # A trailing dot on a public host is harmless and must not be rejected.
        self._assert_passes("https://example.com./i.jpg")

    def test_private_ip_is_filtered(self):
        """A private dotted-decimal IP is rejected."""
        self._assert_filtered("http://10.0.0.1/x.jpg")

    def test_ipv4_mapped_ipv6_loopback_is_filtered(self):
        """An IPv4-mapped IPv6 loopback literal is rejected."""
        self._assert_filtered("http://[::ffff:127.0.0.1]/x.jpg")

    def test_non_http_scheme_is_filtered(self):
        """A ``file://`` URL is rejected."""
        self._assert_filtered("file:///etc/passwd")

    def test_non_string_is_filtered(self):
        """``None`` is rejected."""
        self._assert_filtered(None)

    def test_decimal_encoded_loopback_is_filtered(self):
        """A single-integer decimal encoding of loopback is rejected."""
        # 2130706433 == 127.0.0.1
        self._assert_filtered("http://2130706433/x.jpg")

    def test_hex_encoded_loopback_is_filtered(self):
        """A hex encoding of loopback is rejected."""
        # 0x7f000001 == 127.0.0.1
        self._assert_filtered("http://0x7f000001/x.jpg")

    def test_octal_encoded_loopback_is_filtered(self):
        """An octal first octet encoding loopback is rejected."""
        # 0177.0.0.1 == 127.0.0.1
        self._assert_filtered("http://0177.0.0.1/x.jpg")

    def test_decimal_encoded_private_ip_is_filtered(self):
        """A single-integer decimal encoding of a private IP is rejected."""
        # 167772161 == 10.0.0.1
        self._assert_filtered("http://167772161/x.jpg")

    def test_decimal_encoded_public_ip_passes(self):
        """A single-integer decimal encoding of a public IP is kept."""
        # 134744072 == 8.8.8.8
        self._assert_passes("http://134744072/i.jpg")

    def test_domain_with_hex_chars_is_not_treated_as_ip(self):
        """A hostname made of hex-looking letters is kept."""
        self._assert_passes("https://cafe.com/i.jpg")

    def test_out_of_range_octet_is_not_treated_as_ip(self):
        """A dotted host whose first part exceeds 255 is kept."""
        # 999.1.1.1 is not a valid IPv4 literal; treat as a hostname, not blocked.
        self._assert_passes("https://999.1.1.1/i.jpg")

    def test_too_many_octets_is_not_treated_as_ip(self):
        """A dotted host with five numeric parts is kept."""
        # More than 4 dotted parts cannot be an IPv4 literal; treat as hostname.
        self._assert_passes("https://1.2.3.4.5/i.jpg")

    def test_empty_octet_is_not_treated_as_ip(self):
        """A dotted host containing an empty part is kept."""
        # Empty dotted part (e.g. trailing/leading dot) cannot decode to an IP.
        self._assert_passes("https://1.2..3/i.jpg")

    def test_trailing_octet_out_of_range_is_not_treated_as_ip(self):
        """A dotted host whose last part exceeds 255 is kept."""
        # Leading octets are valid but the trailing block exceeds its range.
        self._assert_passes("https://1.2.3.999/i.jpg")
|
||||
|
||||
|
||||
class TestWebSearchTool:
|
||||
@@ -144,6 +380,47 @@ class TestWebSearchTool:
|
||||
assert parsed["total_results"] == 3
|
||||
assert len(parsed["results"]) == 3
|
||||
|
||||
def test_invalid_config_max_results_falls_back_to_default(self, mock_config_with_key):
    """A non-numeric configured ``max_results`` gives five results and a request with ``num == 5``."""
    from deerflow.community.serper.tools import web_search_tool

    tool_config = mock_config_with_key.return_value.get_tool_config.return_value
    tool_config.model_extra = {"api_key": "test-key", "max_results": "oops"}
    hits = [{"title": f"R{i}", "link": f"https://x.com/{i}", "snippet": f"S{i}"} for i in range(10)]
    response = _make_serper_response(hits)

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        post = client_cls.return_value.__enter__.return_value.post
        post.return_value = response
        body = json.loads(web_search_tool.invoke({"query": "test"}))

    assert body["total_results"] == 5
    assert post.call_args.kwargs["json"]["num"] == 5
|
||||
|
||||
def test_config_max_results_is_capped(self, mock_config_with_key):
    """A configured ``max_results`` of 999 gives ten results and a request with ``num == 10``."""
    from deerflow.community.serper.tools import web_search_tool

    tool_config = mock_config_with_key.return_value.get_tool_config.return_value
    tool_config.model_extra = {"api_key": "test-key", "max_results": 999}
    hits = [{"title": f"R{i}", "link": f"https://x.com/{i}", "snippet": f"S{i}"} for i in range(20)]
    response = _make_serper_response(hits)

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        post = client_cls.return_value.__enter__.return_value.post
        post.return_value = response
        body = json.loads(web_search_tool.invoke({"query": "test"}))

    assert body["total_results"] == 10
    assert len(body["results"]) == 10
    assert post.call_args.kwargs["json"]["num"] == 10
|
||||
|
||||
def test_max_results_parameter_accepted(self, mock_config_no_key):
|
||||
"""Tool accepts max_results as a call parameter when config does not override it."""
|
||||
organic = [{"title": f"R{i}", "link": f"https://x.com/{i}", "snippet": f"S{i}"} for i in range(10)]
|
||||
@@ -254,6 +531,23 @@ class TestWebSearchTool:
|
||||
|
||||
assert "error" in parsed
|
||||
|
||||
def test_http_status_error_from_response_returns_structured_error(self, mock_config_with_key):
    """An ``HTTPStatusError`` raised by ``raise_for_status`` becomes an error payload mentioning 403."""
    from deerflow.community.serper.tools import web_search_tool

    forbidden = MagicMock()
    forbidden.status_code = 403
    forbidden.text = "Forbidden"
    # The POST itself succeeds; the failure surfaces when the status is checked.
    forbidden.raise_for_status.side_effect = httpx.HTTPStatusError("403", request=MagicMock(), response=forbidden)

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        client_cls.return_value.__enter__.return_value.post.return_value = forbidden
        body = json.loads(web_search_tool.invoke({"query": "test"}))

    assert "error" in body
    assert "403" in body["error"]
|
||||
|
||||
def test_sends_correct_headers_and_payload(self, mock_config_with_key):
|
||||
organic = [{"title": "T", "link": "https://x.com", "snippet": "S"}]
|
||||
mock_resp = _make_serper_response(organic)
|
||||
@@ -306,3 +600,610 @@ class TestWebSearchTool:
|
||||
parsed = json.loads(result)
|
||||
|
||||
assert parsed["results"][0] == {"title": "", "url": "", "content": ""}
|
||||
|
||||
def test_malformed_json_response_returns_error(self, mock_config_with_key):
    """When ``response.json()`` raises ``JSONDecodeError`` the tool still returns an error payload."""
    from deerflow.community.serper.tools import web_search_tool

    broken = MagicMock()
    broken.json.side_effect = json.JSONDecodeError(" Expecting value", "doc", 0)

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        client_cls.return_value.__enter__.return_value.post.return_value = broken
        body = json.loads(web_search_tool.invoke({"query": "test"}))

    assert "error" in body
|
||||
|
||||
def test_non_dict_json_response_returns_error(self, mock_config_with_key):
    """A JSON list at the top level yields an error payload that echoes the query."""
    from deerflow.community.serper.tools import web_search_tool

    odd = MagicMock()
    odd.json.return_value = ["unexpected", "list"]
    odd.raise_for_status = MagicMock()

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        client_cls.return_value.__enter__.return_value.post.return_value = odd
        body = json.loads(web_search_tool.invoke({"query": "test"}))

    assert "error" in body
    assert body["query"] == "test"
|
||||
|
||||
def test_non_list_organic_returns_error(self, mock_config_with_key):
    """An ``organic`` field holding a dict is reported as an unexpected response format."""
    from deerflow.community.serper.tools import web_search_tool

    odd = MagicMock()
    odd.json.return_value = {"organic": {"unexpected": "dict"}}
    odd.raise_for_status = MagicMock()

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        client_cls.return_value.__enter__.return_value.post.return_value = odd
        body = json.loads(web_search_tool.invoke({"query": "test"}))

    assert body["error"] == "Serper returned an unexpected response format"
|
||||
|
||||
def test_null_organic_field_is_treated_as_no_results(self, mock_config_with_key):
    """An ``organic`` field of ``None`` is reported as "No results found", not a format error."""
    from deerflow.community.serper.tools import web_search_tool

    empty = MagicMock()
    empty.json.return_value = {"organic": None}
    empty.raise_for_status = MagicMock()

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        client_cls.return_value.__enter__.return_value.post.return_value = empty
        body = json.loads(web_search_tool.invoke({"query": "test"}))

    assert body["error"] == "No results found"
|
||||
|
||||
def test_non_dict_organic_items_are_ignored(self, mock_config_with_key):
    """A string entry in ``organic`` is dropped; the dict entry is the only result."""
    from deerflow.community.serper.tools import web_search_tool

    mixed_items = ["bad", {"title": "T", "link": "https://x.com", "snippet": "S"}]
    response = _make_serper_response(mixed_items)

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        client_cls.return_value.__enter__.return_value.post.return_value = response
        body = json.loads(web_search_tool.invoke({"query": "test"}))

    assert body["total_results"] == 1
    assert body["results"][0]["title"] == "T"
|
||||
|
||||
def test_timeout_returns_error(self, mock_config_with_key):
    """A ``TimeoutException`` from the POST yields an error whose text mentions "timed out"."""
    from deerflow.community.serper.tools import web_search_tool

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        post = client_cls.return_value.__enter__.return_value.post
        post.side_effect = httpx.TimeoutException("Read timed out")
        body = json.loads(web_search_tool.invoke({"query": "test"}))

    assert "error" in body
    assert "timed out" in body["error"].lower()
|
||||
|
||||
def test_long_query_is_truncated(self, mock_config_with_key):
    """A 1000-character query is sent to Serper as its first 500 characters."""
    from deerflow.community.serper.tools import web_search_tool

    response = _make_serper_response([{"title": "T", "link": "https://x.com", "snippet": "S"}])

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        post = client_cls.return_value.__enter__.return_value.post
        post.return_value = response

        oversized = "a" * 1000
        web_search_tool.invoke({"query": oversized})
        sent = post.call_args.kwargs["json"]

    assert sent["q"] == "a" * 500
|
||||
|
||||
def test_query_is_stripped(self, mock_config_with_key):
    """Surrounding whitespace is removed from the query before it is sent."""
    from deerflow.community.serper.tools import web_search_tool

    response = _make_serper_response([{"title": "T", "link": "https://x.com", "snippet": "S"}])

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        post = client_cls.return_value.__enter__.return_value.post
        post.return_value = response

        web_search_tool.invoke({"query": " hello world "})
        sent = post.call_args.kwargs["json"]

    assert sent["q"] == "hello world"
|
||||
|
||||
|
||||
class TestImageSearchTool:
|
||||
def test_basic_search_returns_normalized_results(self, mock_config_with_key):
    """Serper's camelCase image fields come back as snake_case results plus a usage hint."""
    from deerflow.community.serper.tools import image_search_tool

    first_cat = {
        "title": "Cat 1",
        "imageUrl": "https://example.com/cat1.jpg",
        "thumbnailUrl": "https://example.com/cat1_thumb.jpg",
    }
    second_cat = {
        "title": "Cat 2",
        "imageUrl": "https://example.com/cat2.jpg",
        "thumbnailUrl": "https://example.com/cat2_thumb.jpg",
    }
    response = _make_serper_images_response([first_cat, second_cat])

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        client_cls.return_value.__enter__.return_value.post.return_value = response
        body = json.loads(image_search_tool.invoke({"query": "cat photo"}))

    assert body["query"] == "cat photo"
    assert body["total_results"] == 2

    top = body["results"][0]
    assert top["title"] == "Cat 1"
    assert top["image_url"] == "https://example.com/cat1.jpg"
    assert top["thumbnail_url"] == "https://example.com/cat1_thumb.jpg"
    assert body["usage_hint"] == "Use the 'image_url' values as reference images in image generation. Download them first if needed."
|
||||
|
||||
def test_sends_correct_headers_and_payload_to_images_endpoint(self, mock_config_with_key):
    """The request targets the Serper images URL with the API key header, the query, and ``num == 5``."""
    from deerflow.community.serper.tools import image_search_tool

    pictures = [{"title": "T", "imageUrl": "https://x.com/i.jpg", "thumbnailUrl": "https://x.com/t.jpg"}]
    response = _make_serper_images_response(pictures)

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        post = client_cls.return_value.__enter__.return_value.post
        post.return_value = response
        image_search_tool.invoke({"query": "hello world"})

    recorded = post.call_args
    # The endpoint is passed positionally; headers and body are keyword arguments.
    assert recorded.args[0] == "https://google.serper.dev/images"
    assert recorded.kwargs["headers"]["X-API-KEY"] == "test-serper-key"

    sent = recorded.kwargs["json"]
    assert sent["q"] == "hello world"
    assert sent["num"] == 5
|
||||
|
||||
def test_image_url_falls_back_to_thumbnail(self, mock_config_with_key):
    """When ``imageUrl`` is absent, both output URL fields carry the thumbnail URL."""
    from deerflow.community.serper.tools import image_search_tool

    response = _make_serper_images_response([{"title": "Only thumb", "thumbnailUrl": "https://x.com/thumb.jpg"}])

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        client_cls.return_value.__enter__.return_value.post.return_value = response
        body = json.loads(image_search_tool.invoke({"query": "test"}))

    top = body["results"][0]
    assert top["image_url"] == "https://x.com/thumb.jpg"
    assert top["thumbnail_url"] == "https://x.com/thumb.jpg"
|
||||
|
||||
def test_thumbnail_url_falls_back_to_image(self, mock_config_with_key):
    """When ``thumbnailUrl`` is absent, both output URL fields carry the full image URL."""
    from deerflow.community.serper.tools import image_search_tool

    response = _make_serper_images_response([{"title": "Only image", "imageUrl": "https://x.com/full.jpg"}])

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        client_cls.return_value.__enter__.return_value.post.return_value = response
        body = json.loads(image_search_tool.invoke({"query": "test"}))

    top = body["results"][0]
    assert top["image_url"] == "https://x.com/full.jpg"
    assert top["thumbnail_url"] == "https://x.com/full.jpg"
|
||||
|
||||
def test_filtered_image_url_does_not_collapse_onto_thumbnail(self, mock_config_with_key):
    """A private-IP ``imageUrl`` becomes ``""`` instead of borrowing the safe thumbnail URL."""
    from deerflow.community.serper.tools import image_search_tool

    pictures = [{"title": "T", "imageUrl": "http://10.0.0.1/full.jpg", "thumbnailUrl": "https://example.com/t.jpg"}]
    response = _make_serper_images_response(pictures)

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        client_cls.return_value.__enter__.return_value.post.return_value = response
        body = json.loads(image_search_tool.invoke({"query": "test"}))

    top = body["results"][0]
    # The high-res field stays empty rather than masquerading as the preview.
    assert top["image_url"] == ""
    assert top["thumbnail_url"] == "https://example.com/t.jpg"
|
||||
|
||||
def test_filtered_thumbnail_does_not_collapse_onto_image(self, mock_config_with_key):
    """A loopback ``thumbnailUrl`` becomes ``""`` instead of borrowing the safe image URL."""
    from deerflow.community.serper.tools import image_search_tool

    pictures = [{"title": "T", "imageUrl": "https://example.com/full.jpg", "thumbnailUrl": "http://127.0.0.1/t.jpg"}]
    response = _make_serper_images_response(pictures)

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        client_cls.return_value.__enter__.return_value.post.return_value = response
        body = json.loads(image_search_tool.invoke({"query": "test"}))

    top = body["results"][0]
    assert top["image_url"] == "https://example.com/full.jpg"
    assert top["thumbnail_url"] == ""
|
||||
|
||||
def test_respects_max_results_from_config(self, mock_config_with_key):
    """A configured ``max_results`` of 3 limits ten available images to three results."""
    from deerflow.community.serper.tools import image_search_tool

    tool_config = mock_config_with_key.return_value.get_tool_config.return_value
    tool_config.model_extra = {"api_key": "test-key", "max_results": 3}
    pictures = [{"title": f"I{i}", "imageUrl": f"https://x.com/{i}.jpg"} for i in range(10)]
    response = _make_serper_images_response(pictures)

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        client_cls.return_value.__enter__.return_value.post.return_value = response
        body = json.loads(image_search_tool.invoke({"query": "test"}))

    assert body["total_results"] == 3
    assert len(body["results"]) == 3
|
||||
|
||||
def test_config_max_results_is_capped(self, mock_config_with_key):
    """A configured ``max_results`` of 999 gives ten images and a request with ``num == 10``."""
    from deerflow.community.serper.tools import image_search_tool

    tool_config = mock_config_with_key.return_value.get_tool_config.return_value
    tool_config.model_extra = {"api_key": "test-key", "max_results": 999}
    pictures = [{"title": f"I{i}", "imageUrl": f"https://x.com/{i}.jpg"} for i in range(20)]
    response = _make_serper_images_response(pictures)

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        post = client_cls.return_value.__enter__.return_value.post
        post.return_value = response
        body = json.loads(image_search_tool.invoke({"query": "test"}))

    assert body["total_results"] == 10
    assert len(body["results"]) == 10
    assert post.call_args.kwargs["json"]["num"] == 10
|
||||
|
||||
def test_empty_images_returns_error_json(self, mock_config_with_key):
    """An empty image list yields a "No images found" error that echoes the query."""
    from deerflow.community.serper.tools import image_search_tool

    response = _make_serper_images_response([])

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        client_cls.return_value.__enter__.return_value.post.return_value = response
        body = json.loads(image_search_tool.invoke({"query": "no results"}))

    assert "error" in body
    assert body["error"] == "No images found"
    assert body["query"] == "no results"
|
||||
|
||||
def test_missing_api_key_returns_error_json(self, mock_config_no_key):
    """With the environment cleared, the tool returns an error payload naming ``SERPER_API_KEY``."""
    # clear=True empties os.environ for the duration of the block, so no explicit
    # removal of SERPER_API_KEY (and no local ``import os``) is needed.
    with patch.dict("os.environ", {}, clear=True):
        from deerflow.community.serper.tools import image_search_tool

        result = image_search_tool.invoke({"query": "test"})
        parsed = json.loads(result)

    assert "error" in parsed
    assert "SERPER_API_KEY" in parsed["error"]
|
||||
|
||||
def test_http_error_returns_structured_error(self, mock_config_with_key):
    """An ``HTTPStatusError`` raised by the POST becomes an error payload mentioning 403."""
    from deerflow.community.serper.tools import image_search_tool

    forbidden = MagicMock()
    forbidden.status_code = 403
    forbidden.text = "Forbidden"

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        post = client_cls.return_value.__enter__.return_value.post
        post.side_effect = httpx.HTTPStatusError("403", request=MagicMock(), response=forbidden)
        body = json.loads(image_search_tool.invoke({"query": "test"}))

    assert "error" in body
    assert "403" in body["error"]
|
||||
|
||||
def test_network_exception_returns_error_json(self, mock_config_with_key):
    """A generic ``Exception`` from the POST is returned as an error payload, not raised."""
    from deerflow.community.serper.tools import image_search_tool

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        post = client_cls.return_value.__enter__.return_value.post
        post.side_effect = Exception("timeout")
        body = json.loads(image_search_tool.invoke({"query": "test"}))

    assert "error" in body
|
||||
|
||||
def test_uses_env_key_when_config_absent(self):
    """With no tool config, the ``X-API-KEY`` header carries the ``SERPER_API_KEY`` env value."""
    from deerflow.community.serper.tools import image_search_tool

    response = _make_serper_images_response([{"title": "T", "imageUrl": "https://x.com/i.jpg"}])

    with patch("deerflow.community.serper.tools.get_app_config") as app_config:
        app_config.return_value.get_tool_config.return_value = None

        with patch.dict("os.environ", {"SERPER_API_KEY": "env-only-key"}), patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
            post = client_cls.return_value.__enter__.return_value.post
            post.return_value = response

            image_search_tool.invoke({"query": "env key test"})
            sent_headers = post.call_args.kwargs["headers"]

    assert sent_headers["X-API-KEY"] == "env-only-key"
|
||||
|
||||
def test_max_results_parameter_accepted(self, mock_config_no_key):
    """Passing ``max_results=2`` at call time limits ten available images to two results."""
    from deerflow.community.serper.tools import image_search_tool

    pictures = [{"title": f"I{i}", "imageUrl": f"https://x.com/{i}.jpg"} for i in range(10)]
    response = _make_serper_images_response(pictures)

    with patch.dict("os.environ", {"SERPER_API_KEY": "env-key"}), patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        client_cls.return_value.__enter__.return_value.post.return_value = response
        body = json.loads(image_search_tool.invoke({"query": "test", "max_results": 2}))

    assert body["total_results"] == 2
|
||||
|
||||
def test_config_max_results_overrides_parameter(self):
    """A configured ``max_results`` of 3 wins over ``max_results=8`` passed at call time."""
    from deerflow.community.serper.tools import image_search_tool

    configured = MagicMock()
    configured.model_extra = {"api_key": "test-key", "max_results": 3}
    pictures = [{"title": f"I{i}", "imageUrl": f"https://x.com/{i}.jpg"} for i in range(10)]
    response = _make_serper_images_response(pictures)

    with patch("deerflow.community.serper.tools.get_app_config") as app_config, patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        app_config.return_value.get_tool_config.return_value = configured
        client_cls.return_value.__enter__.return_value.post.return_value = response
        body = json.loads(image_search_tool.invoke({"query": "test", "max_results": 8}))

    assert body["total_results"] == 3
|
||||
|
||||
def test_missing_api_key_logs_warning_once(self, mock_config_no_key, caplog):
    """Two keyless invocations leave exactly one WARNING record in ``caplog``."""
    import logging

    # clear=True empties os.environ for the duration of the block, so no explicit
    # removal of SERPER_API_KEY (and no local ``import os``) is needed.
    # NOTE(review): presumably the module's warn-once state is reset between tests — confirm.
    with patch.dict("os.environ", {}, clear=True):
        from deerflow.community.serper.tools import image_search_tool

        with caplog.at_level(logging.WARNING, logger="deerflow.community.serper.tools"):
            image_search_tool.invoke({"query": "q1"})
            image_search_tool.invoke({"query": "q2"})

    warnings = [r for r in caplog.records if r.levelno == logging.WARNING]
    assert len(warnings) == 1
|
||||
|
||||
def test_malformed_json_response_returns_error(self, mock_config_with_key):
    """When ``response.json()`` raises ``JSONDecodeError`` the tool still returns an error payload."""
    from deerflow.community.serper.tools import image_search_tool

    broken = MagicMock()
    broken.json.side_effect = json.JSONDecodeError(" Expecting value", "doc", 0)

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        client_cls.return_value.__enter__.return_value.post.return_value = broken
        body = json.loads(image_search_tool.invoke({"query": "test"}))

    assert "error" in body
|
||||
|
||||
def test_non_dict_json_response_returns_error(self, mock_config_with_key):
    """A JSON list at the top level yields an error payload that echoes the query."""
    from deerflow.community.serper.tools import image_search_tool

    odd = MagicMock()
    odd.json.return_value = ["unexpected", "list"]
    odd.raise_for_status = MagicMock()

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        client_cls.return_value.__enter__.return_value.post.return_value = odd
        body = json.loads(image_search_tool.invoke({"query": "test"}))

    assert "error" in body
    assert body["query"] == "test"
|
||||
|
||||
def test_non_list_images_returns_error(self, mock_config_with_key):
    """An ``images`` field holding a dict is reported as an unexpected response format."""
    from deerflow.community.serper.tools import image_search_tool

    odd = MagicMock()
    odd.json.return_value = {"images": {"unexpected": "dict"}}
    odd.raise_for_status = MagicMock()

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        client_cls.return_value.__enter__.return_value.post.return_value = odd
        body = json.loads(image_search_tool.invoke({"query": "test"}))

    assert body["error"] == "Serper returned an unexpected response format"
|
||||
|
||||
def test_null_images_field_is_treated_as_no_results(self, mock_config_with_key):
    """``images: null`` means an empty result set rather than a malformed payload."""
    response = MagicMock()
    response.raise_for_status = MagicMock()
    response.json.return_value = {"images": None}

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        http_client = client_cls.return_value.__enter__.return_value
        http_client.post.return_value = response

        from deerflow.community.serper.tools import image_search_tool

        payload = json.loads(image_search_tool.invoke({"query": "test"}))

    assert payload["error"] == "No images found"
|
||||
|
||||
def test_non_dict_image_items_are_ignored(self, mock_config_with_key):
    """Entries in ``images`` that are not dicts are skipped; valid ones are kept."""
    raw_items = ["bad", {"title": "T", "imageUrl": "https://x.com/i.jpg"}]
    response = _make_serper_images_response(raw_items)

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        http_client = client_cls.return_value.__enter__.return_value
        http_client.post.return_value = response

        from deerflow.community.serper.tools import image_search_tool

        payload = json.loads(image_search_tool.invoke({"query": "test"}))

    assert payload["total_results"] == 1
    first = payload["results"][0]
    assert first["image_url"] == "https://x.com/i.jpg"
|
||||
|
||||
def test_timeout_returns_error(self, mock_config_with_key):
    """An ``httpx.TimeoutException`` from the POST surfaces as a "timed out" error."""
    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        http_client = client_cls.return_value.__enter__.return_value
        http_client.post.side_effect = httpx.TimeoutException("Read timed out")

        from deerflow.community.serper.tools import image_search_tool

        payload = json.loads(image_search_tool.invoke({"query": "test"}))

    assert "error" in payload
    # Match case-insensitively so the exact capitalisation of the message is not pinned.
    assert "timed out" in payload["error"].lower()
|
||||
|
||||
def test_long_query_is_truncated(self, mock_config_with_key):
    """A 1000-character query is sent to Serper cut down to 500 characters."""
    response = _make_serper_images_response([{"title": "T", "imageUrl": "https://x.com/i.jpg"}])

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        post_mock = client_cls.return_value.__enter__.return_value.post
        post_mock.return_value = response

        from deerflow.community.serper.tools import image_search_tool

        oversized = "a" * 1000
        image_search_tool.invoke({"query": oversized})
        # Inspect the JSON body that was actually handed to the HTTP client.
        sent_body = post_mock.call_args.kwargs["json"]

    assert sent_body["q"] == "a" * 500
|
||||
|
||||
def test_query_is_stripped(self, mock_config_with_key):
    """Leading and trailing whitespace is removed from the query before sending."""
    response = _make_serper_images_response([{"title": "T", "imageUrl": "https://x.com/i.jpg"}])

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        post_mock = client_cls.return_value.__enter__.return_value.post
        post_mock.return_value = response

        from deerflow.community.serper.tools import image_search_tool

        image_search_tool.invoke({"query": " cat photo "})
        # Inspect the JSON body that was actually handed to the HTTP client.
        sent_body = post_mock.call_args.kwargs["json"]

    assert sent_body["q"] == "cat photo"
|
||||
|
||||
def test_partial_fields_in_image_result_returns_error(self, mock_config_with_key):
    """An image entry with no URL fields must not count as a usable result."""
    response = _make_serper_images_response([{}])

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        http_client = client_cls.return_value.__enter__.return_value
        http_client.post.return_value = response

        from deerflow.community.serper.tools import image_search_tool

        payload = json.loads(image_search_tool.invoke({"query": "test"}))

    assert payload["error"] == "No safe image URLs found"
    assert payload["query"] == "test"
|
||||
|
||||
def test_unsafe_image_urls_are_filtered(self, mock_config_with_key):
    """Entries with file:, data:, loopback or private-IP URLs are dropped; the safe one remains."""
    local_entry = {"title": "Local", "imageUrl": "file:///etc/passwd", "thumbnailUrl": "http://127.0.0.1/thumb.jpg"}
    data_entry = {"title": "Data", "imageUrl": "data:image/png;base64,abc", "thumbnailUrl": "http://10.0.0.1/thumb.jpg"}
    safe_entry = {"title": "Safe", "imageUrl": "https://example.com/i.jpg", "thumbnailUrl": "http://example.com/t.jpg"}
    response = _make_serper_images_response([local_entry, data_entry, safe_entry])

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        http_client = client_cls.return_value.__enter__.return_value
        http_client.post.return_value = response

        from deerflow.community.serper.tools import image_search_tool

        payload = json.loads(image_search_tool.invoke({"query": "test"}))

    assert payload["total_results"] == 1
    survivor = payload["results"][0]
    assert survivor["title"] == "Safe"
    assert survivor["image_url"] == "https://example.com/i.jpg"
    assert survivor["thumbnail_url"] == "http://example.com/t.jpg"
|
||||
|
||||
def test_all_unsafe_image_urls_return_error(self, mock_config_with_key):
    """When every entry is filtered out as unsafe, the tool returns an error payload."""
    local_entry = {"title": "Local", "imageUrl": "file:///etc/passwd", "thumbnailUrl": "http://127.0.0.1/thumb.jpg"}
    private_entry = {"title": "Private", "imageUrl": "http://10.0.0.1/image.jpg", "thumbnailUrl": "data:image/png;base64,abc"}
    response = _make_serper_images_response([local_entry, private_entry])

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        http_client = client_cls.return_value.__enter__.return_value
        http_client.post.return_value = response

        from deerflow.community.serper.tools import image_search_tool

        payload = json.loads(image_search_tool.invoke({"query": "test"}))

    assert payload["error"] == "No safe image URLs found"
    assert payload["query"] == "test"
|
||||
|
||||
def test_unsafe_image_urls_do_not_consume_result_limit(self, mock_config_with_key):
    """With ``max_results`` of 1, a filtered unsafe entry does not use up the single slot."""
    tool_config = mock_config_with_key.return_value.get_tool_config.return_value
    tool_config.model_extra = {"api_key": "test-key", "max_results": 1}

    unsafe_entry = {"title": "Unsafe", "imageUrl": "file:///etc/passwd", "thumbnailUrl": "http://127.0.0.1/thumb.jpg"}
    safe_entry = {"title": "Safe", "imageUrl": "https://example.com/i.jpg", "thumbnailUrl": "https://example.com/t.jpg"}
    response = _make_serper_images_response([unsafe_entry, safe_entry])

    with patch("deerflow.community.serper.tools.httpx.Client") as client_cls:
        http_client = client_cls.return_value.__enter__.return_value
        http_client.post.return_value = response

        from deerflow.community.serper.tools import image_search_tool

        payload = json.loads(image_search_tool.invoke({"query": "test"}))

    assert payload["total_results"] == 1
    assert payload["results"][0]["title"] == "Safe"
|
||||
|
||||
|
||||
def test_package_exports_image_search_tool():
    """The package root exposes the very same tool object as the ``tools`` module."""
    from deerflow.community.serper import image_search_tool as exported_tool
    from deerflow.community.serper.tools import image_search_tool as direct_tool

    assert exported_tool is direct_tool
|
||||
|
||||
+14
-4
@@ -522,13 +522,13 @@ tools:
|
||||
|
||||
# Web search tool (uses Serper - Google Search API, requires SERPER_API_KEY)
|
||||
# Serper provides real-time Google Search results. Sign up at https://serper.dev
|
||||
# Note: set SERPER_API_KEY in your environment before starting the app, or
|
||||
# uncomment and fill in api_key below (the $VAR syntax is resolved at startup).
|
||||
# Note: set SERPER_API_KEY in your environment before starting the app.
|
||||
# Avoid putting literal API keys in config.yaml; use the $VAR form instead.
|
||||
# - name: web_search
|
||||
# group: web
|
||||
# use: deerflow.community.serper.tools:web_search_tool
|
||||
# max_results: 5
|
||||
# # api_key: $SERPER_API_KEY # Optional if SERPER_API_KEY env var is set
|
||||
# max_results: 5 # capped at 10 by the Serper provider
|
||||
# # api_key: $SERPER_API_KEY # Optional explicit env-var reference
|
||||
|
||||
# Web search tool (uses Brave Search API, requires BRAVE_SEARCH_API_KEY)
|
||||
# Brave Search returns results from an independent index. Sign up at
|
||||
@@ -637,6 +637,16 @@ tools:
|
||||
# # Image size filter. Options: "l" (large), "m" (medium), "i" (icon).
|
||||
# image_size: "i"
|
||||
|
||||
# Image search tool (uses Serper - Google Images API, requires SERPER_API_KEY)
|
||||
# Serper provides real-time Google Images results. Sign up at https://serper.dev
|
||||
# Note: set SERPER_API_KEY in your environment before starting the app.
|
||||
# Avoid putting literal API keys in config.yaml; use the $VAR form instead.
|
||||
# - name: image_search
|
||||
# group: web
|
||||
# use: deerflow.community.serper.tools:image_search_tool
|
||||
# max_results: 5 # capped at 10 by the Serper provider
|
||||
# # api_key: $SERPER_API_KEY # Optional explicit env-var reference
|
||||
|
||||
# File operations tools
|
||||
- name: ls
|
||||
group: file:read
|
||||
|
||||
@@ -212,13 +212,31 @@ tools:
|
||||
|
||||
### Image search
|
||||
|
||||
<Tabs items={["DuckDuckGo (default)", "InfoQuest", "Serper"]}>
|
||||
<Tabs.Tab>
|
||||
```yaml
|
||||
tools:
|
||||
- use: deerflow.community.image_search.tools:image_search_tool
|
||||
# Or use InfoQuest:
|
||||
# - use: deerflow.community.infoquest.tools:image_search_tool
|
||||
# api_key: $INFOQUEST_API_KEY
|
||||
```
|
||||
No API key required. Default configuration, good for development and general use.
|
||||
</Tabs.Tab>
|
||||
<Tabs.Tab>
|
||||
```yaml
|
||||
tools:
|
||||
- use: deerflow.community.infoquest.tools:image_search_tool
|
||||
api_key: $INFOQUEST_API_KEY
|
||||
```
|
||||
Requires an InfoQuest API key.
|
||||
</Tabs.Tab>
|
||||
<Tabs.Tab>
|
||||
```yaml
|
||||
tools:
|
||||
- use: deerflow.community.serper.tools:image_search_tool
|
||||
api_key: $SERPER_API_KEY
|
||||
```
|
||||
Google Images results via Serper. Requires a [Serper](https://serper.dev) API key. Shares the same `SERPER_API_KEY` as the Serper `web_search` tool.
|
||||
</Tabs.Tab>
|
||||
</Tabs>
|
||||
|
||||
## Tool groups
|
||||
|
||||
|
||||
@@ -191,10 +191,31 @@ tools:
|
||||
|
||||
### 图像搜索
|
||||
|
||||
<Tabs items={["DuckDuckGo(默认)", "InfoQuest", "Serper"]}>
|
||||
<Tabs.Tab>
|
||||
```yaml
|
||||
tools:
|
||||
- use: deerflow.community.image_search.tools:image_search_tool
|
||||
```
|
||||
无需 API Key。默认配置,适合开发和通用用途。
|
||||
</Tabs.Tab>
|
||||
<Tabs.Tab>
|
||||
```yaml
|
||||
tools:
|
||||
- use: deerflow.community.infoquest.tools:image_search_tool
|
||||
api_key: $INFOQUEST_API_KEY
|
||||
```
|
||||
需要 InfoQuest API Key。
|
||||
</Tabs.Tab>
|
||||
<Tabs.Tab>
|
||||
```yaml
|
||||
tools:
|
||||
- use: deerflow.community.serper.tools:image_search_tool
|
||||
api_key: $SERPER_API_KEY
|
||||
```
|
||||
通过 Serper 获取 Google 图片结果。需要 [Serper](https://serper.dev) API Key,与 Serper `web_search` 工具复用同一个 `SERPER_API_KEY`。
|
||||
</Tabs.Tab>
|
||||
</Tabs>
|
||||
|
||||
## 工具组
|
||||
|
||||
|
||||
+51
-32
@@ -103,6 +103,7 @@ def _split_use_path(use: str) -> tuple[str, str] | None:
|
||||
# Check result container
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class CheckResult:
|
||||
def __init__(
|
||||
self,
|
||||
@@ -129,6 +130,7 @@ class CheckResult:
|
||||
# Individual checks
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def check_python() -> CheckResult:
|
||||
v = sys.version_info
|
||||
version_str = f"{v.major}.{v.minor}.{v.micro}"
|
||||
@@ -198,11 +200,7 @@ def check_nginx() -> CheckResult:
|
||||
return CheckResult(
|
||||
"nginx",
|
||||
"fail",
|
||||
fix=(
|
||||
"macOS: brew install nginx\n"
|
||||
"Ubuntu: sudo apt install nginx\n"
|
||||
"Windows: use WSL or Docker mode"
|
||||
),
|
||||
fix=("macOS: brew install nginx\nUbuntu: sudo apt install nginx\nWindows: use WSL or Docker mode"),
|
||||
)
|
||||
|
||||
|
||||
@@ -404,11 +402,7 @@ def check_llm_auth(config_path: Path) -> list[CheckResult]:
|
||||
)
|
||||
|
||||
if use == "deerflow.models.claude_provider:ClaudeChatModel":
|
||||
credential_paths = [
|
||||
Path(os.environ["CLAUDE_CODE_CREDENTIALS_PATH"]).expanduser()
|
||||
for env_name in ("CLAUDE_CODE_CREDENTIALS_PATH",)
|
||||
if os.environ.get(env_name)
|
||||
]
|
||||
credential_paths = [Path(os.environ["CLAUDE_CODE_CREDENTIALS_PATH"]).expanduser() for env_name in ("CLAUDE_CODE_CREDENTIALS_PATH",) if os.environ.get(env_name)]
|
||||
credential_paths.append(Path("~/.claude/.credentials.json").expanduser())
|
||||
has_oauth_env = any(
|
||||
os.environ.get(name)
|
||||
@@ -428,10 +422,7 @@ def check_llm_auth(config_path: Path) -> list[CheckResult]:
|
||||
CheckResult(
|
||||
f"Claude auth available (model: {model_name})",
|
||||
"fail",
|
||||
fix=(
|
||||
"Set ANTHROPIC_API_KEY / CLAUDE_CODE_OAUTH_TOKEN, "
|
||||
"or place credentials at ~/.claude/.credentials.json"
|
||||
),
|
||||
fix=("Set ANTHROPIC_API_KEY / CLAUDE_CODE_OAUTH_TOKEN, or place credentials at ~/.claude/.credentials.json"),
|
||||
)
|
||||
)
|
||||
except Exception as exc:
|
||||
@@ -458,7 +449,6 @@ def check_web_tool(config_path: Path, *, tool_name: str, label: str) -> CheckRes
|
||||
data = _load_yaml_file(config_path)
|
||||
|
||||
tool_entries = [t for t in data.get("tools", []) if t.get("name") == tool_name]
|
||||
tool_uses = [t.get("use", "") for t in tool_entries]
|
||||
if not tool_entries:
|
||||
return CheckResult(
|
||||
label,
|
||||
@@ -470,6 +460,7 @@ def check_web_tool(config_path: Path, *, tool_name: str, label: str) -> CheckRes
|
||||
free_providers = {
|
||||
"web_search": {"ddg_search": "DuckDuckGo (no key needed)"},
|
||||
"web_fetch": {"jina_ai": "Jina AI Reader (no key needed)"},
|
||||
"image_search": {"deerflow.community.image_search.tools": "DuckDuckGo Images (no key needed)"},
|
||||
}
|
||||
key_providers = {
|
||||
"web_search": {
|
||||
@@ -478,35 +469,57 @@ def check_web_tool(config_path: Path, *, tool_name: str, label: str) -> CheckRes
|
||||
"exa": "EXA_API_KEY",
|
||||
"firecrawl": "FIRECRAWL_API_KEY",
|
||||
"brave": "BRAVE_SEARCH_API_KEY",
|
||||
"serper": "SERPER_API_KEY",
|
||||
},
|
||||
"web_fetch": {
|
||||
"infoquest": "INFOQUEST_API_KEY",
|
||||
"exa": "EXA_API_KEY",
|
||||
"firecrawl": "FIRECRAWL_API_KEY",
|
||||
},
|
||||
"image_search": {
|
||||
"infoquest": "INFOQUEST_API_KEY",
|
||||
"serper": "SERPER_API_KEY",
|
||||
},
|
||||
}
|
||||
|
||||
for tool_entry in tool_entries:
|
||||
use = tool_entry.get("use", "")
|
||||
def _configured_key_detail(tool: dict, default_var: str) -> tuple[Status, str] | None:
    """Report where a tool entry's API key comes from, if one is available.

    Args:
        tool: A single tool entry; only its ``api_key`` value is read.
        default_var: Name of the environment variable checked as a fallback.

    Returns:
        ``("warn", detail)`` when ``api_key`` is a non-blank literal,
        ``("ok", detail)`` when a non-blank value is found in the environment
        (through a ``$VAR`` reference or through ``default_var``), and
        ``None`` when neither source provides a key.
    """
    configured = tool.get("api_key")
    if isinstance(configured, str):
        stripped = configured.strip()
        if stripped:
            if not stripped.startswith("$"):
                return ("warn", "literal api_key set in config")
            env_name = stripped[1:]
            referenced = os.environ.get(env_name)
            if referenced and referenced.strip():
                return ("ok", f"{env_name} set from config")
            # The referenced variable is unset or blank: continue to the
            # default environment variable below.

    fallback = os.environ.get(default_var)
    if fallback and fallback.strip():
        return ("ok", f"{default_var} set")
    return None
|
||||
|
||||
for tool in tool_entries:
|
||||
use = tool.get("use", "")
|
||||
for provider, detail in free_providers.get(tool_name, {}).items():
|
||||
if provider in use:
|
||||
return CheckResult(label, "ok", detail)
|
||||
|
||||
for tool_entry in tool_entries:
|
||||
use = tool_entry.get("use", "")
|
||||
for tool in tool_entries:
|
||||
use = tool.get("use", "")
|
||||
for provider, var in key_providers.get(tool_name, {}).items():
|
||||
if provider in use:
|
||||
configured_key = tool_entry.get("api_key")
|
||||
if isinstance(configured_key, str) and configured_key.strip():
|
||||
if configured_key.startswith("$"):
|
||||
ref_var = configured_key[1:]
|
||||
if os.environ.get(ref_var):
|
||||
return CheckResult(label, "ok", f"{provider} ({ref_var} set via api_key)")
|
||||
else:
|
||||
return CheckResult(label, "ok", f"{provider} (api_key configured)")
|
||||
val = os.environ.get(var)
|
||||
if val:
|
||||
return CheckResult(label, "ok", f"{provider} ({var} set)")
|
||||
key_status = _configured_key_detail(tool, var)
|
||||
if key_status:
|
||||
status, detail = key_status
|
||||
if status == "warn":
|
||||
return CheckResult(
|
||||
label,
|
||||
"warn",
|
||||
f"{provider} ({detail})",
|
||||
fix=f"Move the API key to .env as {var}=<your-key> and reference it as ${var}",
|
||||
)
|
||||
return CheckResult(label, "ok", f"{provider} ({detail})")
|
||||
return CheckResult(
|
||||
label,
|
||||
"warn",
|
||||
@@ -514,7 +527,8 @@ def check_web_tool(config_path: Path, *, tool_name: str, label: str) -> CheckRes
|
||||
fix=f"Add {var}=<your-key> to .env, or run 'make setup'",
|
||||
)
|
||||
|
||||
for use in tool_uses:
|
||||
for tool in tool_entries:
|
||||
use = tool.get("use", "")
|
||||
split = _split_use_path(use)
|
||||
if split is None:
|
||||
return CheckResult(
|
||||
@@ -544,6 +558,10 @@ def check_web_fetch(config_path: Path) -> CheckResult:
|
||||
return check_web_tool(config_path, tool_name="web_fetch", label="web fetch configured")
|
||||
|
||||
|
||||
def check_image_search(config_path: Path) -> CheckResult:
    """Run the shared web-tool check for the ``image_search`` tool entry."""
    return check_web_tool(
        config_path,
        tool_name="image_search",
        label="image search configured",
    )
|
||||
|
||||
|
||||
def check_frontend_env(project_root: Path) -> CheckResult:
|
||||
env_path = project_root / "frontend" / ".env"
|
||||
if env_path.exists():
|
||||
@@ -641,6 +659,7 @@ def check_env_file(project_root: Path) -> CheckResult:
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def main() -> int:
|
||||
project_root = Path(__file__).resolve().parents[1]
|
||||
config_path = project_root / "config.yaml"
|
||||
@@ -691,7 +710,7 @@ def main() -> int:
|
||||
sections.append(("LLM Provider", llm_checks))
|
||||
|
||||
# ── Web Capabilities ─────────────────────────────────────────────────────
|
||||
search_checks = [check_web_search(config_path), check_web_fetch(config_path)]
|
||||
search_checks = [check_web_search(config_path), check_web_fetch(config_path), check_image_search(config_path)]
|
||||
sections.append(("Web Capabilities", search_checks))
|
||||
|
||||
# ── Sandbox ──────────────────────────────────────────────────────────────
|
||||
|
||||
Reference in New Issue
Block a user