mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-18 13:46:02 +00:00
feat(community): add Serper Google Images provider for image_search (#3575)
* feat(community): add Serper Google Images provider for image_search

  Add a Serper-backed `image_search` tool alongside the existing Serper `web_search` provider, so users with a SERPER_API_KEY can pull Google Images results as reference images for downstream image generation.

  - Share request/response handling between web_search and image_search via `_serper_post` / `_response_items`, with bounded `max_results` (capped at 10) and query normalization.
  - Add a best-effort SSRF guard (`_safe_public_url`) that rejects non-http(s), localhost and private/non-global IP image URLs; filtered entries are dropped and never consume the result limit.
  - doctor: flag literal `api_key` values in config as a warning and steer users toward `.env` + `$SERPER_API_KEY`.
  - Docs/config: document the Serper image_search provider and SERPER_API_KEY, and discourage committing literal keys to config.yaml.
  - Tests: cover the provider end-to-end (100% line coverage on tools.py) and the doctor literal-key warning path.

* fix(community): block obfuscated IPv4 literals in Serper image SSRF guard

  The image_search SSRF guard only rejected dotted-decimal IP literals; encoded forms such as decimal (http://2130706433/), hex (0x7f000001) and octal (0177.0.0.1) raised ValueError in ip_address() and were allowed through, even though many HTTP clients resolve them to private addresses like 127.0.0.1.

  Add _decode_ipv4() to permissively decode these inet_aton-style encodings and apply the same is_global check; hostnames that do not decode to an IP (e.g. cafe.com) are still treated as hosts and left to fetch-time re-validation.

  Addresses PR review feedback. Tests cover decimal/hex/octal loopback and private encodings plus non-IP edge cases; tools.py stays at 100% line coverage.

* test(community): cover IPv4-mapped IPv6 URL filtering

* fix(community): address Serper image search review feedback

  - Block trailing-dot hostname SSRF bypass (localhost./127.0.0.1.) in _safe_public_url by stripping the FQDN root label before checks.
  - Keep a filtered image/thumbnail URL empty instead of collapsing onto its counterpart, preserving the high-res/preview contract.
  - Evaluate the SSRF guard once per field rather than twice.
  - Treat a null-typed organic/images field as "no results" rather than a malformed payload.
  - doctor.py: when a config $VAR is unset, fall through to the default env var before reporting it as not set.
This commit is contained in:
@@ -214,13 +214,14 @@ class TestCheckWebSearch:
|
||||
assert result.fix is not None
|
||||
assert "BRAVE_SEARCH_API_KEY" in result.fix
|
||||
|
||||
def test_brave_with_inline_api_key_warns(self, tmp_path, monkeypatch):
    """A literal ``api_key`` in the Brave web_search config must produce a warning.

    This is the post-change form of the test: the earlier ``..._ok`` variant
    (which asserted status "ok" / "api_key configured") is superseded, since
    the two sets of assertions contradict each other and cannot both hold.
    """
    # Make sure the result cannot be influenced by a key in the environment.
    monkeypatch.delenv("BRAVE_SEARCH_API_KEY", raising=False)
    cfg = tmp_path / "config.yaml"
    cfg.write_text('config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.brave.tools:web_search_tool\n api_key: "inline-key"\n')
    result = doctor.check_web_search(cfg)
    assert result.status == "warn"
    assert "literal api_key set in config" in result.detail
    # The fix hint is expected to name the environment variable to use instead.
    assert "BRAVE_SEARCH_API_KEY" in (result.fix or "")
|
||||
|
||||
def test_brave_with_api_key_env_ref_ok(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("BRAVE_SEARCH_API_KEY", "bsa-test")
|
||||
@@ -228,7 +229,61 @@ class TestCheckWebSearch:
|
||||
cfg.write_text("config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.brave.tools:web_search_tool\n api_key: $BRAVE_SEARCH_API_KEY\n")
|
||||
result = doctor.check_web_search(cfg)
|
||||
assert result.status == "ok"
|
||||
assert "api_key" in result.detail
|
||||
assert "BRAVE_SEARCH_API_KEY set from config" in result.detail
|
||||
|
||||
def test_serper_with_key_ok(self, tmp_path, monkeypatch):
    """Serper web_search is expected to check out ok when SERPER_API_KEY is exported."""
    config_path = tmp_path / "config.yaml"
    config_path.write_text("config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.serper.tools:web_search_tool\n")
    # The config carries no api_key entry; the key is supplied via the environment only.
    monkeypatch.setenv("SERPER_API_KEY", "test-key")

    outcome = doctor.check_web_search(config_path)

    assert outcome.status == "ok"
    assert "serper" in outcome.detail
|
||||
|
||||
def test_serper_without_key_warns(self, tmp_path, monkeypatch):
    """Serper web_search with no key anywhere is expected to warn and name SERPER_API_KEY."""
    config_path = tmp_path / "config.yaml"
    config_path.write_text("config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.serper.tools:web_search_tool\n")
    # Guarantee the variable is absent regardless of the host environment.
    monkeypatch.delenv("SERPER_API_KEY", raising=False)

    outcome = doctor.check_web_search(config_path)

    assert outcome.status == "warn"
    fix_hint = outcome.fix or ""
    assert "SERPER_API_KEY" in fix_hint
|
||||
|
||||
def test_serper_inline_api_key_warns(self, tmp_path, monkeypatch):
    """A literal api_key in the Serper web_search config is expected to be flagged as a warning."""
    config_path = tmp_path / "config.yaml"
    config_path.write_text("config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.serper.tools:web_search_tool\n api_key: inline-key\n")
    # Only the inline key is present; the environment variable is removed.
    monkeypatch.delenv("SERPER_API_KEY", raising=False)

    outcome = doctor.check_web_search(config_path)

    assert outcome.status == "warn"
    assert "literal api_key set in config" in outcome.detail
    fix_hint = outcome.fix or ""
    assert "SERPER_API_KEY" in fix_hint
|
||||
|
||||
def test_serper_config_env_ref_ok(self, tmp_path, monkeypatch):
    """An api_key written as $SERPER_API_KEY is expected to be ok when that variable is set."""
    config_path = tmp_path / "config.yaml"
    config_path.write_text("config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.serper.tools:web_search_tool\n api_key: $SERPER_API_KEY\n")
    # The variable referenced from the config is present in the environment.
    monkeypatch.setenv("SERPER_API_KEY", "test-key")

    outcome = doctor.check_web_search(config_path)

    assert outcome.status == "ok"
    assert "SERPER_API_KEY set from config" in outcome.detail
|
||||
|
||||
def test_serper_unresolved_env_ref_falls_back_to_default_var(self, tmp_path, monkeypatch):
    """An unset custom $VAR with SERPER_API_KEY set is expected to report ok, not warn."""
    config_path = tmp_path / "config.yaml"
    config_path.write_text("config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.serper.tools:web_search_tool\n api_key: $MY_CUSTOM_SERPER_KEY\n")
    # The config points at $MY_CUSTOM_SERPER_KEY, which is left unset, while the
    # default SERPER_API_KEY is set; the tool uses the default as a runtime
    # fallback, so the check should come back ok rather than warn.
    monkeypatch.delenv("MY_CUSTOM_SERPER_KEY", raising=False)
    monkeypatch.setenv("SERPER_API_KEY", "test-key")

    outcome = doctor.check_web_search(config_path)

    assert outcome.status == "ok"
    assert "SERPER_API_KEY set" in outcome.detail
|
||||
|
||||
def test_serper_unresolved_env_ref_without_default_warns(self, tmp_path, monkeypatch):
    """An unset custom $VAR with no SERPER_API_KEY either is expected to warn."""
    config_path = tmp_path / "config.yaml"
    config_path.write_text("config_version: 5\ntools:\n - name: web_search\n use: deerflow.community.serper.tools:web_search_tool\n api_key: $MY_CUSTOM_SERPER_KEY\n")
    # Remove both the variable referenced from the config and the default one.
    for env_name in ("MY_CUSTOM_SERPER_KEY", "SERPER_API_KEY"):
        monkeypatch.delenv(env_name, raising=False)

    outcome = doctor.check_web_search(config_path)

    assert outcome.status == "warn"
    fix_hint = outcome.fix or ""
    assert "SERPER_API_KEY" in fix_hint
|
||||
|
||||
def test_no_search_tool_warns(self, tmp_path):
|
||||
cfg = tmp_path / "config.yaml"
|
||||
@@ -284,6 +339,74 @@ class TestCheckWebFetch:
|
||||
assert result.status == "fail"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# check_image_search
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestCheckImageSearch:
    """Exercise ``doctor.check_image_search`` against different image_search configs."""

    # NOTE(review): the YAML indentation inside the config literals below looks
    # collapsed (possibly extraction damage) -- confirm against the upstream file.

    @staticmethod
    def _run_check(tmp_path, config_text):
        """Write *config_text* to ``config.yaml`` under *tmp_path* and return the check result."""
        config_path = tmp_path / "config.yaml"
        config_path.write_text(config_text)
        return doctor.check_image_search(config_path)

    def test_ddg_always_ok(self, tmp_path):
        """The ``deerflow.community.image_search`` tool is expected to be ok and mention DuckDuckGo."""
        outcome = self._run_check(
            tmp_path,
            "config_version: 5\ntools:\n - name: image_search\n use: deerflow.community.image_search.tools:image_search_tool\n",
        )

        assert outcome.status == "ok"
        assert "DuckDuckGo" in outcome.detail

    def test_serper_with_key_ok(self, tmp_path, monkeypatch):
        """Serper image_search is expected to be ok when SERPER_API_KEY is exported."""
        monkeypatch.setenv("SERPER_API_KEY", "test-key")

        outcome = self._run_check(
            tmp_path,
            "config_version: 5\ntools:\n - name: image_search\n use: deerflow.community.serper.tools:image_search_tool\n",
        )

        assert outcome.status == "ok"
        assert "serper" in outcome.detail

    def test_serper_without_key_warns(self, tmp_path, monkeypatch):
        """Serper image_search without any key is expected to warn and name SERPER_API_KEY."""
        monkeypatch.delenv("SERPER_API_KEY", raising=False)

        outcome = self._run_check(
            tmp_path,
            "config_version: 5\ntools:\n - name: image_search\n use: deerflow.community.serper.tools:image_search_tool\n",
        )

        assert outcome.status == "warn"
        fix_hint = outcome.fix or ""
        assert "SERPER_API_KEY" in fix_hint

    def test_serper_inline_api_key_warns(self, tmp_path, monkeypatch):
        """A literal api_key in the Serper image_search config is expected to be flagged."""
        monkeypatch.delenv("SERPER_API_KEY", raising=False)

        outcome = self._run_check(
            tmp_path,
            "config_version: 5\ntools:\n - name: image_search\n use: deerflow.community.serper.tools:image_search_tool\n api_key: inline-key\n",
        )

        assert outcome.status == "warn"
        assert "literal api_key set in config" in outcome.detail
        fix_hint = outcome.fix or ""
        assert "SERPER_API_KEY" in fix_hint

    def test_serper_config_env_ref_without_env_warns(self, tmp_path, monkeypatch):
        """An api_key of $SERPER_API_KEY with that variable unset is expected to warn."""
        monkeypatch.delenv("SERPER_API_KEY", raising=False)

        outcome = self._run_check(
            tmp_path,
            "config_version: 5\ntools:\n - name: image_search\n use: deerflow.community.serper.tools:image_search_tool\n api_key: $SERPER_API_KEY\n",
        )

        assert outcome.status == "warn"
        fix_hint = outcome.fix or ""
        assert "SERPER_API_KEY" in fix_hint

    def test_infoquest_with_key_ok(self, tmp_path, monkeypatch):
        """InfoQuest image_search is expected to be ok when INFOQUEST_API_KEY is exported."""
        monkeypatch.setenv("INFOQUEST_API_KEY", "test-key")

        outcome = self._run_check(
            tmp_path,
            "config_version: 5\ntools:\n - name: image_search\n use: deerflow.community.infoquest.tools:image_search_tool\n",
        )

        assert outcome.status == "ok"
        assert "infoquest" in outcome.detail

    def test_no_image_search_tool_warns(self, tmp_path):
        """An empty tools list is expected to warn and come with a fix hint."""
        outcome = self._run_check(tmp_path, "config_version: 5\ntools: []\n")

        assert outcome.status == "warn"
        assert outcome.fix is not None

    def test_invalid_provider_use_fails(self, tmp_path):
        """A ``use`` path pointing at the ``not_real`` provider module is expected to fail."""
        outcome = self._run_check(
            tmp_path,
            "config_version: 5\ntools:\n - name: image_search\n use: deerflow.community.not_real.tools:image_search_tool\n",
        )

        assert outcome.status == "fail"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# check_env_file
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user