mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-18 13:46:02 +00:00
feat(community): add Serper Google Images provider for image_search (#3575)
* feat(community): add Serper Google Images provider for image_search

Add a Serper-backed `image_search` tool alongside the existing Serper `web_search` provider, so users with a SERPER_API_KEY can pull Google Images results as reference images for downstream image generation.

- Share request/response handling between web_search and image_search via `_serper_post` / `_response_items`, with bounded `max_results` (capped at 10) and query normalization.
- Add a best-effort SSRF guard (`_safe_public_url`) that rejects non-http(s), localhost and private/non-global IP image URLs; filtered entries are dropped and never consume the result limit.
- doctor: flag literal `api_key` values in config as a warning and steer users toward `.env` + `$SERPER_API_KEY`.
- Docs/config: document the Serper image_search provider and SERPER_API_KEY, and discourage committing literal keys to config.yaml.
- Tests: cover the provider end-to-end (100% line coverage on tools.py) and the doctor literal-key warning path.

* fix(community): block obfuscated IPv4 literals in Serper image SSRF guard

The image_search SSRF guard only rejected dotted-decimal IP literals; encoded forms such as decimal (http://2130706433/), hex (0x7f000001) and octal (0177.0.0.1) raised ValueError in ip_address() and were allowed through, even though many HTTP clients resolve them to private addresses like 127.0.0.1. Add _decode_ipv4() to permissively decode these inet_aton-style encodings and apply the same is_global check; hostnames that do not decode to an IP (e.g. cafe.com) are still treated as hosts and left to fetch-time re-validation.

Addresses PR review feedback. Tests cover decimal/hex/octal loopback and private encodings plus non-IP edge cases; tools.py stays at 100% line coverage.

* test(community): cover IPv4-mapped IPv6 URL filtering

* fix(community): address Serper image search review feedback

- Block trailing-dot hostname SSRF bypass (localhost./127.0.0.1.) in _safe_public_url by stripping the FQDN root label before checks.
- Keep a filtered image/thumbnail URL empty instead of collapsing onto its counterpart, preserving the high-res/preview contract.
- Evaluate the SSRF guard once per field rather than twice.
- Treat a null-typed organic/images field as "no results" rather than a malformed payload.
- doctor.py: when a config $VAR is unset, fall through to the default env var before reporting it as not set.
This commit is contained in:
+51
-32
@@ -103,6 +103,7 @@ def _split_use_path(use: str) -> tuple[str, str] | None:
|
||||
# Check result container
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class CheckResult:
|
||||
def __init__(
|
||||
self,
|
||||
@@ -129,6 +130,7 @@ class CheckResult:
|
||||
# Individual checks
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def check_python() -> CheckResult:
|
||||
v = sys.version_info
|
||||
version_str = f"{v.major}.{v.minor}.{v.micro}"
|
||||
@@ -198,11 +200,7 @@ def check_nginx() -> CheckResult:
|
||||
return CheckResult(
|
||||
"nginx",
|
||||
"fail",
|
||||
fix=(
|
||||
"macOS: brew install nginx\n"
|
||||
"Ubuntu: sudo apt install nginx\n"
|
||||
"Windows: use WSL or Docker mode"
|
||||
),
|
||||
fix=("macOS: brew install nginx\nUbuntu: sudo apt install nginx\nWindows: use WSL or Docker mode"),
|
||||
)
|
||||
|
||||
|
||||
@@ -404,11 +402,7 @@ def check_llm_auth(config_path: Path) -> list[CheckResult]:
|
||||
)
|
||||
|
||||
if use == "deerflow.models.claude_provider:ClaudeChatModel":
|
||||
credential_paths = [
|
||||
Path(os.environ["CLAUDE_CODE_CREDENTIALS_PATH"]).expanduser()
|
||||
for env_name in ("CLAUDE_CODE_CREDENTIALS_PATH",)
|
||||
if os.environ.get(env_name)
|
||||
]
|
||||
credential_paths = [Path(os.environ["CLAUDE_CODE_CREDENTIALS_PATH"]).expanduser() for env_name in ("CLAUDE_CODE_CREDENTIALS_PATH",) if os.environ.get(env_name)]
|
||||
credential_paths.append(Path("~/.claude/.credentials.json").expanduser())
|
||||
has_oauth_env = any(
|
||||
os.environ.get(name)
|
||||
@@ -428,10 +422,7 @@ def check_llm_auth(config_path: Path) -> list[CheckResult]:
|
||||
CheckResult(
|
||||
f"Claude auth available (model: {model_name})",
|
||||
"fail",
|
||||
fix=(
|
||||
"Set ANTHROPIC_API_KEY / CLAUDE_CODE_OAUTH_TOKEN, "
|
||||
"or place credentials at ~/.claude/.credentials.json"
|
||||
),
|
||||
fix=("Set ANTHROPIC_API_KEY / CLAUDE_CODE_OAUTH_TOKEN, or place credentials at ~/.claude/.credentials.json"),
|
||||
)
|
||||
)
|
||||
except Exception as exc:
|
||||
@@ -458,7 +449,6 @@ def check_web_tool(config_path: Path, *, tool_name: str, label: str) -> CheckRes
|
||||
data = _load_yaml_file(config_path)
|
||||
|
||||
tool_entries = [t for t in data.get("tools", []) if t.get("name") == tool_name]
|
||||
tool_uses = [t.get("use", "") for t in tool_entries]
|
||||
if not tool_entries:
|
||||
return CheckResult(
|
||||
label,
|
||||
@@ -470,6 +460,7 @@ def check_web_tool(config_path: Path, *, tool_name: str, label: str) -> CheckRes
|
||||
free_providers = {
|
||||
"web_search": {"ddg_search": "DuckDuckGo (no key needed)"},
|
||||
"web_fetch": {"jina_ai": "Jina AI Reader (no key needed)"},
|
||||
"image_search": {"deerflow.community.image_search.tools": "DuckDuckGo Images (no key needed)"},
|
||||
}
|
||||
key_providers = {
|
||||
"web_search": {
|
||||
@@ -478,35 +469,57 @@ def check_web_tool(config_path: Path, *, tool_name: str, label: str) -> CheckRes
|
||||
"exa": "EXA_API_KEY",
|
||||
"firecrawl": "FIRECRAWL_API_KEY",
|
||||
"brave": "BRAVE_SEARCH_API_KEY",
|
||||
"serper": "SERPER_API_KEY",
|
||||
},
|
||||
"web_fetch": {
|
||||
"infoquest": "INFOQUEST_API_KEY",
|
||||
"exa": "EXA_API_KEY",
|
||||
"firecrawl": "FIRECRAWL_API_KEY",
|
||||
},
|
||||
"image_search": {
|
||||
"infoquest": "INFOQUEST_API_KEY",
|
||||
"serper": "SERPER_API_KEY",
|
||||
},
|
||||
}
|
||||
|
||||
for tool_entry in tool_entries:
|
||||
use = tool_entry.get("use", "")
|
||||
def _configured_key_detail(tool: dict, default_var: str) -> tuple[Status, str] | None:
|
||||
api_key = tool.get("api_key")
|
||||
if isinstance(api_key, str) and api_key.strip():
|
||||
key = api_key.strip()
|
||||
if key.startswith("$"):
|
||||
env_name = key[1:]
|
||||
val = os.environ.get(env_name)
|
||||
if val and val.strip():
|
||||
return ("ok", f"{env_name} set from config")
|
||||
# The referenced var is unset; fall through to the default
|
||||
# env var below, which tools use as a runtime fallback.
|
||||
else:
|
||||
return ("warn", "literal api_key set in config")
|
||||
|
||||
val = os.environ.get(default_var)
|
||||
return ("ok", f"{default_var} set") if val and val.strip() else None
|
||||
|
||||
for tool in tool_entries:
|
||||
use = tool.get("use", "")
|
||||
for provider, detail in free_providers.get(tool_name, {}).items():
|
||||
if provider in use:
|
||||
return CheckResult(label, "ok", detail)
|
||||
|
||||
for tool_entry in tool_entries:
|
||||
use = tool_entry.get("use", "")
|
||||
for tool in tool_entries:
|
||||
use = tool.get("use", "")
|
||||
for provider, var in key_providers.get(tool_name, {}).items():
|
||||
if provider in use:
|
||||
configured_key = tool_entry.get("api_key")
|
||||
if isinstance(configured_key, str) and configured_key.strip():
|
||||
if configured_key.startswith("$"):
|
||||
ref_var = configured_key[1:]
|
||||
if os.environ.get(ref_var):
|
||||
return CheckResult(label, "ok", f"{provider} ({ref_var} set via api_key)")
|
||||
else:
|
||||
return CheckResult(label, "ok", f"{provider} (api_key configured)")
|
||||
val = os.environ.get(var)
|
||||
if val:
|
||||
return CheckResult(label, "ok", f"{provider} ({var} set)")
|
||||
key_status = _configured_key_detail(tool, var)
|
||||
if key_status:
|
||||
status, detail = key_status
|
||||
if status == "warn":
|
||||
return CheckResult(
|
||||
label,
|
||||
"warn",
|
||||
f"{provider} ({detail})",
|
||||
fix=f"Move the API key to .env as {var}=<your-key> and reference it as ${var}",
|
||||
)
|
||||
return CheckResult(label, "ok", f"{provider} ({detail})")
|
||||
return CheckResult(
|
||||
label,
|
||||
"warn",
|
||||
@@ -514,7 +527,8 @@ def check_web_tool(config_path: Path, *, tool_name: str, label: str) -> CheckRes
|
||||
fix=f"Add {var}=<your-key> to .env, or run 'make setup'",
|
||||
)
|
||||
|
||||
for use in tool_uses:
|
||||
for tool in tool_entries:
|
||||
use = tool.get("use", "")
|
||||
split = _split_use_path(use)
|
||||
if split is None:
|
||||
return CheckResult(
|
||||
@@ -544,6 +558,10 @@ def check_web_fetch(config_path: Path) -> CheckResult:
|
||||
return check_web_tool(config_path, tool_name="web_fetch", label="web fetch configured")
|
||||
|
||||
|
||||
def check_image_search(config_path: Path) -> CheckResult:
|
||||
return check_web_tool(config_path, tool_name="image_search", label="image search configured")
|
||||
|
||||
|
||||
def check_frontend_env(project_root: Path) -> CheckResult:
|
||||
env_path = project_root / "frontend" / ".env"
|
||||
if env_path.exists():
|
||||
@@ -641,6 +659,7 @@ def check_env_file(project_root: Path) -> CheckResult:
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def main() -> int:
|
||||
project_root = Path(__file__).resolve().parents[1]
|
||||
config_path = project_root / "config.yaml"
|
||||
@@ -691,7 +710,7 @@ def main() -> int:
|
||||
sections.append(("LLM Provider", llm_checks))
|
||||
|
||||
# ── Web Capabilities ─────────────────────────────────────────────────────
|
||||
search_checks = [check_web_search(config_path), check_web_fetch(config_path)]
|
||||
search_checks = [check_web_search(config_path), check_web_fetch(config_path), check_image_search(config_path)]
|
||||
sections.append(("Web Capabilities", search_checks))
|
||||
|
||||
# ── Sandbox ──────────────────────────────────────────────────────────────
|
||||
|
||||
Reference in New Issue
Block a user