mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-11 09:55:59 +00:00
* fix(web_fetch): support proxy for Jina reader in restricted networks

  The web_fetch tool built a bare httpx.AsyncClient() with no proxy awareness, so users behind a corporate proxy / in Docker / WSL could not reach https://r.jina.ai and web_fetch timed out.

  - Add optional `proxy` / `trust_env` params to JinaClient.crawl and wire them from the `web_fetch` tool config (with type coercion for YAML string values).
  - Pass internal service hostnames through NO_PROXY in both compose files so proxy env inherited via env_file does not break in-cluster calls (gateway/provisioner/etc).
  - Load proxy vars from .env into the shell in scripts/docker.sh so the NO_PROXY interpolation can merge user-provided values on the `make` path.
  - Document proxy/trust_env options in config.example.yaml.

  Closes #3418

* Potential fix for pull request finding

  Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -9,6 +9,38 @@ from deerflow.utils.readability import ReadabilityExtractor
|
||||
readability_extractor = ReadabilityExtractor()
|
||||
|
||||
|
||||
def _coerce_bool(value: object, default: bool) -> bool:
|
||||
if isinstance(value, bool):
|
||||
return value
|
||||
if isinstance(value, str):
|
||||
normalized = value.strip().lower()
|
||||
if normalized in {"1", "true", "yes", "on"}:
|
||||
return True
|
||||
if normalized in {"0", "false", "no", "off"}:
|
||||
return False
|
||||
return default
|
||||
|
||||
|
||||
def _coerce_timeout(value: object, default: int) -> int:
|
||||
if isinstance(value, bool):
|
||||
return default
|
||||
if isinstance(value, int):
|
||||
return value
|
||||
if isinstance(value, str):
|
||||
try:
|
||||
return int(value)
|
||||
except ValueError:
|
||||
return default
|
||||
return default
|
||||
|
||||
|
||||
def _coerce_proxy(value: object) -> str | None:
|
||||
if not isinstance(value, str):
|
||||
return None
|
||||
proxy = value.strip()
|
||||
return proxy or None
|
||||
|
||||
|
||||
@tool("web_fetch", parse_docstring=True)
|
||||
async def web_fetch_tool(url: str) -> str:
|
||||
"""Fetch the contents of a web page at a given URL.
|
||||
@@ -22,10 +54,14 @@ async def web_fetch_tool(url: str) -> str:
|
||||
"""
|
||||
jina_client = JinaClient()
|
||||
timeout = 10
|
||||
proxy = None
|
||||
trust_env = True
|
||||
config = get_app_config().get_tool_config("web_fetch")
|
||||
if config is not None and "timeout" in config.model_extra:
|
||||
timeout = config.model_extra.get("timeout")
|
||||
html_content = await jina_client.crawl(url, return_format="html", timeout=timeout)
|
||||
if config is not None:
|
||||
timeout = _coerce_timeout(config.model_extra.get("timeout"), timeout)
|
||||
proxy = _coerce_proxy(config.model_extra.get("proxy"))
|
||||
trust_env = _coerce_bool(config.model_extra.get("trust_env"), trust_env)
|
||||
html_content = await jina_client.crawl(url, return_format="html", timeout=timeout, proxy=proxy, trust_env=trust_env)
|
||||
if isinstance(html_content, str) and html_content.startswith("Error:"):
|
||||
return html_content
|
||||
article = await asyncio.to_thread(readability_extractor.extract_article, html_content)
|
||||
|
||||
Reference in New Issue
Block a user