mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-12 02:15:58 +00:00
feat(community): add SearXNG and Browserless web search/fetch tools (#3451)
* feat(community): add SearXNG and Browserless web search/fetch tools - SearXNG web_search: privacy-focused meta search engine integration with configurable base_url via config.yaml tool settings - Browserless web_fetch: headless browser page fetching with readability article extraction - Both tools are fully configurable through tool config section - No external API keys required for basic operation * fix: address PR review feedback and add unit tests - Guard config.model_extra against None values (review #1, #2) - Coerce max_results to int when reading from config (review #2) - Fix web_fetch_tool to use direct HTTP fetch instead of reusing the web_search client config (review #3) - Fix misleading docstring for SearxngClient.fetch (review #4) - Remove unused target_url variable to pass Ruff lint (review #5) - Normalize bool config values with _normalize_bool helper to handle env-resolved string values correctly (review #6) - Add unit tests for both SearXNG and Browserless client classes and their tool functions with mocked httpx (review #7, #8) * fix: convert to async httpx to avoid blocking I/O on event loop - Replace httpx.Client with httpx.AsyncClient in both client classes - Convert tool functions to async def - Wrap readability_extractor calls in asyncio.to_thread() - Update all tests to use pytest.mark.asyncio and async mocks - Fix import sorting to pass Ruff lint * fix(browserless): replace deprecated waitUntil with waitForEvent The Browserless API has deprecated the waitUntil parameter. Replace with waitForEvent which accepts values like 'networkidle'. Default is empty (no wait), configurable via config.yaml. * fix(browserless): remove deprecated gotoTimeout and bestAttempt params The Browserless /content API does not accept gotoTimeout or bestAttempt as top-level payload keys. These were being sent unconditionally, causing 400 Bad Request errors on current Browserless versions. 
Changes: - Remove goto_timeout_ms parameter and 'gotoTimeout' from payload - Remove best_attempt parameter and 'bestAttempt' from payload - Remove _normalize_bool helper (no longer needed) - Remove goto_timeout_ms and best_attempt config reading in tools.py - Add tests for waitForSelector and reject params - Verify no deprecated params are sent in test_fetch_html_success * refactor(searxng): remove web_fetch_tool, decouple from web_search config SearXNG is a search engine — it should only provide web_search_tool. The web_fetch responsibility belongs to Browserless (headless Chrome) or Jina AI, not SearXNG. Changes: - Remove web_fetch_tool from SearXNG tools.py and __init__.py - Remove SearxngClient.fetch() method (no longer needed) - Remove unused asyncio/readability imports from SearXNG tools.py - Add test for max_results string-to-int coercion from config - Add test for search with categories parameter - Add test for httpx.RequestError handling - Apply ruff format fixes to browserless_client.py and test files
This commit is contained in:
@@ -0,0 +1,4 @@
|
||||
from .browserless_client import BrowserlessClient
|
||||
from .tools import web_fetch_tool
|
||||
|
||||
__all__ = ["BrowserlessClient", "web_fetch_tool"]
|
||||
@@ -0,0 +1,98 @@
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class BrowserlessClient:
|
||||
"""Client for Browserless headless Chrome API."""
|
||||
|
||||
def __init__(self, base_url: str, token: str = "", timeout_s: float = 30) -> None:
|
||||
self.base_url = base_url.rstrip("/")
|
||||
self.token = token
|
||||
self.timeout_s = timeout_s
|
||||
|
||||
async def fetch_html(
|
||||
self,
|
||||
url: str,
|
||||
wait_for_event: str = "",
|
||||
wait_for_timeout_ms: int = 0,
|
||||
wait_for_selector: str = "",
|
||||
wait_for_selector_timeout_ms: int = 5000,
|
||||
reject_resource_types: list[str] | None = None,
|
||||
reject_request_pattern: list[str] | None = None,
|
||||
) -> str:
|
||||
"""Fetch the rendered HTML of a page using Browserless.
|
||||
|
||||
Only sends accepted parameters for the current Browserless API version.
|
||||
Sets a default navigation timeout (30s) via query param.
|
||||
|
||||
Args:
|
||||
url: The URL to fetch.
|
||||
wait_for_event: Wait for a page event (e.g. "networkidle", "load").
|
||||
wait_for_timeout_ms: Extra wait after page load.
|
||||
wait_for_selector: CSS selector to wait for.
|
||||
wait_for_selector_timeout_ms: Timeout for selector wait.
|
||||
reject_resource_types: Resource types to block (e.g. ["image"]).
|
||||
reject_request_pattern: URL patterns to block.
|
||||
|
||||
Returns:
|
||||
Rendered HTML content.
|
||||
"""
|
||||
payload: dict[str, Any] = {
|
||||
"url": url,
|
||||
}
|
||||
|
||||
if self.token:
|
||||
payload["token"] = self.token
|
||||
if wait_for_event:
|
||||
payload["waitForEvent"] = wait_for_event
|
||||
if wait_for_timeout_ms > 0:
|
||||
payload["waitForTimeout"] = wait_for_timeout_ms
|
||||
if wait_for_selector:
|
||||
payload["waitForSelector"] = {
|
||||
"selector": wait_for_selector,
|
||||
"timeout": wait_for_selector_timeout_ms,
|
||||
}
|
||||
if reject_resource_types:
|
||||
payload["rejectResourceTypes"] = reject_resource_types
|
||||
if reject_request_pattern:
|
||||
payload["rejectRequestPattern"] = reject_request_pattern
|
||||
|
||||
logger.debug(f"Fetching URL via Browserless: {url}")
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=self.timeout_s) as client:
|
||||
resp = await client.post(
|
||||
f"{self.base_url}/content",
|
||||
json=payload,
|
||||
headers={
|
||||
"Content-Type": "application/json",
|
||||
"Cache-Control": "no-cache",
|
||||
},
|
||||
)
|
||||
|
||||
code = resp.status_code
|
||||
target_code = resp.headers.get("X-Response-Code", "")
|
||||
target_status = resp.headers.get("X-Response-Status", "")
|
||||
|
||||
logger.debug(f"Browserless response: code={code}, target_code={target_code}, target_status={target_status}")
|
||||
|
||||
if code != 200:
|
||||
return f"Error: Browserless HTTP {code}: {resp.text[:200]}"
|
||||
|
||||
html = resp.text
|
||||
if not html or not html.strip():
|
||||
return "Error: Browserless returned empty response"
|
||||
|
||||
return html
|
||||
|
||||
except httpx.TimeoutException:
|
||||
return f"Error: Browserless request timed out after {self.timeout_s}s"
|
||||
except httpx.RequestError as e:
|
||||
logger.error(f"Browserless request failed: {e}")
|
||||
return f"Error: Browserless request failed: {e!s}"
|
||||
except Exception as e:
|
||||
logger.error(f"Browserless fetch failed: {e}")
|
||||
return f"Error: Browserless fetch failed: {e!s}"
|
||||
@@ -0,0 +1,85 @@
|
||||
import asyncio
|
||||
import logging
|
||||
|
||||
from langchain.tools import tool
|
||||
|
||||
from deerflow.config import get_app_config
|
||||
from deerflow.utils.readability import ReadabilityExtractor
|
||||
|
||||
from .browserless_client import BrowserlessClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# readability_extractor runs CPU-bound parsing; always call via asyncio.to_thread
|
||||
_readability_extractor = ReadabilityExtractor()
|
||||
|
||||
|
||||
def _get_tool_config(tool_name: str) -> dict | None:
    """Look up a tool's config entry and return its extra fields.

    Args:
        tool_name: Name passed to ``get_app_config().get_tool_config``.

    Returns:
        ``None`` when the tool has no config entry; otherwise the entry's
        ``model_extra`` mapping, or an empty dict when that is ``None``.
    """
    tool_cfg = get_app_config().get_tool_config(tool_name)
    if tool_cfg is None:
        return None

    extra_fields = tool_cfg.model_extra
    if extra_fields is None:
        return {}
    return extra_fields
|
||||
|
||||
|
||||
def _get_browserless_client() -> BrowserlessClient:
    """Build a ``BrowserlessClient`` from the ``web_fetch`` tool config.

    Reads ``base_url``, ``token`` and ``timeout_s``. A key that is missing,
    null, or (for the two strings) empty falls back to its default, so a
    blank YAML value cannot crash client construction.

    Returns:
        A client configured from the tool settings or the defaults.

    Raises:
        ValueError: If ``timeout_s`` is set to something ``float()`` rejects.
    """
    cfg = _get_tool_config("web_fetch") or {}

    # `or` rather than `.get(key, default)`: the latter returns None when the
    # key is present but null.
    base_url = cfg.get("base_url") or "http://localhost:3032"
    token = cfg.get("token") or ""

    raw_timeout = cfg.get("timeout_s")
    timeout_s = 30.0 if raw_timeout is None else float(raw_timeout)

    return BrowserlessClient(base_url=str(base_url), token=str(token), timeout_s=timeout_s)
|
||||
|
||||
|
||||
def _cfg_int(cfg: dict, key: str, default: int) -> int:
    """Read an integer setting, treating a missing or null value as ``default``."""
    raw = cfg.get(key)
    return default if raw is None else int(raw)


def _cfg_str_list(cfg: dict, key: str) -> list[str] | None:
    """Read a list-of-strings setting; a bare string becomes a one-item list."""
    raw = cfg.get(key)
    if not raw:
        return None
    if isinstance(raw, str):
        return [raw]
    return [str(item) for item in raw]


@tool("web_fetch", parse_docstring=True)
async def web_fetch_tool(url: str) -> str:
    """Fetch the contents of a web page at a given URL using Browserless (headless Chrome).
    Only fetch EXACT URLs that have been provided directly by the user or have been returned in results from the web_search and web_fetch tools.
    This tool can NOT access content that requires authentication, such as private Google Docs or pages behind login walls.
    Do NOT add www. to URLs that do NOT have them.
    URLs must include the schema: https://example.com is a valid URL while example.com is an invalid URL.

    Args:
        url: The URL to fetch the contents of.
    """
    # The docstring above is parsed by `tool(..., parse_docstring=True)`, so it
    # is runtime text; implementation notes belong in comments like this one.
    try:
        # A missing config entry and an empty one are treated the same way.
        cfg = _get_tool_config("web_fetch") or {}

        client = _get_browserless_client()
        html = await client.fetch_html(
            url=url,
            wait_for_event=cfg.get("wait_for_event") or "",
            wait_for_timeout_ms=_cfg_int(cfg, "wait_for_timeout_ms", 0),
            wait_for_selector=cfg.get("wait_for_selector") or "",
            wait_for_selector_timeout_ms=_cfg_int(cfg, "wait_for_selector_timeout_ms", 5000),
            reject_resource_types=_cfg_str_list(cfg, "reject_resource_types"),
            reject_request_pattern=_cfg_str_list(cfg, "reject_request_pattern"),
        )

        # The client reports failures as "Error: ..." strings; pass them through.
        if html.startswith("Error:"):
            return html

        # Extraction runs in a worker thread so it does not block the event loop.
        article = await asyncio.to_thread(_readability_extractor.extract_article, html)
        # Output is capped at 4096 characters.
        return article.to_markdown()[:4096]

    except Exception as e:
        logger.exception("Error in web_fetch_tool: %s", e)
        return f"Error: {str(e)}"
|
||||
@@ -0,0 +1,3 @@
|
||||
from .tools import web_search_tool
|
||||
|
||||
__all__ = ["web_search_tool"]
|
||||
@@ -0,0 +1,65 @@
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SearxngClient:
|
||||
"""Client for SearXNG meta search engine API."""
|
||||
|
||||
def __init__(self, base_url: str) -> None:
|
||||
self.base_url = base_url.rstrip("/")
|
||||
|
||||
async def search(
|
||||
self,
|
||||
query: str,
|
||||
max_results: int = 5,
|
||||
categories: list[str] | None = None,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Search the web using SearXNG.
|
||||
|
||||
Args:
|
||||
query: The search query.
|
||||
max_results: Maximum number of results to return.
|
||||
categories: Search categories to use.
|
||||
|
||||
Returns:
|
||||
List of search result dictionaries.
|
||||
"""
|
||||
params: dict[str, Any] = {
|
||||
"q": query,
|
||||
"format": "json",
|
||||
"language": "auto",
|
||||
"pageno": 1,
|
||||
}
|
||||
if max_results:
|
||||
params["limit"] = max_results
|
||||
if categories:
|
||||
params["categories"] = ",".join(categories)
|
||||
|
||||
logger.debug(f"Searching SearXNG at {self.base_url} with query: {query}")
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=30) as client:
|
||||
resp = await client.get(
|
||||
f"{self.base_url}/search",
|
||||
params=params,
|
||||
headers={
|
||||
"User-Agent": "Mozilla/5.0 (compatible; DeerFlow/1.0)",
|
||||
"Accept": "application/json",
|
||||
},
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
results = data.get("results", [])
|
||||
return results[:max_results] if max_results else results
|
||||
except httpx.HTTPStatusError as e:
|
||||
logger.error(f"SearXNG search returned error status: {e}")
|
||||
raise
|
||||
except httpx.RequestError as e:
|
||||
logger.error(f"SearXNG search request failed: {e}")
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"An unexpected error occurred during SearXNG search: {e}")
|
||||
raise
|
||||
@@ -0,0 +1,58 @@
|
||||
import json
|
||||
import logging
|
||||
|
||||
from langchain.tools import tool
|
||||
|
||||
from deerflow.config import get_app_config
|
||||
|
||||
from .searxng_client import SearxngClient
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _get_tool_config(tool_name: str) -> dict | None:
    """Fetch the extra fields of a tool's config entry.

    Args:
        tool_name: Name passed to ``get_app_config().get_tool_config``.

    Returns:
        ``None`` if no entry exists for the tool; otherwise its
        ``model_extra`` mapping, with ``None`` normalised to an empty dict.
    """
    entry = get_app_config().get_tool_config(tool_name)
    if entry is not None:
        extra_fields = entry.model_extra
        return {} if extra_fields is None else extra_fields
    return None
|
||||
|
||||
|
||||
def _get_searxng_client() -> SearxngClient:
    """Build a ``SearxngClient`` from the ``web_search`` tool config.

    Reads ``base_url``. A missing, null, or empty value falls back to the
    default, so a blank YAML value cannot crash client construction.

    Returns:
        A client pointing at the configured or default SearXNG instance.
    """
    cfg = _get_tool_config("web_search") or {}
    # `or` rather than `.get(key, default)`: the latter returns None when the
    # key is present but null.
    base_url = cfg.get("base_url") or "http://localhost:8088"
    return SearxngClient(base_url=str(base_url))
|
||||
|
||||
|
||||
@tool("web_search", parse_docstring=True)
async def web_search_tool(query: str) -> str:
    """Search the web using SearXNG.

    Args:
        query: The query to search for.
    """
    # The docstring above is parsed by `tool(..., parse_docstring=True)`, so it
    # is runtime text; implementation notes belong in comments like this one.
    try:
        # A missing config entry and an empty one are treated the same way.
        cfg = _get_tool_config("web_search") or {}

        # A null `max_results` falls back to the default instead of making
        # int(None) fail on every search.
        raw_max = cfg.get("max_results")
        max_results = 5 if raw_max is None else int(raw_max)

        client = _get_searxng_client()
        results = await client.search(query, max_results=max_results)

        # Keep three fields per hit; SearXNG's `content` is exposed as `snippet`.
        normalized = [
            {
                "title": r.get("title", ""),
                "url": r.get("url", ""),
                "snippet": r.get("content", ""),
            }
            for r in results
        ]
        return json.dumps(normalized, indent=2, ensure_ascii=False)
    except Exception as e:
        # Errors are returned as JSON rather than raised.
        logger.exception("Error in web_search_tool: %s", e)
        return json.dumps({"error": str(e), "query": query}, ensure_ascii=False)
|
||||
@@ -0,0 +1,187 @@
|
||||
"""Tests for Browserless community tools."""
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from deerflow.community.browserless import tools
|
||||
from deerflow.community.browserless.browserless_client import BrowserlessClient
|
||||
|
||||
|
||||
class AsyncMock(MagicMock):
    """MagicMock variant whose call must be awaited.

    Awaiting the call delegates to ``MagicMock.__call__``, so ``return_value``,
    ``side_effect`` and call recording behave as on a regular mock.
    """

    async def __call__(self, *call_args, **call_kwargs):
        outcome = super().__call__(*call_args, **call_kwargs)
        return outcome
|
||||
|
||||
|
||||
@pytest.mark.asyncio
class TestBrowserlessClient:
    """Tests for the BrowserlessClient class."""

    _ASYNC_CLIENT = "deerflow.community.browserless.browserless_client.httpx.AsyncClient"
    _BASE_URL = "http://browserless:3000"

    @staticmethod
    def _response(text, status_code=200):
        """Build a fake httpx response with the given body and status."""
        resp = MagicMock()
        resp.status_code = status_code
        resp.text = text
        resp.headers = {}
        return resp

    @staticmethod
    def _install(client_cls, post):
        """Make ``async with AsyncClient(...)`` yield a session using ``post``."""
        session = MagicMock()
        client_cls.return_value.__aenter__.return_value = session
        session.post = post
        return session

    async def test_fetch_html_success(self):
        """fetch_html returns HTML content on success."""
        page = "<html><body>Page content</body></html>"
        with patch(self._ASYNC_CLIENT) as client_cls:
            session = self._install(client_cls, AsyncMock(return_value=self._response(page)))
            browserless = BrowserlessClient(base_url=self._BASE_URL)
            result = await browserless.fetch_html("https://example.com")

            assert result == page
            sent = session.post.call_args.kwargs["json"]
            assert sent["url"] == "https://example.com"
            # Deprecated Browserless options must never be sent.
            assert "waitUntil" not in sent
            assert "gotoTimeout" not in sent
            assert "bestAttempt" not in sent

    async def test_fetch_html_empty_response(self):
        """fetch_html returns error for empty response."""
        with patch(self._ASYNC_CLIENT) as client_cls:
            self._install(client_cls, AsyncMock(return_value=self._response(" ")))
            browserless = BrowserlessClient(base_url=self._BASE_URL)
            result = await browserless.fetch_html("https://example.com")
            assert result == "Error: Browserless returned empty response"

    async def test_fetch_html_http_error(self):
        """fetch_html returns error for non-200 status."""
        with patch(self._ASYNC_CLIENT) as client_cls:
            failure = self._response("Internal error", status_code=500)
            self._install(client_cls, AsyncMock(return_value=failure))
            browserless = BrowserlessClient(base_url=self._BASE_URL)
            result = await browserless.fetch_html("https://example.com")
            assert "Error: Browserless HTTP 500" in result

    async def test_fetch_html_timeout(self):
        """fetch_html returns timeout error."""
        with patch(self._ASYNC_CLIENT) as client_cls:
            import httpx

            self._install(client_cls, AsyncMock(side_effect=httpx.TimeoutException("Timed out")))
            browserless = BrowserlessClient(base_url=self._BASE_URL, timeout_s=10)
            result = await browserless.fetch_html("https://example.com")
            lowered = result.lower()
            assert "timed out" in lowered or "timeout" in lowered

    async def test_fetch_html_with_token(self):
        """fetch_html includes token in payload when set."""
        with patch(self._ASYNC_CLIENT) as client_cls:
            session = self._install(client_cls, AsyncMock(return_value=self._response("<html>OK</html>")))
            browserless = BrowserlessClient(base_url=self._BASE_URL, token="my-token")
            await browserless.fetch_html("https://example.com")

            sent = session.post.call_args.kwargs["json"]
            assert sent["token"] == "my-token"

    async def test_fetch_html_with_wait_for_selector(self):
        """fetch_html sends waitForSelector when selector is set."""
        with patch(self._ASYNC_CLIENT) as client_cls:
            session = self._install(client_cls, AsyncMock(return_value=self._response("<html>OK</html>")))
            browserless = BrowserlessClient(base_url=self._BASE_URL)
            await browserless.fetch_html("https://example.com", wait_for_selector="article")

            sent = session.post.call_args.kwargs["json"]
            assert sent["waitForSelector"]["selector"] == "article"

    async def test_fetch_html_with_reject_params(self):
        """fetch_html sends reject params when set."""
        with patch(self._ASYNC_CLIENT) as client_cls:
            session = self._install(client_cls, AsyncMock(return_value=self._response("<html>OK</html>")))
            browserless = BrowserlessClient(base_url=self._BASE_URL)
            await browserless.fetch_html(
                "https://example.com",
                reject_resource_types=["image"],
                reject_request_pattern=[r"\.css$"],
            )

            sent = session.post.call_args.kwargs["json"]
            assert sent["rejectResourceTypes"] == ["image"]
            assert sent["rejectRequestPattern"] == [r"\.css$"]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
class TestBrowserlessTools:
    """Tests for the Browserless tool functions."""

    @staticmethod
    async def _invoke(url, fetch_html):
        """Run web_fetch_tool with a stub client and no tool config."""
        stub_client = MagicMock()
        stub_client.fetch_html = fetch_html
        with patch("deerflow.community.browserless.tools._get_browserless_client", return_value=stub_client):
            with patch("deerflow.community.browserless.tools._get_tool_config", return_value=None):
                return await tools.web_fetch_tool.ainvoke(url)

    async def test_web_fetch_tool_success(self):
        """web_fetch_tool successfully fetches and extracts content."""
        page = "<html><body><article><h1>Title</h1><p>Content</p></article></body></html>"
        result = await self._invoke("https://example.com/article", AsyncMock(return_value=page))
        assert "Error:" not in result

    async def test_web_fetch_tool_error(self):
        """web_fetch_tool returns error when fetch fails."""
        fetch_html = AsyncMock(return_value="Error: Browserless returned empty response")
        result = await self._invoke("https://example.com", fetch_html)
        assert result.startswith("Error:")

    async def test_web_fetch_tool_exception(self):
        """web_fetch_tool returns error when client raises exception."""
        fetch_html = AsyncMock(side_effect=Exception("Unexpected error"))
        result = await self._invoke("https://example.com", fetch_html)
        assert result.startswith("Error:")
|
||||
@@ -0,0 +1,163 @@
|
||||
"""Tests for SearXNG community tools."""
|
||||
|
||||
import json
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from deerflow.community.searxng import tools
|
||||
from deerflow.community.searxng.searxng_client import SearxngClient
|
||||
|
||||
|
||||
class AsyncMock(MagicMock):
    """MagicMock subclass that can be awaited when called.

    The awaited call is forwarded to ``MagicMock.__call__``, so the configured
    ``return_value`` / ``side_effect`` apply and the call is recorded.
    """

    async def __call__(self, *positional, **named):
        produced = super().__call__(*positional, **named)
        return produced
|
||||
|
||||
|
||||
@pytest.mark.asyncio
class TestSearxngClient:
    """Tests for the SearxngClient class."""

    _ASYNC_CLIENT = "deerflow.community.searxng.searxng_client.httpx.AsyncClient"
    _BASE_URL = "http://searxng:8080"

    @staticmethod
    def _json_response(body):
        """Build a fake 200 response whose ``json()`` returns ``body``."""
        resp = MagicMock()
        resp.status_code = 200
        resp.json.return_value = body
        resp.raise_for_status.return_value = None
        return resp

    @staticmethod
    def _install(client_cls, get):
        """Make ``async with AsyncClient(...)`` yield a session using ``get``."""
        session = MagicMock()
        client_cls.return_value.__aenter__.return_value = session
        session.get = get
        return session

    async def test_search_success(self):
        """Search returns normalized results."""
        body = {
            "results": [
                {"title": "Page 1", "url": "https://example.com/1", "content": "Snippet 1"},
                {"title": "Page 2", "url": "https://example.com/2", "content": "Snippet 2"},
            ]
        }
        with patch(self._ASYNC_CLIENT) as client_cls:
            self._install(client_cls, AsyncMock(return_value=self._json_response(body)))
            searx = SearxngClient(base_url=self._BASE_URL)
            hits = await searx.search("test query", max_results=5)

            assert len(hits) == 2
            assert hits[0]["title"] == "Page 1"
            assert hits[1]["url"] == "https://example.com/2"

    async def test_search_empty_results(self):
        """Search returns empty list when no results."""
        with patch(self._ASYNC_CLIENT) as client_cls:
            self._install(client_cls, AsyncMock(return_value=self._json_response({"results": []})))
            searx = SearxngClient(base_url=self._BASE_URL)
            hits = await searx.search("empty query")
            assert hits == []

    async def test_search_http_error(self):
        """Search raises on HTTP error."""
        with patch(self._ASYNC_CLIENT) as client_cls:
            import httpx

            forbidden = MagicMock()
            forbidden.raise_for_status.side_effect = httpx.HTTPStatusError("403 Forbidden", request=MagicMock(), response=MagicMock())
            self._install(client_cls, AsyncMock(return_value=forbidden))

            searx = SearxngClient(base_url=self._BASE_URL)
            with pytest.raises(httpx.HTTPStatusError):
                await searx.search("blocked query")

    async def test_search_request_error(self):
        """Search raises on request error."""
        with patch(self._ASYNC_CLIENT) as client_cls:
            import httpx

            self._install(client_cls, AsyncMock(side_effect=httpx.RequestError("Connection refused")))

            searx = SearxngClient(base_url=self._BASE_URL)
            with pytest.raises(httpx.RequestError):
                await searx.search("unreachable query")

    async def test_search_with_categories(self):
        """Search passes categories parameter."""
        with patch(self._ASYNC_CLIENT) as client_cls:
            session = self._install(client_cls, AsyncMock(return_value=self._json_response({"results": []})))
            searx = SearxngClient(base_url=self._BASE_URL)
            await searx.search("test", categories=["news", "science"])

            sent_params = session.get.call_args.kwargs["params"]
            assert sent_params["categories"] == "news,science"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
class TestSearxngTools:
    """Tests for the SearXNG tool functions."""

    @staticmethod
    async def _run_tool(search, tool_config=None):
        """Invoke web_search_tool with a stub client and the given tool config."""
        stub_client = MagicMock()
        stub_client.search = search
        with patch("deerflow.community.searxng.tools._get_searxng_client", return_value=stub_client):
            with patch("deerflow.community.searxng.tools._get_tool_config", return_value=tool_config):
                return await tools.web_search_tool.ainvoke("test query")

    async def test_web_search_tool_success(self):
        """web_search_tool returns JSON results."""
        search = AsyncMock(
            return_value=[
                {"title": "Result 1", "url": "https://example.com/1", "content": "Desc 1"},
            ]
        )
        payload = json.loads(await self._run_tool(search))
        assert len(payload) == 1
        assert payload[0]["title"] == "Result 1"

    async def test_web_search_tool_error(self):
        """web_search_tool handles errors gracefully."""
        search = AsyncMock(side_effect=Exception("API error"))
        payload = json.loads(await self._run_tool(search))
        assert "error" in payload

    async def test_web_search_tool_with_max_results(self):
        """web_search_tool respects max_results config."""
        # The stub returns 10 results; only the argument forwarded to the
        # client is checked here, not the size of the tool's output.
        ten_hits = [{"title": f"Result {i}", "url": f"https://example.com/{i}", "content": f"Desc {i}"} for i in range(10)]
        search = AsyncMock(return_value=ten_hits)

        await self._run_tool(search, tool_config={"max_results": "3"})

        # Verify that search was called with max_results=3 (coerced from string)
        search.assert_called_once()
        assert search.call_args.kwargs["max_results"] == 3
|
||||
Reference in New Issue
Block a user