Merge branch 'main' into fix-2804

This commit is contained in:
Willem Jiang
2026-05-12 15:53:28 +08:00
committed by GitHub
38 changed files with 953 additions and 291 deletions
@@ -14,6 +14,10 @@ def _ai_with_tool_calls(tool_calls):
return AIMessage(content="", tool_calls=tool_calls)
def _ai_with_invalid_tool_calls(invalid_tool_calls):
    """Build an empty-content AIMessage that carries only ``invalid_tool_calls``."""
    return AIMessage(
        content="",
        tool_calls=[],
        invalid_tool_calls=invalid_tool_calls,
    )
def _tool_msg(tool_call_id, name="test_tool"):
    """Build a ToolMessage with content ``"result"`` answering ``tool_call_id``."""
    reply = ToolMessage(content="result", tool_call_id=tool_call_id, name=name)
    return reply
@@ -22,6 +26,16 @@ def _tc(name="bash", tc_id="call_1"):
return {"name": name, "id": tc_id, "args": {}}
def _invalid_tc(name="write_file", tc_id="write_file:36", error="Failed to parse tool arguments: malformed JSON"):
return {
"type": "invalid_tool_call",
"name": name,
"id": tc_id,
"args": '{"description":"write report","path":"/mnt/user-data/outputs/report.md","content":"bad {"json"}"}',
"error": error,
}
class TestBuildPatchedMessagesNoPatch:
def test_empty_messages(self):
mw = DanglingToolCallMiddleware()
@@ -144,6 +158,42 @@ class TestBuildPatchedMessagesPatching:
assert patched[1].name == "bash"
assert patched[1].status == "error"
def test_invalid_tool_call_is_patched(self):
    """An unanswered invalid tool call yields a second, error-status ToolMessage."""
    middleware = DanglingToolCallMiddleware()
    history = [_ai_with_invalid_tool_calls([_invalid_tc()])]

    patched = middleware._build_patched_messages(history)

    assert patched is not None
    assert len(patched) == 2

    placeholder = patched[1]
    assert isinstance(placeholder, ToolMessage)
    assert placeholder.tool_call_id == "write_file:36"
    assert placeholder.name == "write_file"
    assert placeholder.status == "error"
    # The placeholder text must mention both the generic reason and the
    # original parse error.
    assert "arguments were invalid" in placeholder.content
    assert "Failed to parse tool arguments" in placeholder.content
def test_valid_and_invalid_tool_calls_are_both_patched(self):
    """One valid and one invalid dangling call each receive a ToolMessage."""
    middleware = DanglingToolCallMiddleware()
    ai_message = AIMessage(
        content="",
        tool_calls=[_tc("bash", "call_1")],
        invalid_tool_calls=[_invalid_tc()],
    )

    patched = middleware._build_patched_messages([ai_message])

    assert patched is not None
    tool_replies = [msg for msg in patched if isinstance(msg, ToolMessage)]
    assert len(tool_replies) == 2
    answered_ids = {reply.tool_call_id for reply in tool_replies}
    assert answered_ids == {"call_1", "write_file:36"}
def test_invalid_tool_call_already_responded_is_not_patched(self):
    """No patch is built when the invalid call already has a ToolMessage."""
    middleware = DanglingToolCallMiddleware()
    ai_message = _ai_with_invalid_tool_calls([_invalid_tc()])
    existing_reply = _tool_msg("write_file:36", "write_file")

    result = middleware._build_patched_messages([ai_message, existing_reply])

    assert result is None
class TestWrapModelCall:
def test_no_patch_passthrough(self):
+42
View File
@@ -122,3 +122,45 @@ def test_health_still_works_when_docs_disabled():
resp = client.get("/health")
assert resp.status_code == 200
assert resp.json()["status"] == "healthy"
# ---------------------------------------------------------------------------
# Runtime CORS behavior
# ---------------------------------------------------------------------------
def _make_gateway_client(cors_origins: str) -> TestClient:
    """Create a TestClient for a gateway app built with ``GATEWAY_CORS_ORIGINS`` set.

    The environment variable is patched only for the duration of app creation.
    """
    env_override = {"GATEWAY_CORS_ORIGINS": cors_origins}
    with patch.dict(os.environ, env_override):
        _reset_gateway_config()
        # Imported inside the patched environment, as in the original.
        from app.gateway.app import create_app

        gateway_app = create_app()
        return TestClient(gateway_app)
def test_gateway_cors_allows_configured_origin():
    """GATEWAY_CORS_ORIGINS should control actual browser CORS responses."""
    client = _make_gateway_client("https://app.example")

    response = client.get("/health", headers={"Origin": "https://app.example"})

    assert response.status_code == 200
    cors_headers = response.headers
    # The configured origin is echoed back and credentials are allowed.
    assert cors_headers["access-control-allow-origin"] == "https://app.example"
    assert cors_headers["access-control-allow-credentials"] == "true"
def test_gateway_cors_rejects_unconfigured_origin():
    """A request from an origin outside the allowlist gets no allow-origin header."""
    client = _make_gateway_client("https://app.example")
    foreign_headers = {"Origin": "https://evil.example"}

    response = client.get("/health", headers=foreign_headers)

    # The request itself still succeeds; only the CORS header is withheld.
    assert response.status_code == 200
    assert "access-control-allow-origin" not in response.headers
def test_gateway_cors_normalizes_configured_default_port():
    """An allowlist entry with an explicit ``:443`` matches the port-less origin."""
    client = _make_gateway_client("https://app.example:443")
    browser_headers = {"Origin": "https://app.example"}

    response = client.get("/health", headers=browser_headers)

    assert response.status_code == 200
    assert response.headers["access-control-allow-origin"] == "https://app.example"
@@ -53,6 +53,29 @@ def test_nginx_routes_official_langgraph_prefix_to_gateway_api():
assert "proxy_pass http://gateway" in content or "proxy_pass http://$gateway_upstream" in content
def test_nginx_defers_cors_to_gateway_allowlist():
    """Neither nginx config may contain CORS headers or an OPTIONS short-circuit."""
    forbidden_snippets = (
        "Access-Control-Allow-Origin",
        "Access-Control-Allow-Methods",
        "Access-Control-Allow-Headers",
        "Access-Control-Allow-Credentials",
        "proxy_hide_header 'Access-Control-Allow-",
        "if ($request_method = 'OPTIONS')",
    )
    nginx_configs = ("docker/nginx/nginx.local.conf", "docker/nginx/nginx.conf")

    for path in nginx_configs:
        content = _read(path)
        for snippet in forbidden_snippets:
            assert snippet not in content
def test_gateway_cors_configuration_uses_gateway_allowlist():
    """Check the gateway sources for the expected CORS configuration markers."""
    config_src = _read("backend/app/gateway/config.py")
    app_src = _read("backend/app/gateway/app.py")
    csrf_src = _read("backend/app/gateway/csrf_middleware.py")

    # config.py must hold neither a quoted CORS_ORIGINS literal nor a
    # cors_origins identifier.
    quoted_legacy_name = re.search(r"(?<!GATEWAY_)[\"']CORS_ORIGINS[\"']", config_src)
    assert not quoted_legacy_name
    assert "cors_origins" not in config_src

    assert "get_configured_cors_origins" in app_src
    assert "GATEWAY_CORS_ORIGINS" in csrf_src
def test_frontend_rewrites_langgraph_prefix_to_gateway():
next_config = _read("frontend/next.config.js")
api_client = _read("frontend/src/core/api/api-client.ts")
+8 -8
View File
@@ -5,7 +5,8 @@ import pytest
from langchain_core.tools import StructuredTool
from pydantic import BaseModel, Field
from deerflow.mcp.tools import _make_sync_tool_wrapper, get_mcp_tools
from deerflow.mcp.tools import get_mcp_tools
from deerflow.tools.sync import make_sync_tool_wrapper
class MockArgs(BaseModel):
@@ -51,14 +52,13 @@ def test_mcp_tool_sync_wrapper_generation():
def test_mcp_tool_sync_wrapper_in_running_loop():
"""Test the actual helper function from production code (Fix for Comment 1 & 3)."""
"""Test the shared sync wrapper from production code."""
async def mock_coro(x: int):
await asyncio.sleep(0.01)
return f"async_result: {x}"
# Test the real helper function exported from deerflow.mcp.tools
sync_func = _make_sync_tool_wrapper(mock_coro, "test_tool")
sync_func = make_sync_tool_wrapper(mock_coro, "test_tool")
async def run_in_loop():
# This call should succeed due to ThreadPoolExecutor in the real helper
@@ -70,16 +70,16 @@ def test_mcp_tool_sync_wrapper_in_running_loop():
def test_mcp_tool_sync_wrapper_exception_logging():
    """Test the shared sync wrapper's error logging."""

    async def error_coro():
        raise ValueError("Tool failure")

    sync_func = make_sync_tool_wrapper(error_coro, "error_tool")

    # Patch the logger of the module that now owns the wrapper.
    with patch("deerflow.tools.sync.logger.error") as mock_log_error:
        # The wrapper must re-raise the coroutine's exception unchanged.
        with pytest.raises(ValueError, match="Tool failure"):
            sync_func()

        mock_log_error.assert_called_once()
        # The tool name is the second positional argument to logger.error.
        assert mock_log_error.call_args[0][1] == "error_tool"
+93
View File
@@ -339,6 +339,99 @@ class TestConvenienceFields:
data = j.get_completion_data()
assert data["first_human_message"] == "What is AI?"
@pytest.mark.anyio
async def test_completion_data_counts_human_ai_and_tool_messages(self, journal_setup):
    """One human, one AI and one tool event give ``message_count == 3``."""
    from langchain_core.messages import HumanMessage, ToolMessage

    journal, _ = journal_setup

    # Feed one event of each kind through the callback hooks.
    journal.on_chat_model_start(
        {},
        [[HumanMessage(content="Question")]],
        run_id=uuid4(),
        tags=["lead_agent"],
    )
    journal.on_llm_end(
        _make_llm_response("Answer"),
        run_id=uuid4(),
        parent_run_id=None,
        tags=["lead_agent"],
    )
    journal.on_tool_end(
        ToolMessage(content="Tool result", tool_call_id="call_1", name="search"),
        run_id=uuid4(),
    )

    summary = journal.get_completion_data()
    assert summary["message_count"] == 3
    assert summary["first_human_message"] == "Question"
    assert summary["last_ai_message"] == "Answer"
@pytest.mark.anyio
async def test_tool_call_only_ai_does_not_clear_last_ai_message(self, journal_setup):
    """An empty-content, tool-call-only AI reply keeps the earlier ``last_ai_message``."""
    journal, _ = journal_setup

    text_reply = _make_llm_response("Useful answer")
    journal.on_llm_end(text_reply, run_id=uuid4(), parent_run_id=None, tags=["lead_agent"])

    search_call = {"id": "call_1", "name": "search", "args": {}}
    tool_only_reply = _make_llm_response("", tool_calls=[search_call])
    journal.on_llm_end(tool_only_reply, run_id=uuid4(), parent_run_id=None, tags=["lead_agent"])

    summary = journal.get_completion_data()
    # Both AI messages are counted, but only the one with text is retained.
    assert summary["message_count"] == 2
    assert summary["last_ai_message"] == "Useful answer"
@pytest.mark.anyio
async def test_last_ai_message_extracts_mixed_content_without_extra_newlines(self, journal_setup):
    """Text blocks, a ``content``-keyed block and a bare string join with no separators."""
    journal, _ = journal_setup

    mixed_content = [
        {"type": "text", "text": "First "},
        {"type": "text", "content": "second"},
        " third",
        {"type": "image", "url": "ignored"},
    ]
    journal.on_llm_end(
        _make_llm_response(mixed_content),
        run_id=uuid4(),
        parent_run_id=None,
        tags=["lead_agent"],
    )

    summary = journal.get_completion_data()
    assert summary["message_count"] == 1
    # The image block contributes nothing to the extracted text.
    assert summary["last_ai_message"] == "First second third"
@pytest.mark.anyio
async def test_last_ai_message_extracts_mapping_content(self, journal_setup):
    """A dict content with a ``content`` key is unwrapped to its text."""
    journal, _ = journal_setup

    nested_reply = _make_llm_response({"content": "Nested answer"})
    journal.on_llm_end(nested_reply, run_id=uuid4(), parent_run_id=None, tags=["lead_agent"])

    summary = journal.get_completion_data()
    assert summary["message_count"] == 1
    assert summary["last_ai_message"] == "Nested answer"
@pytest.mark.anyio
async def test_duplicate_llm_run_id_does_not_double_count_message_summary(self, journal_setup):
    """Two ``on_llm_end`` calls with one run_id count as one message; usage is still recorded."""
    journal, _ = journal_setup
    shared_run_id = uuid4()

    # First delivery carries no usage.
    without_usage = _make_llm_response("Answer", usage=None)
    journal.on_llm_end(without_usage, run_id=shared_run_id, parent_run_id=None, tags=["lead_agent"])

    # Second delivery reuses the run_id and carries the token usage.
    usage = {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}
    with_usage = _make_llm_response("Answer", usage=usage)
    journal.on_llm_end(with_usage, run_id=shared_run_id, parent_run_id=None, tags=["lead_agent"])

    summary = journal.get_completion_data()
    assert summary["message_count"] == 1
    assert summary["last_ai_message"] == "Answer"
    assert summary["total_tokens"] == 15
@pytest.mark.anyio
async def test_subagent_ai_does_not_overwrite_lead_last_ai_message(self, journal_setup):
    """A ``subagent:``-tagged reply is counted but does not replace the lead's answer."""
    journal, _ = journal_setup

    lead_reply = _make_llm_response("Lead answer")
    journal.on_llm_end(lead_reply, run_id=uuid4(), parent_run_id=None, tags=["lead_agent"])

    subagent_reply = _make_llm_response("Subagent detail")
    journal.on_llm_end(subagent_reply, run_id=uuid4(), parent_run_id=None, tags=["subagent:research"])

    summary = journal.get_completion_data()
    assert summary["message_count"] == 2
    assert summary["last_ai_message"] == "Lead answer"
@pytest.mark.anyio
async def test_get_completion_data(self, journal_setup):
j, _ = journal_setup
+51
View File
@@ -5,6 +5,7 @@ import re
import pytest
from deerflow.runtime import RunManager, RunStatus
from deerflow.runtime.runs.store.memory import MemoryRunStore
ISO_RE = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}")
@@ -141,3 +142,53 @@ async def test_create_defaults(manager: RunManager):
assert record.kwargs == {}
assert record.multitask_strategy == "reject"
assert record.assistant_id is None
@pytest.mark.anyio
async def test_model_name_create_or_reject():
    """create_or_reject should accept and persist model_name."""
    from deerflow.runtime.runs.schemas import DisconnectMode

    run_store = MemoryRunStore()
    run_manager = RunManager(store=run_store)

    created = await run_manager.create_or_reject(
        "thread-1",
        assistant_id="lead_agent",
        on_disconnect=DisconnectMode.cancel,
        metadata={"key": "val"},
        kwargs={"input": {}},
        multitask_strategy="reject",
        model_name="anthropic.claude-sonnet-4-20250514-v1:0",
    )

    # The returned record carries the model name and starts out pending.
    assert created.model_name == "anthropic.claude-sonnet-4-20250514-v1:0"
    assert created.status == RunStatus.pending

    # The backing store holds the same value.
    persisted = await run_store.get(created.run_id)
    assert persisted is not None
    assert persisted["model_name"] == "anthropic.claude-sonnet-4-20250514-v1:0"

    # So does the record returned by the manager's get().
    looked_up = run_manager.get(created.run_id)
    assert looked_up is not None
    assert looked_up.model_name == "anthropic.claude-sonnet-4-20250514-v1:0"
@pytest.mark.anyio
async def test_model_name_default_is_none():
    """create_or_reject with model_name=None keeps None on the record and in the store."""
    from deerflow.runtime.runs.schemas import DisconnectMode

    store = MemoryRunStore()
    mgr = RunManager(store=store)

    record = await mgr.create_or_reject(
        "thread-1",
        on_disconnect=DisconnectMode.cancel,
        model_name=None,
    )
    assert record.model_name is None

    stored = await store.get(record.run_id)
    # Guard before subscripting so a missing row fails as a clear assertion
    # rather than a TypeError on None.
    assert stored is not None
    assert stored["model_name"] is None
+29
View File
@@ -249,3 +249,32 @@ class TestRunRepository:
rows = await repo.list_by_thread("t1", user_id=None)
assert len(rows) == 2
await _cleanup()
@pytest.mark.anyio
async def test_model_name_persistence(self, tmp_path):
    """RunRepository should persist, normalize, and truncate model_name correctly via SQL."""
    from deerflow.persistence.engine import get_session_factory, init_engine

    url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}"
    await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
    try:
        repo = RunRepository(get_session_factory())

        # Plain string round-trips unchanged.
        await repo.put("run-1", thread_id="thread-1", model_name="gpt-4o")
        row = await repo.get("run-1")
        assert row is not None
        assert row["model_name"] == "gpt-4o"

        # A 200-character name is expected back cut to 128 characters.
        long_name = "a" * 200
        await repo.put("run-2", thread_id="thread-1", model_name=long_name)
        row2 = await repo.get("run-2")
        assert row2["model_name"] == "a" * 128

        # A non-string value is expected back as its string form.
        await repo.put("run-3", thread_id="thread-1", model_name=123)
        row3 = await repo.get("run-3")
        assert row3["model_name"] == "123"

        # None stays None.
        await repo.put("run-4", thread_id="thread-1", model_name=None)
        row4 = await repo.get("run-4")
        assert row4["model_name"] is None
    finally:
        # Run cleanup even when an assertion above fails.
        await _cleanup()
+183 -1
View File
@@ -291,7 +291,7 @@ class TestAgentConstruction:
assert captured["agent"]["model"] is model
assert captured["agent"]["middleware"] is middlewares
assert captured["agent"]["tools"] == []
assert captured["agent"]["system_prompt"] == base_config.system_prompt
assert captured["agent"]["system_prompt"] is None # system_prompt is merged into initial state messages
@pytest.mark.anyio
async def test_load_skill_messages_uses_explicit_app_config_for_skill_storage(
@@ -331,6 +331,124 @@ class TestAgentConstruction:
assert len(messages) == 1
assert "Use demo skill" in messages[0].content
@pytest.mark.anyio
async def test_build_initial_state_consolidates_system_prompt_and_skills(
    self,
    classes,
    base_config,
    monkeypatch: pytest.MonkeyPatch,
    tmp_path,
):
    """_build_initial_state merges system_prompt and skills into one SystemMessage."""
    from langchain_core.messages import HumanMessage, SystemMessage

    SubagentExecutor = classes["SubagentExecutor"]

    # Write a one-file skill to disk for the fake storage to return.
    skill_file = tmp_path / "my-skill" / "SKILL.md"
    skill_file.parent.mkdir()
    skill_file.write_text("Skill instructions here", encoding="utf-8")

    def fake_load_skills(*, enabled_only):
        return [SimpleNamespace(name="my-skill", skill_file=skill_file, allowed_tools=None)]

    monkeypatch.setattr(
        sys.modules["deerflow.skills.storage"],
        "get_or_new_skill_storage",
        lambda *, app_config=None: SimpleNamespace(load_skills=fake_load_skills),
    )

    executor = SubagentExecutor(config=base_config, tools=[], thread_id="test-thread")
    state, _filtered_tools = await executor._build_initial_state("Do the task")

    # Exactly two messages: the combined SystemMessage, then the task.
    system_msg, human_msg = state["messages"]
    assert isinstance(system_msg, SystemMessage)
    assert isinstance(human_msg, HumanMessage)

    # The SystemMessage carries both the system_prompt and the skill text.
    assert base_config.system_prompt in system_msg.content
    assert "Skill instructions here" in system_msg.content

    # The HumanMessage is the task itself.
    assert human_msg.content == "Do the task"
@pytest.mark.anyio
async def test_build_initial_state_no_skills_only_system_prompt(
    self,
    classes,
    base_config,
    monkeypatch: pytest.MonkeyPatch,
):
    """_build_initial_state works when there are no skills."""
    from langchain_core.messages import HumanMessage, SystemMessage

    SubagentExecutor = classes["SubagentExecutor"]

    # Fake skill storage that returns no skills.
    empty_storage = SimpleNamespace(load_skills=lambda *, enabled_only: [])
    monkeypatch.setattr(
        sys.modules["deerflow.skills.storage"],
        "get_or_new_skill_storage",
        lambda *, app_config=None: empty_storage,
    )

    executor = SubagentExecutor(config=base_config, tools=[], thread_id="test-thread")
    state, _filtered_tools = await executor._build_initial_state("Do the task")

    messages = state["messages"]
    assert len(messages) == 2
    first, second = messages
    assert isinstance(first, SystemMessage)
    assert base_config.system_prompt in first.content
    assert isinstance(second, HumanMessage)
@pytest.mark.anyio
async def test_build_initial_state_no_system_prompt_with_skills(
    self,
    classes,
    monkeypatch: pytest.MonkeyPatch,
    tmp_path,
):
    """_build_initial_state works when there is no system_prompt but there are skills."""
    from langchain_core.messages import HumanMessage, SystemMessage

    SubagentConfig = classes["SubagentConfig"]
    SubagentExecutor = classes["SubagentExecutor"]

    # Config with no system prompt.
    prompt_less_config = SubagentConfig(
        name="test-agent",
        description="Test agent",
        system_prompt=None,
        max_turns=10,
        timeout_seconds=60,
    )

    skill_file = tmp_path / "my-skill" / "SKILL.md"
    skill_file.parent.mkdir()
    skill_file.write_text("Skill content", encoding="utf-8")

    def fake_load_skills(*, enabled_only):
        return [SimpleNamespace(name="my-skill", skill_file=skill_file, allowed_tools=None)]

    monkeypatch.setattr(
        sys.modules["deerflow.skills.storage"],
        "get_or_new_skill_storage",
        lambda *, app_config=None: SimpleNamespace(load_skills=fake_load_skills),
    )

    executor = SubagentExecutor(config=prompt_less_config, tools=[], thread_id="test-thread")
    state, _filtered_tools = await executor._build_initial_state("Do the task")

    messages = state["messages"]
    assert len(messages) == 2
    first, second = messages
    assert isinstance(first, SystemMessage)
    assert "Skill content" in first.content
    assert isinstance(second, HumanMessage)
# -----------------------------------------------------------------------------
# Async Execution Path Tests
@@ -514,6 +632,70 @@ class TestAsyncExecutionPath:
assert result.status == SubagentStatus.COMPLETED
assert "Task" in result.result
@pytest.mark.anyio
async def test_aexecute_passes_at_most_one_system_message_to_agent(
    self,
    classes,
    base_config,
    monkeypatch: pytest.MonkeyPatch,
    tmp_path,
):
    """Regression: messages sent to agent.astream must contain at most one
    SystemMessage and it must be the first message.

    This catches any regression where system_prompt would be re-injected
    via create_agent() (e.g. system_prompt not passed as None) and appear
    as a second SystemMessage, which providers like vLLM and Xinference
    reject with "System message must be at the beginning."
    """
    from langchain_core.messages import AIMessage, SystemMessage

    SubagentExecutor = classes["SubagentExecutor"]
    SubagentStatus = classes["SubagentStatus"]

    # Provide a skill so both system_prompt and skill content are present.
    skill_dir = tmp_path / "regression-skill"
    skill_dir.mkdir()
    (skill_dir / "SKILL.md").write_text("Skill instruction text", encoding="utf-8")

    def fake_load_skills(*, enabled_only):
        return [SimpleNamespace(name="regression-skill", skill_file=skill_dir / "SKILL.md", allowed_tools=None)]

    monkeypatch.setattr(
        sys.modules["deerflow.skills.storage"],
        "get_or_new_skill_storage",
        lambda *, app_config=None: SimpleNamespace(load_skills=fake_load_skills),
    )

    captured_states: list[dict] = []

    async def capturing_astream(state, **kwargs):
        # Record the initial state, then emit a single final AI message.
        captured_states.append(state)
        yield {"messages": [AIMessage(content="Done", id="msg-1")]}

    mock_agent = MagicMock()
    mock_agent.astream = capturing_astream

    executor = SubagentExecutor(config=base_config, tools=[], thread_id="test-thread")
    with patch.object(executor, "_create_agent", return_value=mock_agent):
        result = await executor._aexecute("Do something")

    assert result.status == SubagentStatus.COMPLETED
    assert len(captured_states) == 1, "astream should be called exactly once"

    initial_messages = captured_states[0]["messages"]
    system_messages = [msg for msg in initial_messages if isinstance(msg, SystemMessage)]
    assert len(system_messages) <= 1, f"Expected at most 1 SystemMessage but got {len(system_messages)}: {system_messages}"

    # NOTE(review): the source view has no indentation; the content checks are
    # assumed to sit inside this guard — confirm against the real file.
    if system_messages:
        only_system = system_messages[0]
        assert initial_messages[0] is only_system, "SystemMessage must be the first message in the conversation"
        # The single SystemMessage must carry both the system_prompt and the
        # skill content.
        assert base_config.system_prompt in only_system.content
        assert "Skill instruction text" in only_system.content
class TestSkillAllowedTools:
@pytest.mark.anyio
+41 -1
View File
@@ -10,7 +10,8 @@ from __future__ import annotations
from unittest.mock import MagicMock, patch
from langchain_core.tools import BaseTool, tool
from langchain_core.tools import BaseTool, StructuredTool, tool
from pydantic import BaseModel, Field
from deerflow.tools.tools import get_available_tools
@@ -19,6 +20,10 @@ from deerflow.tools.tools import get_available_tools
# ---------------------------------------------------------------------------
# Argument schema used by the async-only StructuredTool test below.
# A `#` comment is used rather than a class docstring so nothing is added to
# the model itself.
class AsyncToolArgs(BaseModel):
    # Single required integer input.
    x: int = Field(..., description="test input")
@tool
def _tool_alpha(x: str) -> str:
"""Alpha tool."""
@@ -52,10 +57,45 @@ def _make_minimal_config(tools):
config.tools = tools
config.models = []
config.tool_search.enabled = False
config.skill_evolution.enabled = False
config.sandbox = MagicMock()
config.acp_agents = {}
return config
@patch("deerflow.tools.tools.get_app_config")
@patch("deerflow.tools.tools.is_host_bash_allowed", return_value=True)
@patch("deerflow.tools.tools.reset_deferred_registry")
def test_config_loaded_async_only_tool_gets_sync_wrapper(mock_reset, mock_bash, mock_cfg):
    """Config-loaded async-only tools can still be invoked by sync clients."""

    async def async_tool_impl(x: int) -> str:
        return f"result: {x}"

    # A tool with a coroutine and no sync ``func``.
    async_tool = StructuredTool(
        name="async_tool",
        description="Async-only test tool.",
        args_schema=AsyncToolArgs,
        func=None,
        coroutine=async_tool_impl,
    )

    # ``name`` is set after construction: MagicMock(name=...) would name the
    # mock itself rather than set an attribute.
    tool_cfg = MagicMock(group="test", use="tests.fake:async_tool")
    tool_cfg.name = "async_tool"

    minimal_config = _make_minimal_config([tool_cfg])
    mock_cfg.return_value = minimal_config

    with (
        patch("deerflow.tools.tools.resolve_variable", return_value=async_tool),
        patch("deerflow.tools.tools.BUILTIN_TOOLS", []),
    ):
        available = get_available_tools(include_mcp=False, app_config=minimal_config)

    assert async_tool in available
    # After loading, the tool has a sync ``func`` and can be invoked synchronously.
    assert async_tool.func is not None
    assert async_tool.invoke({"x": 42}) == "result: 42"
@patch("deerflow.tools.tools.get_app_config")
@patch("deerflow.tools.tools.is_host_bash_allowed", return_value=True)
@patch("deerflow.tools.tools.reset_deferred_registry")