mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-11 01:45:58 +00:00
feat(subagents): extend deferred MCP tool loading to subagents (#3432)
* feat(subagents): extend deferred MCP tool loading to subagents (#3341) Subagents now reuse the lead agent's deferred-tool path: when tool_search.enabled, MCP tool schemas are withheld from the model and surfaced by name in <available-deferred-tools>, fetched on demand via the generated tool_search helper. DeferredToolFilterMiddleware deterministically rewrites request.tools to hide the deferred schemas (the prompt section is discovery only, not enforcement). Consolidates the assembly into deerflow.tools.builtins.tool_search, now the single home for both assemble_deferred_tools (centralized fail-closed guard, replacing the lead-only private _assemble_deferred) and the relocated get_deferred_tools_prompt_section. Shared by every build path: lead agent, embedded client, and subagent executor. tool_search is appended after the subagent's name-level tool policy and is treated as infrastructure: its catalog is built from the already policy-filtered list, so it can never surface a tool the policy denied. Follow-up to #3370. Fixes #3341. * test(subagents): assert the real middleware builder emits a working deferred filter (#3341) The existing recipe test hand-constructs DeferredToolFilterMiddleware, so it cannot catch a regression in how build_subagent_runtime_middlewares (the call executor._create_agent actually makes) wires the deferred setup into the filter. Add a test that sources the filter from the real builder given a real setup and runs it through a graph: a wrong catalog hash would silently stop promotion, a dropped filter would stop hiding — both now caught. Running the full real middleware stack is intentionally avoided (the other runtime middlewares need sandbox/thread infra to execute, which would make the test flaky); their attachment + ordering before Safety stays locked in test_tool_error_handling_middleware.py. 
* test(subagents): keep executor tests config-free in CI * chore: trigger ci * Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Willem Jiang <willem.jiang@gmail.com> Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
@@ -0,0 +1,174 @@
|
||||
"""End-to-end: the subagent deferral recipe hides then promotes an MCP tool (#3341).
|
||||
|
||||
#3272 wired deferred MCP loading into the lead agent only. #3341 extends it to
|
||||
subagents. This locks the *subagent build recipe* - the shared helpers the
|
||||
executor now calls (``assemble_deferred_tools`` + ``get_deferred_tools_prompt_section``)
|
||||
plus the ``DeferredToolFilterMiddleware`` that ``build_subagent_runtime_middlewares``
|
||||
attaches - composing into the same hide/promote loop the lead has, under the
|
||||
subagent's build shape (``system_prompt=None`` + a single ``SystemMessage``).
|
||||
|
||||
The hide/promote mechanics themselves are also covered for the lead path by
|
||||
tests/test_deferred_promotion_integration.py; this asserts the subagent recipe
|
||||
produces an equivalent loop without binding MCP schemas before promotion.
|
||||
|
||||
A second test (``test_subagent_builder_emits_working_deferred_filter``) closes the
|
||||
remaining seam: it sources the filter from the *real* ``build_subagent_runtime_middlewares``
|
||||
(the exact call ``executor._create_agent`` makes) rather than hand-constructing it, so a
|
||||
regression in how the builder wires the setup into the filter - wrong catalog hash,
|
||||
dropped filter, wrong deferred set - is caught at runtime. (Running the full real stack
|
||||
is intentionally avoided: the other runtime middlewares need sandbox/thread infra to
|
||||
execute, which would make the test flaky; their attachment + ordering is locked in
|
||||
tests/test_tool_error_handling_middleware.py instead.)
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
|
||||
from langchain.agents import create_agent
|
||||
from langchain_core.language_models.fake_chat_models import GenericFakeChatModel
|
||||
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
|
||||
from langchain_core.tools import tool as as_tool
|
||||
|
||||
from deerflow.agents.middlewares.deferred_tool_filter_middleware import DeferredToolFilterMiddleware
|
||||
from deerflow.agents.thread_state import ThreadState
|
||||
from deerflow.tools.builtins.tool_search import assemble_deferred_tools, get_deferred_tools_prompt_section
|
||||
from deerflow.tools.mcp_metadata import tag_mcp_tool
|
||||
|
||||
|
||||
# Fixture: the one tool in these tests that is NOT passed through
# ``tag_mcp_tool``; both tests place it next to the tagged tools when
# building the policy-filtered tool list.
@as_tool
def active_tool(x: str) -> str:
    """An always-active tool."""
    # Identity body: the tests only inspect which tool names get bound to the
    # model, never this return value.
    return x
|
||||
|
||||
|
||||
# Fixture: wrapped with ``tag_mcp_tool`` inside the tests; it is the tool the
# scripted ``tool_search`` call selects ("select:mcp_calc"), so it is the one
# expected to be promoted on the second model bind.
@as_tool
def mcp_calc(expression: str) -> str:
    """Evaluate arithmetic."""
    # Echoes its input; no arithmetic is actually evaluated in these tests.
    return expression
|
||||
|
||||
|
||||
# Fixture: wrapped with ``tag_mcp_tool`` inside the tests but never searched
# for, so the tests assert it is absent from both model binds.
@as_tool
def mcp_other(x: str) -> str:
    """Another deferred MCP tool."""
    # Identity body: only the tool's name matters to the assertions.
    return x
|
||||
|
||||
|
||||
def test_subagent_deferral_recipe_hides_then_promotes():
    """Drive the subagent deferral recipe through a two-turn graph run.

    Assembles the deferred tool set, renders the prompt section, then runs an
    agent whose fake model first calls ``tool_search`` for ``mcp_calc`` and
    then answers. The tool names handed to ``bind_tools`` on each turn are
    recorded and checked: both tagged tools absent on turn 1, only the
    searched one present on turn 2.
    """
    # One entry per bind_tools call: the names of the tools offered to the model.
    bind_log: list[list[str]] = []

    class RecordingModel(GenericFakeChatModel):
        def bind_tools(self, tools, **kwargs):
            # Entries without a ``name`` attribute are recorded as None.
            offered = [getattr(candidate, "name", None) for candidate in tools]
            bind_log.append(offered)
            return self

    # The subagent build path (executor._build_initial_state): policy-filtered
    # tools go in, assemble_deferred_tools appends tool_search (fail-closed).
    policy_filtered = [active_tool, tag_mcp_tool(mcp_calc), tag_mcp_tool(mcp_other)]
    final_tools, setup = assemble_deferred_tools(policy_filtered, enabled=True)
    assert "tool_search" in {t.name for t in final_tools}
    assert setup.deferred_names == frozenset({"mcp_calc", "mcp_other"})

    # The subagent injects this section into its single SystemMessage.
    section = get_deferred_tools_prompt_section(deferred_names=setup.deferred_names)
    assert "<available-deferred-tools>" in section
    assert all(name in section for name in ("mcp_calc", "mcp_other"))

    # Scripted model output: turn 1 searches for mcp_calc, turn 2 finishes.
    search_call = {"name": "tool_search", "args": {"query": "select:mcp_calc"}, "id": "c1", "type": "tool_call"}
    scripted_turns = [
        AIMessage(content="", tool_calls=[search_call]),
        AIMessage(content="done"),
    ]
    model = RecordingModel(messages=iter(scripted_turns))

    # Hand-built DeferredToolFilterMiddleware: the same middleware type that
    # build_subagent_runtime_middlewares attaches for this setup (locked by
    # tests/test_tool_error_handling_middleware.py). The subagent build passes
    # system_prompt=None with state_schema=ThreadState.
    deferred_filter = DeferredToolFilterMiddleware(setup.deferred_names, setup.catalog_hash)
    graph = create_agent(
        model=model,
        tools=final_tools,
        middleware=[deferred_filter],
        system_prompt=None,
        state_schema=ThreadState,
    )

    graph_input = {
        "messages": [
            SystemMessage(content=section),
            HumanMessage(content="use the deferred calculator"),
        ]
    }
    result = asyncio.run(graph.ainvoke(graph_input))

    assert len(bind_log) >= 2, f"expected >=2 model binds, got {bind_log}"
    first_bind, second_bind = bind_log[0], bind_log[1]
    # Turn 1: both deferred MCP tools hidden from the subagent's model binding.
    assert not ({"mcp_calc", "mcp_other"} & set(first_bind))
    # Turn 2: the searched tool is promoted; the un-searched one stays hidden.
    assert "mcp_calc" in second_bind
    assert "mcp_other" not in second_bind
    # Promotion recorded in graph state, scoped by catalog hash.
    expected_promotion = {"catalog_hash": setup.catalog_hash, "names": ["mcp_calc"]}
    assert result["promoted"] == expected_promotion
|
||||
|
||||
|
||||
def test_subagent_builder_emits_working_deferred_filter():
    """Check that the builder-produced deferred filter hides and promotes in a graph.

    Unlike the recipe test, the ``DeferredToolFilterMiddleware`` here is taken
    from the output of ``build_subagent_runtime_middlewares`` rather than being
    constructed by hand, so the builder's wiring is what gets exercised:

    * a wrong catalog hash would leave ``mcp_calc`` hidden on turn 2;
    * a dropped filter would let ``mcp_calc`` be bound on turn 1.
    """
    from deerflow.agents.middlewares.tool_error_handling_middleware import build_subagent_runtime_middlewares
    from deerflow.config.app_config import AppConfig, CircuitBreakerConfig
    from deerflow.config.guardrails_config import GuardrailsConfig
    from deerflow.config.model_config import ModelConfig
    from deerflow.config.sandbox_config import SandboxConfig

    # One entry per bind_tools call: the names of the tools offered to the model.
    bind_log: list[list[str]] = []

    class RecordingModel(GenericFakeChatModel):
        def bind_tools(self, tools, **kwargs):
            # Entries without a ``name`` attribute are recorded as None.
            offered = [getattr(candidate, "name", None) for candidate in tools]
            bind_log.append(offered)
            return self

    policy_filtered = [active_tool, tag_mcp_tool(mcp_calc), tag_mcp_tool(mcp_other)]
    final_tools, setup = assemble_deferred_tools(policy_filtered, enabled=True)
    section = get_deferred_tools_prompt_section(deferred_names=setup.deferred_names)

    # In-memory config passed straight to the builder, so the test does not
    # depend on a config file being present.
    test_model_config = ModelConfig(
        name="test-model",
        display_name="test-model",
        description=None,
        use="langchain_openai:ChatOpenAI",
        model="test-model",
        supports_vision=False,
    )
    app_config = AppConfig(
        models=[test_model_config],
        sandbox=SandboxConfig(use="test"),
        guardrails=GuardrailsConfig(enabled=False),
        circuit_breaker=CircuitBreakerConfig(failure_threshold=7, recovery_timeout_sec=11),
    )

    # The exact call executor._create_agent makes. The filter is pulled from
    # the builder's output (not hand-rolled) so that its wiring - deferred set
    # plus catalog hash - is what is under test.
    middlewares = build_subagent_runtime_middlewares(
        app_config=app_config,
        model_name="test-model",
        deferred_setup=setup,
    )
    deferred_filters = [mw for mw in middlewares if isinstance(mw, DeferredToolFilterMiddleware)]
    assert len(deferred_filters) == 1, f"builder must emit exactly one deferred filter, got {[type(m).__name__ for m in middlewares]}"

    # Scripted model output: turn 1 searches for mcp_calc, turn 2 finishes.
    search_call = {"name": "tool_search", "args": {"query": "select:mcp_calc"}, "id": "c1", "type": "tool_call"}
    scripted_turns = [
        AIMessage(content="", tool_calls=[search_call]),
        AIMessage(content="done"),
    ]
    model = RecordingModel(messages=iter(scripted_turns))

    # Run only the builder-produced filter (the component under test). The
    # other runtime middlewares need sandbox/thread infra to *execute*, so
    # running the full stack here would be flaky; their attachment + ordering
    # before Safety is locked in tests/test_tool_error_handling_middleware.py.
    graph = create_agent(
        model=model,
        tools=final_tools,
        middleware=deferred_filters,
        system_prompt=None,
        state_schema=ThreadState,
    )
    graph_input = {
        "messages": [
            SystemMessage(content=section),
            HumanMessage(content="use the deferred calculator"),
        ]
    }
    result = asyncio.run(graph.ainvoke(graph_input))

    assert len(bind_log) >= 2, f"expected >=2 model binds, got {bind_log}"
    first_bind, second_bind = bind_log[0], bind_log[1]
    # Turn 1: both deferred MCP tools hidden - the builder-produced filter is active.
    assert not ({"mcp_calc", "mcp_other"} & set(first_bind))
    # Turn 2: the searched tool is promoted - proves the builder wired the
    # catalog hash correctly (a wrong hash would leave mcp_calc hidden here).
    assert "mcp_calc" in second_bind
    assert "mcp_other" not in second_bind
    expected_promotion = {"catalog_hash": setup.catalog_hash, "names": ["mcp_calc"]}
    assert result["promoted"] == expected_promotion
|
||||
Reference in New Issue
Block a user