feat(subagents): extend deferred MCP tool loading to subagents (#3432)

* feat(subagents): extend deferred MCP tool loading to subagents (#3341)

  Subagents now reuse the lead agent's deferred-tool path: when tool_search.enabled is true, MCP tool schemas are withheld from the model and surfaced by name in <available-deferred-tools>, fetched on demand via the generated tool_search helper. DeferredToolFilterMiddleware deterministically rewrites request.tools to hide the deferred schemas (the prompt section is discovery only, not enforcement).

  Consolidates the assembly into deerflow.tools.builtins.tool_search, now the single home for both assemble_deferred_tools (centralized fail-closed guard, replacing the lead-only private _assemble_deferred) and the relocated get_deferred_tools_prompt_section. Shared by every build path: lead agent, embedded client, and subagent executor.

  tool_search is appended after the subagent's name-level tool policy and is treated as infrastructure: its catalog is built from the already policy-filtered list, so it can never surface a tool the policy denied.

  Follow-up to #3370. Fixes #3341.

* test(subagents): assert the real middleware builder emits a working deferred filter (#3341)

  The existing recipe test hand-constructs DeferredToolFilterMiddleware, so it cannot catch a regression in how build_subagent_runtime_middlewares (the call executor._create_agent actually makes) wires the deferred setup into the filter. Add a test that sources the filter from the real builder given a real setup and runs it through a graph: a wrong catalog hash would silently stop promotion, a dropped filter would stop hiding — both now caught.

  Running the full real middleware stack is intentionally avoided (the other runtime middlewares need sandbox/thread infra to execute, which would make the test flaky); their attachment + ordering before Safety stays locked in test_tool_error_handling_middleware.py.

* test(subagents): keep executor tests config-free in CI

* chore: trigger ci

* Potential fix for pull request finding

  Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
2026-06-10 17:35:57 +00:00 · 2026-06-08 23:17:22 +08:00
parent 3c2b60aaae
commit 3b6dd0a4e3
12 changed files with 551 additions and 83 deletions
@@ -22,7 +22,7 @@ from langchain_core.tools import tool as as_tool
 from deerflow.agents.middlewares.deferred_tool_filter_middleware import DeferredToolFilterMiddleware
 from deerflow.skills.tool_policy import filter_tools_by_skill_allowed_tools
 from deerflow.skills.types import Skill
-from deerflow.tools.builtins.tool_search import DeferredToolSetup, build_deferred_tool_setup
+from deerflow.tools.builtins.tool_search import DeferredToolSetup, assemble_deferred_tools, build_deferred_tool_setup
 from deerflow.tools.mcp_metadata import tag_mcp_tool


@@ -93,17 +93,15 @@ def test_policy_excluded_mcp_tool_not_in_catalog():
 def test_fail_closed_when_mcp_survives_without_setup(monkeypatch):
    """Finding 2: simulate a wiring regression and assert it fails loudly.

-    ``_assemble_deferred`` lazy-imports ``build_deferred_tool_setup`` from the
-    source module, so patch it there (not on the agent module).
+    ``assemble_deferred_tools`` references ``build_deferred_tool_setup`` as a
+    module global, so patch it in ``tool_search`` (its home module).
    """
-    from deerflow.agents.lead_agent import agent as agentmod
-
    monkeypatch.setattr(
        "deerflow.tools.builtins.tool_search.build_deferred_tool_setup",
        lambda tools, *, enabled: DeferredToolSetup(None, frozenset(), None),
    )
    with pytest.raises(RuntimeError, match="fail-closed"):
-        agentmod._assemble_deferred([tag_mcp_tool(mcp_secret)], enabled=True)
+        assemble_deferred_tools([tag_mcp_tool(mcp_secret)], enabled=True)


 def test_subagent_reentry_does_not_touch_lead_state():
@@ -146,12 +144,10 @@ def _make_skill(allowed_tools):

 def test_policy_denied_mcp_yields_no_tool_search_end_to_end():
    """An allowlist that denies the MCP tool gates it end-to-end: after the real
-    policy filter no MCP tool survives, so ``_assemble_deferred`` adds no
+    policy filter no MCP tool survives, so ``assemble_deferred_tools`` adds no
    tool_search (and does not fail-closed, because no MCP tool leaked through)."""
-    from deerflow.agents.lead_agent import agent as agentmod
-
    filtered = filter_tools_by_skill_allowed_tools([active_tool, tag_mcp_tool(mcp_secret)], [_make_skill(["active_tool"])])
-    final_tools, setup = agentmod._assemble_deferred(filtered, enabled=True)
+    final_tools, setup = assemble_deferred_tools(filtered, enabled=True)

    assert [t.name for t in final_tools] == ["active_tool"]
    assert "tool_search" not in {t.name for t in final_tools}
@@ -167,11 +163,9 @@ def test_tool_search_appended_after_policy_but_never_exposes_denied_tool():
    is derived from the already policy-filtered list — so it can never expose a
    tool the allowlist denied. Locks that contract so the ordering cannot regress.
    """
-    from deerflow.agents.lead_agent import agent as agentmod
-
    allowed = ["active_tool", "mcp_secret"]  # permits the MCP tool, does NOT list tool_search
    filtered = filter_tools_by_skill_allowed_tools([active_tool, tag_mcp_tool(mcp_secret)], [_make_skill(allowed)])
-    final_tools, setup = agentmod._assemble_deferred(filtered, enabled=True)
+    final_tools, setup = assemble_deferred_tools(filtered, enabled=True)

    names = {t.name for t in final_tools}
    assert "tool_search" in names  # appended despite not being in the allowlist
@@ -0,0 +1,174 @@
+"""End-to-end: the subagent deferral recipe hides then promotes an MCP tool (#3341).
+
+#3272 wired deferred MCP loading into the lead agent only. #3341 extends it to
+subagents. This locks the *subagent build recipe* - the shared helpers the
+executor now calls (``assemble_deferred_tools`` + ``get_deferred_tools_prompt_section``)
+plus the ``DeferredToolFilterMiddleware`` that ``build_subagent_runtime_middlewares``
+attaches - composing into the same hide/promote loop the lead has, under the
+subagent's build shape (``system_prompt=None`` + a single ``SystemMessage``).
+
+The hide/promote mechanics themselves are also covered for the lead path by
+tests/test_deferred_promotion_integration.py; this asserts the subagent recipe
+produces an equivalent loop without binding MCP schemas before promotion.
+
+A second test (``test_subagent_builder_emits_working_deferred_filter``) closes the
+remaining seam: it sources the filter from the *real* ``build_subagent_runtime_middlewares``
+(the exact call ``executor._create_agent`` makes) rather than hand-constructing it, so a
+regression in how the builder wires the setup into the filter - wrong catalog hash,
+dropped filter, wrong deferred set - is caught at runtime. (Running the full real stack
+is intentionally avoided: the other runtime middlewares need sandbox/thread infra to
+execute, which would make the test flaky; their attachment + ordering is locked in
+tests/test_tool_error_handling_middleware.py instead.)
+"""
+
+import asyncio
+
+from langchain.agents import create_agent
+from langchain_core.language_models.fake_chat_models import GenericFakeChatModel
+from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
+from langchain_core.tools import tool as as_tool
+
+from deerflow.agents.middlewares.deferred_tool_filter_middleware import DeferredToolFilterMiddleware
+from deerflow.agents.thread_state import ThreadState
+from deerflow.tools.builtins.tool_search import assemble_deferred_tools, get_deferred_tools_prompt_section
+from deerflow.tools.mcp_metadata import tag_mcp_tool
+
+
+# Plain tool: the tests in this file pass it to assemble_deferred_tools without
+# tag_mcp_tool, and it does not appear in the asserted deferred-name set.
+@as_tool
+def active_tool(x: str) -> str:
+    "An always-active tool."
+    return x
+
+
+# Stand-in for an MCP tool: the tests wrap it with tag_mcp_tool and have the
+# scripted model request it through tool_search ("select:mcp_calc").
+@as_tool
+def mcp_calc(expression: str) -> str:
+    "Evaluate arithmetic."
+    return expression
+
+
+# Second tagged tool that is never searched for; the tests assert it is still
+# absent from the model binding after mcp_calc has been promoted.
+@as_tool
+def mcp_other(x: str) -> str:
+    "Another deferred MCP tool."
+    return x
+
+
+def test_subagent_deferral_recipe_hides_then_promotes():
+    """Assemble deferred tools, run a two-turn graph behind a hand-built
+    ``DeferredToolFilterMiddleware``, and assert that both tagged tools are
+    absent from the first model binding while only the searched one appears in
+    the second."""
+    # Tool names seen by each bind_tools call, in call order.
+    bound: list[list[str]] = []
+
+    class RecordingModel(GenericFakeChatModel):
+        # Records the names it is asked to bind and returns itself, so the same
+        # scripted model instance keeps being used.
+        def bind_tools(self, tools, **kwargs):
+            bound.append([getattr(t, "name", None) for t in tools])
+            return self
+
+    # The subagent build path (executor._build_initial_state): policy-filtered
+    # tools -> assemble_deferred_tools appends tool_search, fail-closed.
+    filtered = [active_tool, tag_mcp_tool(mcp_calc), tag_mcp_tool(mcp_other)]
+    final_tools, setup = assemble_deferred_tools(filtered, enabled=True)
+    assert "tool_search" in [t.name for t in final_tools]
+    assert setup.deferred_names == frozenset({"mcp_calc", "mcp_other"})
+
+    # The subagent injects the section into its single SystemMessage.
+    section = get_deferred_tools_prompt_section(deferred_names=setup.deferred_names)
+    assert "<available-deferred-tools>" in section
+    assert "mcp_calc" in section and "mcp_other" in section
+
+    # Scripted replies: turn 1 calls tool_search selecting mcp_calc, turn 2 is a
+    # plain final answer.
+    turn1 = AIMessage(content="", tool_calls=[{"name": "tool_search", "args": {"query": "select:mcp_calc"}, "id": "c1", "type": "tool_call"}])
+    turn2 = AIMessage(content="done")
+    model = RecordingModel(messages=iter([turn1, turn2]))
+
+    # The middleware DeferredToolFilterMiddleware is exactly what
+    # build_subagent_runtime_middlewares attaches for this setup (locked by
+    # tests/test_tool_error_handling_middleware.py); the subagent build passes
+    # system_prompt=None with state_schema=ThreadState.
+    graph = create_agent(
+        model=model,
+        tools=final_tools,
+        middleware=[DeferredToolFilterMiddleware(setup.deferred_names, setup.catalog_hash)],
+        system_prompt=None,
+        state_schema=ThreadState,
+    )
+
+    # The deferred-tools section travels as the SystemMessage, since
+    # system_prompt is None above.
+    result = asyncio.run(graph.ainvoke({"messages": [SystemMessage(content=section), HumanMessage(content="use the deferred calculator")]}))
+
+    assert len(bound) >= 2, f"expected >=2 model binds, got {bound}"
+    # Turn 1: both deferred MCP tools hidden from the subagent's model binding.
+    assert "mcp_calc" not in bound[0] and "mcp_other" not in bound[0]
+    # Turn 2: the searched tool is promoted; the un-searched one stays hidden.
+    assert "mcp_calc" in bound[1]
+    assert "mcp_other" not in bound[1]
+    # Promotion recorded in graph state, scoped by catalog hash.
+    assert result["promoted"] == {"catalog_hash": setup.catalog_hash, "names": ["mcp_calc"]}
+
+
+def test_subagent_builder_emits_working_deferred_filter():
+    """The real build path the executor calls - ``build_subagent_runtime_middlewares`` -
+    must emit a ``DeferredToolFilterMiddleware`` that actually hides/promotes through a
+    graph. The recipe test above hand-builds the filter; this sources it from the real
+    builder given a real setup, so a regression in the builder's wiring is caught: a
+    wrong catalog hash silently stops promotion (turn 2 would keep mcp_calc hidden), a
+    dropped filter stops hiding (turn 1 would bind mcp_calc)."""
+    from deerflow.agents.middlewares.tool_error_handling_middleware import build_subagent_runtime_middlewares
+    from deerflow.config.app_config import AppConfig, CircuitBreakerConfig
+    from deerflow.config.guardrails_config import GuardrailsConfig
+    from deerflow.config.model_config import ModelConfig
+    from deerflow.config.sandbox_config import SandboxConfig
+
+    # Tool names seen by each bind_tools call, in call order.
+    bound: list[list[str]] = []
+
+    class RecordingModel(GenericFakeChatModel):
+        # Records the names it is asked to bind and returns itself, so the same
+        # scripted model instance keeps being used.
+        def bind_tools(self, tools, **kwargs):
+            bound.append([getattr(t, "name", None) for t in tools])
+            return self
+
+    # One plain tool plus two tagged tools; the section is fed to the graph as
+    # the SystemMessage further down.
+    filtered = [active_tool, tag_mcp_tool(mcp_calc), tag_mcp_tool(mcp_other)]
+    final_tools, setup = assemble_deferred_tools(filtered, enabled=True)
+    section = get_deferred_tools_prompt_section(deferred_names=setup.deferred_names)
+
+    # Hand-built AppConfig handed to the builder; guardrails are disabled and the
+    # single model entry matches the model_name passed below.
+    app_config = AppConfig(
+        models=[
+            ModelConfig(
+                name="test-model",
+                display_name="test-model",
+                description=None,
+                use="langchain_openai:ChatOpenAI",
+                model="test-model",
+                supports_vision=False,
+            )
+        ],
+        sandbox=SandboxConfig(use="test"),
+        guardrails=GuardrailsConfig(enabled=False),
+        circuit_breaker=CircuitBreakerConfig(failure_threshold=7, recovery_timeout_sec=11),
+    )
+
+    # The exact call executor._create_agent makes. Pull the filter the builder
+    # produced (not a hand-rolled one) so its wiring - deferred set + catalog hash -
+    # is what's under test.
+    middlewares = build_subagent_runtime_middlewares(app_config=app_config, model_name="test-model", deferred_setup=setup)
+    deferred_filters = [m for m in middlewares if isinstance(m, DeferredToolFilterMiddleware)]
+    assert len(deferred_filters) == 1, f"builder must emit exactly one deferred filter, got {[type(m).__name__ for m in middlewares]}"
+
+    # Scripted replies: turn 1 calls tool_search selecting mcp_calc, turn 2 is a
+    # plain final answer.
+    turn1 = AIMessage(content="", tool_calls=[{"name": "tool_search", "args": {"query": "select:mcp_calc"}, "id": "c1", "type": "tool_call"}])
+    turn2 = AIMessage(content="done")
+    model = RecordingModel(messages=iter([turn1, turn2]))
+
+    # Run only the builder-produced filter (the component under test). The other
+    # runtime middlewares need sandbox/thread infra to *execute*, so running the
+    # full stack here would be flaky; their attachment + ordering before Safety is
+    # locked in tests/test_tool_error_handling_middleware.py.
+    graph = create_agent(
+        model=model,
+        tools=final_tools,
+        middleware=deferred_filters,
+        system_prompt=None,
+        state_schema=ThreadState,
+    )
+    result = asyncio.run(graph.ainvoke({"messages": [SystemMessage(content=section), HumanMessage(content="use the deferred calculator")]}))
+
+    assert len(bound) >= 2, f"expected >=2 model binds, got {bound}"
+    # Turn 1: both deferred MCP tools hidden - the builder-produced filter is active.
+    assert "mcp_calc" not in bound[0] and "mcp_other" not in bound[0]
+    # Turn 2: the searched tool is promoted - proves the builder wired the catalog
+    # hash correctly (a wrong hash would leave mcp_calc hidden here).
+    assert "mcp_calc" in bound[1]
+    assert "mcp_other" not in bound[1]
+    # Promotion recorded in graph state under the setup's catalog hash.
+    assert result["promoted"] == {"catalog_hash": setup.catalog_hash, "names": ["mcp_calc"]}
@@ -14,6 +14,7 @@ the real implementation in isolation.
 """

 import asyncio
+import importlib
 import sys
 import threading
 from datetime import datetime
@@ -39,6 +40,21 @@ _MOCKED_MODULE_NAMES = [
 ]


+def _default_app_config():
+    """Return a stand-in app config exposing only ``tool_search.enabled`` (False)."""
+    return SimpleNamespace(tool_search=SimpleNamespace(enabled=False))
+
+
+def _patch_default_get_app_config(executor_module):
+    """Point ``executor_module.get_app_config`` at ``_default_app_config`` and return the module.
+
+    The attribute is assigned directly on the module object rather than through
+    ``monkeypatch``, so nothing restores the previous value automatically.
+    """
+    executor_module.get_app_config = _default_app_config
+    return executor_module
+
+
+def _clear_stale_executor_package_attr() -> None:
+    """Remove the ``executor`` attribute from ``deerflow.subagents`` if that package is loaded.
+
+    Does nothing when the package is not in ``sys.modules`` or has no such
+    attribute.
+    """
+    # NOTE(review): presumably this stops ``from deerflow.subagents import executor``
+    # from resolving to a previously bound (mocked or reloaded) module object
+    # after its sys.modules entry was deleted - confirm against the fixture.
+    subagents_pkg = sys.modules.get("deerflow.subagents")
+    if subagents_pkg is not None and hasattr(subagents_pkg, "executor"):
+        delattr(subagents_pkg, "executor")
+
+
@pytest.fixture(autouse=True)
 def _setup_executor_classes():
    """Set up mocked modules and import real executor classes.
@@ -53,6 +69,7 @@ def _setup_executor_classes():
    # Remove mocked executor if exists (from conftest.py)
    if "deerflow.subagents.executor" in sys.modules:
        del sys.modules["deerflow.subagents.executor"]
+    _clear_stale_executor_package_attr()

    # Set up mocks
    for name in _MOCKED_MODULE_NAMES:
@@ -71,6 +88,14 @@ def _setup_executor_classes():
        SubagentStatus,
    )

+    executor_module = sys.modules["deerflow.subagents.executor"]
+
+    # Most tests in this module patch _create_agent and exercise executor
+    # control flow only. Keep those tests hermetic: CI checkouts do not include
+    # the gitignored config.yaml, and deferral-specific tests override this
+    # default explicitly.
+    _patch_default_get_app_config(executor_module)
+
    # Store classes in a dict to yield
    classes = {
        "AIMessage": AIMessage,
@@ -287,6 +312,7 @@ class TestAgentConstruction:
            "app_config": app_config,
            "model_name": "parent-model",
            "lazy_init": True,
+            "deferred_setup": None,
        }
        assert captured["agent"]["model"] is model
        assert captured["agent"]["middleware"] is middlewares
@@ -359,7 +385,7 @@ class TestAgentConstruction:
            thread_id="test-thread",
        )

-        state, _filtered_tools = await executor._build_initial_state("Do the task")
+        state, _final_tools, _deferred_setup = await executor._build_initial_state("Do the task")

        messages = state["messages"]
        # Should have exactly 2 messages: one combined SystemMessage + one HumanMessage
@@ -397,7 +423,7 @@ class TestAgentConstruction:
            thread_id="test-thread",
        )

-        state, _filtered_tools = await executor._build_initial_state("Do the task")
+        state, _final_tools, _deferred_setup = await executor._build_initial_state("Do the task")

        messages = state["messages"]
        from langchain_core.messages import HumanMessage, SystemMessage
@@ -439,7 +465,7 @@ class TestAgentConstruction:
        SubagentExecutor = classes["SubagentExecutor"]
        executor = SubagentExecutor(config=config, tools=[], thread_id="test-thread")

-        state, _filtered_tools = await executor._build_initial_state("Do the task")
+        state, _final_tools, _deferred_setup = await executor._build_initial_state("Do the task")

        messages = state["messages"]
        from langchain_core.messages import HumanMessage, SystemMessage
@@ -449,6 +475,192 @@ class TestAgentConstruction:
        assert "Skill content" in messages[0].content
        assert isinstance(messages[1], HumanMessage)

+    @pytest.mark.anyio
+    async def test_build_initial_state_defers_mcp_tools_when_tool_search_enabled(
+        self,
+        classes,
+        base_config,
+        monkeypatch: pytest.MonkeyPatch,
+    ):
+        """tool_search enabled + a surviving MCP tool: _build_initial_state appends
+        the tool_search tool, withholds the MCP schema, and injects the
+        <available-deferred-tools> section into the SystemMessage."""
+        from langchain_core.tools import tool as as_tool
+
+        from deerflow.subagents import executor as executor_module
+        from deerflow.tools.mcp_metadata import tag_mcp_tool
+
+        SubagentExecutor = classes["SubagentExecutor"]
+
+        # Stub skill storage so load_skills returns an empty list.
+        monkeypatch.setattr(
+            sys.modules["deerflow.skills.storage"],
+            "get_or_new_skill_storage",
+            lambda *, app_config=None: SimpleNamespace(load_skills=lambda *, enabled_only: []),
+        )
+        # Make the executor module see tool_search.enabled=True.
+        monkeypatch.setattr(executor_module, "get_app_config", lambda: SimpleNamespace(tool_search=SimpleNamespace(enabled=True)))
+
+        @as_tool
+        def mcp_calc(expression: str) -> str:
+            "Evaluate arithmetic."
+            return expression
+
+        # The only tool handed to the executor is the tagged one.
+        executor = SubagentExecutor(config=base_config, tools=[tag_mcp_tool(mcp_calc)], thread_id="test-thread")
+
+        state, final_tools, deferred_setup = await executor._build_initial_state("Do the task")
+
+        assert "tool_search" in [t.name for t in final_tools]
+        assert deferred_setup.deferred_names == frozenset({"mcp_calc"})
+
+        # The first message is the SystemMessage carrying the injected section.
+        system_message = state["messages"][0]
+        assert "<available-deferred-tools>" in system_message.content
+        assert "mcp_calc" in system_message.content
+        # The base system_prompt is still present alongside the injected section.
+        assert base_config.system_prompt in system_message.content
+
+    @pytest.mark.anyio
+    async def test_build_initial_state_no_deferral_when_tool_search_disabled(
+        self,
+        classes,
+        base_config,
+        monkeypatch: pytest.MonkeyPatch,
+    ):
+        """tool_search disabled: no tool_search tool, no section - pure no-op even
+        with an MCP-tagged tool present."""
+        from langchain_core.tools import tool as as_tool
+
+        from deerflow.subagents import executor as executor_module
+        from deerflow.tools.mcp_metadata import tag_mcp_tool
+
+        SubagentExecutor = classes["SubagentExecutor"]
+
+        # Stub skill storage so load_skills returns an empty list.
+        monkeypatch.setattr(
+            sys.modules["deerflow.skills.storage"],
+            "get_or_new_skill_storage",
+            lambda *, app_config=None: SimpleNamespace(load_skills=lambda *, enabled_only: []),
+        )
+        # Set tool_search.enabled=False explicitly for this test.
+        monkeypatch.setattr(executor_module, "get_app_config", lambda: SimpleNamespace(tool_search=SimpleNamespace(enabled=False)))
+
+        @as_tool
+        def mcp_calc(expression: str) -> str:
+            "Evaluate arithmetic."
+            return expression
+
+        # A tagged tool is present, yet nothing should be deferred.
+        executor = SubagentExecutor(config=base_config, tools=[tag_mcp_tool(mcp_calc)], thread_id="test-thread")
+
+        state, final_tools, deferred_setup = await executor._build_initial_state("Do the task")
+
+        assert "tool_search" not in [t.name for t in final_tools]
+        # A setup object is still returned; its deferred-name set is empty.
+        assert deferred_setup.deferred_names == frozenset()
+        assert "<available-deferred-tools>" not in state["messages"][0].content
+
+    @pytest.mark.anyio
+    async def test_build_initial_state_deferral_respects_tool_policy_and_tool_search_is_infra(
+        self,
+        classes,
+        monkeypatch: pytest.MonkeyPatch,
+    ):
+        """Adversarial-review follow-up (#3341): tool_search is appended AFTER the
+        subagent tool-policy filter, mirroring the lead's intentional decision
+        (test_tool_search_appended_after_policy_but_never_exposes_denied_tool).
+        Lock the safe-by-construction property:
+
+        - an MCP tool denied by ``disallowed_tools`` never enters the deferred
+          catalog, so tool_search can never promote/expose it;
+        - tool_search itself is infrastructure: naming it in ``disallowed_tools``
+          does not remove it, because its catalog derives from the already-
+          filtered list and carries no access the policy didn't already grant.
+        """
+        from langchain_core.tools import tool as as_tool
+
+        from deerflow.subagents import executor as executor_module
+        from deerflow.tools.mcp_metadata import tag_mcp_tool
+
+        SubagentConfig = classes["SubagentConfig"]
+        SubagentExecutor = classes["SubagentExecutor"]
+
+        # Stub skill storage so load_skills returns an empty list.
+        monkeypatch.setattr(
+            sys.modules["deerflow.skills.storage"],
+            "get_or_new_skill_storage",
+            lambda *, app_config=None: SimpleNamespace(load_skills=lambda *, enabled_only: []),
+        )
+        # Make the executor module see tool_search.enabled=True.
+        monkeypatch.setattr(executor_module, "get_app_config", lambda: SimpleNamespace(tool_search=SimpleNamespace(enabled=True)))
+
+        @as_tool
+        def active_tool(x: str) -> str:
+            "active"
+            return x
+
+        @as_tool
+        def mcp_allowed(x: str) -> str:
+            "allowed mcp tool"
+            return x
+
+        @as_tool
+        def mcp_denied(x: str) -> str:
+            "denied mcp tool"
+            return x
+
+        # The policy names one tagged tool and tool_search itself as disallowed.
+        config = SubagentConfig(
+            name="test-agent",
+            description="Test agent",
+            system_prompt="You are a test agent.",
+            max_turns=10,
+            timeout_seconds=60,
+            disallowed_tools=["mcp_denied", "tool_search"],
+        )
+        # One plain tool plus both tagged tools go in; the policy decides what survives.
+        executor = SubagentExecutor(
+            config=config,
+            tools=[active_tool, tag_mcp_tool(mcp_allowed), tag_mcp_tool(mcp_denied)],
+            thread_id="test-thread",
+        )
+
+        _state, final_tools, deferred_setup = await executor._build_initial_state("Do the task")
+
+        names = {t.name for t in final_tools}
+        # The policy-denied MCP tool is gone and never reaches the catalog.
+        assert "mcp_denied" not in names
+        assert "mcp_denied" not in deferred_setup.deferred_names
+        assert deferred_setup.deferred_names == frozenset({"mcp_allowed"})
+        # tool_search is infra: present despite being named in disallowed_tools.
+        assert "tool_search" in names
+
+    def test_create_agent_threads_deferred_setup_to_middlewares(
+        self,
+        classes,
+        base_config,
+        monkeypatch: pytest.MonkeyPatch,
+    ):
+        """A deferred setup passed to _create_agent flows into the subagent
+        middleware factory (so DeferredToolFilterMiddleware can attach)."""
+        from deerflow.subagents import executor as executor_module
+        from deerflow.tools.builtins.tool_search import DeferredToolSetup
+
+        SubagentExecutor = classes["SubagentExecutor"]
+        app_config = SimpleNamespace(models=[SimpleNamespace(name="default-model")])
+        # Holds the keyword arguments the fake middleware factory receives.
+        captured: dict[str, object] = {}
+
+        def fake_build_subagent_runtime_middlewares(**kwargs):
+            captured["middlewares"] = kwargs
+            return [object()]
+
+        # Model and agent construction are replaced with trivial stand-ins; only
+        # the arguments reaching the middleware factory are checked.
+        monkeypatch.setattr(executor_module, "create_chat_model", lambda **kwargs: object())
+        monkeypatch.setattr(executor_module, "create_agent", lambda **kwargs: object())
+        # Swap the middleware module entry in sys.modules for a stub exposing the
+        # fake factory. NOTE(review): presumably _create_agent imports the factory
+        # from this module at call time - confirm against the executor.
+        monkeypatch.setitem(
+            sys.modules,
+            "deerflow.agents.middlewares.tool_error_handling_middleware",
+            _module(
+                "deerflow.agents.middlewares.tool_error_handling_middleware",
+                build_subagent_runtime_middlewares=fake_build_subagent_runtime_middlewares,
+            ),
+        )
+
+        # Placeholder field values; the test only checks object identity below.
+        deferred_setup = DeferredToolSetup(object(), frozenset({"mcp_calc"}), "hash123")
+        executor = SubagentExecutor(config=base_config, tools=[], app_config=app_config, parent_model="parent-model")
+
+        executor._create_agent(tools=[], deferred_setup=deferred_setup)
+
+        # The very same setup object must reach the factory.
+        assert captured["middlewares"]["deferred_setup"] is deferred_setup
+

 # -----------------------------------------------------------------------------
 # Async Execution Path Tests
@@ -692,7 +904,7 @@ class TestAsyncExecutionPath:
        if system_messages:
            assert initial_messages[0] is system_messages[0], "SystemMessage must be the first message in the conversation"
            # The consolidated SystemMessage must carry both the system_prompt
-            # and all skill content — nothing should be split across two messages.
+            # and all skill content; nothing should be split across two messages.
            assert base_config.system_prompt in system_messages[0].content
            assert "Skill instruction text" in system_messages[0].content

@@ -1128,11 +1340,9 @@ class TestThreadSafety:
    @pytest.fixture
    def executor_module(self, _setup_executor_classes):
        """Import the executor module with real classes."""
-        import importlib
+        executor = importlib.import_module("deerflow.subagents.executor")

-        from deerflow.subagents import executor
-
-        return importlib.reload(executor)
+        return _patch_default_get_app_config(importlib.reload(executor))

    def test_multiple_executors_in_parallel(self, classes, base_config, msg):
        """Test multiple executors running in parallel via thread pool."""
@@ -1254,11 +1464,9 @@ class TestCleanupBackgroundTask:
    def executor_module(self, _setup_executor_classes):
        """Import the executor module with real classes."""
        # Re-import to get the real module with cleanup_background_task
-        import importlib
+        executor = importlib.import_module("deerflow.subagents.executor")

-        from deerflow.subagents import executor
-
-        return importlib.reload(executor)
+        return _patch_default_get_app_config(importlib.reload(executor))

    def test_cleanup_removes_terminal_completed_task(self, executor_module, classes):
        """Test that cleanup removes a COMPLETED task."""
@@ -1399,11 +1607,9 @@ class TestCooperativeCancellation:
    @pytest.fixture
    def executor_module(self, _setup_executor_classes):
        """Import the executor module with real classes."""
-        import importlib
+        executor = importlib.import_module("deerflow.subagents.executor")

-        from deerflow.subagents import executor
-
-        return importlib.reload(executor)
+        return _patch_default_get_app_config(importlib.reload(executor))

    @pytest.mark.anyio
    async def test_aexecute_cancelled_before_streaming(self, classes, base_config, mock_agent, msg):
@@ -253,3 +253,45 @@ def test_subagent_runtime_middlewares_skip_view_image_for_text_model(monkeypatch
    middlewares = build_subagent_runtime_middlewares(app_config=app_config, model_name="test-model")

    assert not any(isinstance(middleware, ViewImageMiddleware) for middleware in middlewares)
+
+
+def test_subagent_runtime_middlewares_attach_deferred_filter_when_setup_has_names(monkeypatch):
+    """A subagent built with deferred MCP tools gets DeferredToolFilterMiddleware, positioned before SafetyFinishReasonMiddleware (mirrors the lead ordering)."""
+    from langchain_core.tools import tool as as_tool
+
+    from deerflow.agents.middlewares.deferred_tool_filter_middleware import DeferredToolFilterMiddleware
+    from deerflow.agents.middlewares.safety_finish_reason_middleware import SafetyFinishReasonMiddleware
+    from deerflow.tools.builtins.tool_search import build_deferred_tool_setup
+    from deerflow.tools.mcp_metadata import tag_mcp_tool
+
+    # NOTE(review): both helpers are defined elsewhere in this test module;
+    # presumably they supply a usable app config and stub the runtime middleware
+    # imports - confirm there.
+    app_config = _make_app_config()
+    _stub_runtime_middleware_imports(monkeypatch)
+
+    @as_tool
+    def mcp_thing(x: str) -> str:
+        "deferred mcp tool"
+        return x
+
+    setup = build_deferred_tool_setup([tag_mcp_tool(mcp_thing)], enabled=True)
+    assert setup.deferred_names  # sanity: populated setup
+
+    middlewares = build_subagent_runtime_middlewares(app_config=app_config, deferred_setup=setup)
+
+    # Exactly one filter is expected, and it must sit earlier in the list than
+    # the safety middleware.
+    filters = [m for m in middlewares if isinstance(m, DeferredToolFilterMiddleware)]
+    assert len(filters) == 1
+    filter_idx = next(i for i, m in enumerate(middlewares) if isinstance(m, DeferredToolFilterMiddleware))
+    safety_idx = next(i for i, m in enumerate(middlewares) if isinstance(m, SafetyFinishReasonMiddleware))
+    assert filter_idx < safety_idx
+
+
+def test_subagent_runtime_middlewares_skip_deferred_filter_without_names(monkeypatch):
+    """No deferred setup (disabled / no MCP tool) -> no DeferredToolFilterMiddleware."""
+    from deerflow.agents.middlewares.deferred_tool_filter_middleware import DeferredToolFilterMiddleware
+    from deerflow.tools.builtins.tool_search import DeferredToolSetup
+
+    # NOTE(review): both helpers are defined elsewhere in this test module;
+    # presumably they supply a usable app config and stub the runtime middleware
+    # imports - confirm there.
+    app_config = _make_app_config()
+    _stub_runtime_middleware_imports(monkeypatch)
+
+    # Both a missing setup and a setup with an empty deferred-name set must
+    # produce a middleware list without the filter.
+    for setup in (None, DeferredToolSetup(None, frozenset(), None)):
+        middlewares = build_subagent_runtime_middlewares(app_config=app_config, deferred_setup=setup)
+        assert not any(isinstance(m, DeferredToolFilterMiddleware) for m in middlewares)
@@ -8,8 +8,8 @@ filter middleware are covered by:
 - tests/test_thread_state_promoted.py
 """

-from deerflow.agents.lead_agent.prompt import get_deferred_tools_prompt_section
 from deerflow.config.tool_search_config import ToolSearchConfig, load_tool_search_config_from_dict
+from deerflow.tools.builtins.tool_search import get_deferred_tools_prompt_section


 class TestToolSearchConfig: