refactor: thread app_config through middleware factories (#2652)

* refactor: thread app_config through middleware factories

Continues the incremental config-refactor sequence (#2611 root, #2612 lead
path) one layer deeper into the middleware factories. Two ambient lookups
inside _build_runtime_middlewares are eliminated and the LLMErrorHandling
band-aid removed:

- _build_runtime_middlewares / build_lead_runtime_middlewares /
  build_subagent_runtime_middlewares now require app_config: AppConfig.
- get_guardrails_config() inside the factory is replaced with
  app_config.guardrails (semantically identical — same default-factory
  GuardrailsConfig — verified by direct equality check).
- LLMErrorHandlingMiddleware.__init__ now requires app_config and reads
  circuit_breaker fields directly. The class-level
  circuit_failure_threshold / circuit_recovery_timeout_sec defaults are
  removed along with the try/except (FileNotFoundError, RuntimeError):
  pass band-aid — the let-it-crash invariant the rest of the refactor
  enforces.

Caller chain (already-resolved app_config sources):
- _build_middlewares in lead_agent/agent.py: reorder so
  resolved_app_config = app_config or get_app_config() is computed BEFORE
  build_lead_runtime_middlewares is called, then passed as kwarg.
- SubagentExecutor: optional app_config parameter (mirrors the lead-agent
  pattern); _create_agent does the same `or get_app_config()` fallback at
  agent-build time, so task_tool callers don't need to plumb app_config
  through yet (typed-context plumbing for tool runtimes is a separate
  refactor).

Tests:
- test_llm_error_handling_middleware: _make_app_config helper using
  AppConfig(sandbox=SandboxConfig(use="test")) — same minimal-config
  pattern conftest already uses. Three direct LLMErrorHandlingMiddleware()
  calls each followed by post-construction circuit_breaker mutation fold
  cleanly into _build_middleware(circuit_failure_threshold=...,
  circuit_recovery_timeout_sec=...).

Verification:
- tests/test_llm_error_handling_middleware.py — 14 passed
- tests/test_subagent_executor.py — 28 passed
- tests/test_tool_error_handling_middleware.py — 6 passed
- tests/test_task_tool_core_logic.py — 18 passed (verifies task_tool
  unchanged behavior)
- Full suite: 2697 passed, 3 skipped. The single intermittent failure in
  tests/test_client_e2e.py::test_tool_call_produces_events is pre-existing
  LLM flakiness (the test asserts the model decided to call a tool;
  reproduces 1/3 on unchanged main as well).

* fix: address middleware app config review comments

* fix: satisfy app config annotation lint

* test: cover explicit app config middleware wiring

---------

Co-authored-by: greatmengqi <chenmengqi.0376@bytedance.com>
This commit is contained in:
greatmengqi
2026-04-30 12:41:09 +08:00
committed by GitHub
parent 74081a85a6
commit 38714b6ceb
8 changed files with 236 additions and 34 deletions
@@ -259,8 +259,8 @@ def _build_middlewares(
Returns:
List of middleware instances.
"""
middlewares = build_lead_runtime_middlewares(lazy_init=True)
resolved_app_config = app_config or get_app_config()
middlewares = build_lead_runtime_middlewares(app_config=resolved_app_config, lazy_init=True)
# Add summarization middleware if enabled
summarization_middleware = _create_summarization_middleware(app_config=resolved_app_config)
@@ -20,7 +20,7 @@ from langchain.agents.middleware.types import (
from langchain_core.messages import AIMessage
from langgraph.errors import GraphBubbleUp
from deerflow.config import get_app_config
from deerflow.config.app_config import AppConfig
logger = logging.getLogger(__name__)
@@ -70,20 +70,11 @@ class LLMErrorHandlingMiddleware(AgentMiddleware[AgentState]):
retry_base_delay_ms: int = 1000
retry_cap_delay_ms: int = 8000
circuit_failure_threshold: int = 5
circuit_recovery_timeout_sec: int = 60
def __init__(self, **kwargs: Any) -> None:
def __init__(self, *, app_config: AppConfig, **kwargs: Any) -> None:
super().__init__(**kwargs)
# Load Circuit Breaker configs from app config if available, fall back to defaults
try:
app_config = get_app_config()
self.circuit_failure_threshold = app_config.circuit_breaker.failure_threshold
self.circuit_recovery_timeout_sec = app_config.circuit_breaker.recovery_timeout_sec
except (FileNotFoundError, RuntimeError):
# Gracefully fall back to class defaults in test environments
pass
self.circuit_failure_threshold = app_config.circuit_breaker.failure_threshold
self.circuit_recovery_timeout_sec = app_config.circuit_breaker.recovery_timeout_sec
# Circuit Breaker state
self._circuit_lock = threading.Lock()
@@ -11,6 +11,8 @@ from langgraph.errors import GraphBubbleUp
from langgraph.prebuilt.tool_node import ToolCallRequest
from langgraph.types import Command
from deerflow.config.app_config import AppConfig
logger = logging.getLogger(__name__)
_MISSING_TOOL_CALL_ID = "missing_tool_call_id"
@@ -67,6 +69,7 @@ class ToolErrorHandlingMiddleware(AgentMiddleware[AgentState]):
def _build_runtime_middlewares(
*,
app_config: AppConfig,
include_uploads: bool,
include_dangling_tool_call_patch: bool,
lazy_init: bool = True,
@@ -91,12 +94,10 @@ def _build_runtime_middlewares(
middlewares.append(DanglingToolCallMiddleware())
middlewares.append(LLMErrorHandlingMiddleware())
middlewares.append(LLMErrorHandlingMiddleware(app_config=app_config))
# Guardrail middleware (if configured)
from deerflow.config.guardrails_config import get_guardrails_config
guardrails_config = get_guardrails_config()
guardrails_config = app_config.guardrails
if guardrails_config.enabled and guardrails_config.provider:
import inspect
@@ -125,18 +126,20 @@ def _build_runtime_middlewares(
return middlewares
def build_lead_runtime_middlewares(*, lazy_init: bool = True) -> list[AgentMiddleware]:
def build_lead_runtime_middlewares(*, app_config: AppConfig, lazy_init: bool = True) -> list[AgentMiddleware]:
"""Middlewares shared by lead agent runtime before lead-only middlewares."""
return _build_runtime_middlewares(
app_config=app_config,
include_uploads=True,
include_dangling_tool_call_patch=True,
lazy_init=lazy_init,
)
def build_subagent_runtime_middlewares(*, lazy_init: bool = True) -> list[AgentMiddleware]:
def build_subagent_runtime_middlewares(*, app_config: AppConfig, lazy_init: bool = True) -> list[AgentMiddleware]:
"""Middlewares shared by subagent runtime before subagent-only middlewares."""
return _build_runtime_middlewares(
app_config=app_config,
include_uploads=False,
include_dangling_tool_call_patch=True,
lazy_init=lazy_init,
@@ -17,6 +17,7 @@ from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain_core.runnables import RunnableConfig
from deerflow.agents.thread_state import SandboxState, ThreadDataState, ThreadState
from deerflow.config.app_config import AppConfig
from deerflow.models import create_chat_model
from deerflow.subagents.config import SubagentConfig
@@ -132,6 +133,7 @@ class SubagentExecutor:
self,
config: SubagentConfig,
tools: list[BaseTool],
app_config: AppConfig | None = None,
parent_model: str | None = None,
sandbox_state: SandboxState | None = None,
thread_data: ThreadDataState | None = None,
@@ -143,6 +145,9 @@ class SubagentExecutor:
Args:
config: Subagent configuration.
tools: List of all available tools (will be filtered).
app_config: Resolved AppConfig; threaded into middleware factories
at agent-build time. When None, ``_create_agent`` falls back to
``get_app_config()`` (matches the lead-agent factory's pattern).
parent_model: The parent agent's model name for inheritance.
sandbox_state: Sandbox state from parent agent.
thread_data: Thread data from parent agent.
@@ -150,6 +155,7 @@ class SubagentExecutor:
trace_id: Trace ID from parent for distributed tracing.
"""
self.config = config
self.app_config = app_config
self.parent_model = parent_model
self.sandbox_state = sandbox_state
self.thread_data = thread_data
@@ -168,13 +174,17 @@ class SubagentExecutor:
def _create_agent(self):
"""Create the agent instance."""
# Mirror lead-agent factory pattern: prefer explicit app_config,
# fall back to ambient lookup at agent-build time.
from deerflow.config import get_app_config
resolved_app_config = self.app_config or get_app_config()
model_name = _get_model_name(self.config, self.parent_model)
model = create_chat_model(name=model_name, thinking_enabled=False)
model = create_chat_model(name=model_name, thinking_enabled=False, app_config=resolved_app_config)
from deerflow.agents.middlewares.tool_error_handling_middleware import build_subagent_runtime_middlewares
# Reuse shared middleware composition with lead agent.
middlewares = build_subagent_runtime_middlewares(lazy_init=True)
middlewares = build_subagent_runtime_middlewares(app_config=resolved_app_config, lazy_init=True)
return create_agent(
model=model,