fix(gateway): broaden get_config 503 to any config-load failure

Address review feedback on the previous commit:

1. Narrow exception catch removed. The old contract returned 503 whenever
   `app.state.config is None`. The first cut only mapped
   `FileNotFoundError`, leaving `PermissionError`, YAML parse errors, and
   pydantic `ValidationError` to bubble up as 500. At the request boundary
   we treat any inability to materialise the config as "configuration not
   available" (503) and log the original exception so the operator still
   has the stack.

2. Removed the unused `request: Request` parameter and the matching
   `# noqa: ARG001`. FastAPI's `Depends()` does not require the dependency
   to accept `Request`; the only call site uses the no-arg form.

3. `backend/CLAUDE.md` boundary now lists the *reason* each field is
   restart-required (engine binding, singleton caching, one-shot
   `apply_logging_level`, etc.), not just the field name, so reviewers do
   not have to reverse-engineer the boundary themselves.

Tests parametrise four exception classes (`FileNotFoundError`,
`PermissionError`, `ValueError`, `RuntimeError`) and assert 503 for each.

Refs: bytedance/deer-flow#3107 (BUG-001)
This commit is contained in:
fancyboi999
2026-05-21 16:25:40 +08:00
parent 1a28334b19
commit 7976bdf50c
3 changed files with 56 additions and 4 deletions
+12 -3
View File
@@ -8,6 +8,7 @@ Initialization is handled directly in ``app.py`` via :class:`AsyncExitStack`.
from __future__ import annotations
import logging
from collections.abc import AsyncGenerator, Callable
from contextlib import AsyncExitStack, asynccontextmanager
from typing import TYPE_CHECKING, TypeVar, cast
@@ -21,6 +22,8 @@ from deerflow.runtime import RunContext, RunManager, StreamBridge
from deerflow.runtime.events.store.base import RunEventStore
from deerflow.runtime.runs.store.base import RunStore
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from app.gateway.auth.local_provider import LocalAuthProvider
from app.gateway.auth.repositories.sqlite import SQLiteUserRepository
@@ -30,7 +33,7 @@ if TYPE_CHECKING:
T = TypeVar("T")
def get_config(request: Request) -> AppConfig: # noqa: ARG001 - kept for FastAPI Depends signature
def get_config() -> AppConfig:
"""Return the freshest ``AppConfig`` for the current request.
Routes through :func:`deerflow.config.app_config.get_app_config`, which
@@ -41,10 +44,16 @@ def get_config(request: Request) -> AppConfig: # noqa: ARG001 - kept for FastAP
Reading from ``get_app_config`` here closes the bytedance/deer-flow
issue #3107 BUG-001 split-brain where the worker / lead-agent thread saw
a stale startup snapshot.
Any failure to materialise the config (missing file, permission denied,
YAML parse error, validation error) is reported as 503 — semantically
"the gateway cannot serve requests without a usable configuration" — and
logged with the original exception so operators have something to debug.
"""
try:
return get_app_config()
except FileNotFoundError as exc:
except Exception as exc: # noqa: BLE001 - request boundary: log and degrade gracefully
logger.exception("Failed to load AppConfig at request time")
raise HTTPException(status_code=503, detail="Configuration not available") from exc
@@ -151,7 +160,7 @@ def get_run_context(request: Request) -> RunContext:
Returns a *base* context with infrastructure dependencies.
"""
config = get_config(request)
config = get_config()
return RunContext(
checkpointer=get_checkpointer(request),
store=get_store(request),