mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-23 00:16:48 +00:00
fix(gateway): broaden get_config 503 to any config-load failure
Address review feedback on the previous commit: 1. Narrow exception catch removed. The old contract returned 503 whenever `app.state.config is None`. The first cut only mapped `FileNotFoundError`, leaving `PermissionError`, YAML parse errors, and pydantic `ValidationError` to bubble up as 500. At the request boundary we treat any inability to materialise the config as "configuration not available" (503) and log the original exception so the operator still has the stack. 2. Removed the unused `request: Request` parameter and the matching `# noqa: ARG001`. FastAPI's `Depends()` does not require the dependency to accept `Request`; the only call site uses the no-arg form. 3. `backend/CLAUDE.md` boundary now lists the *reason* each field is restart-required (engine binding, singleton caching, one-shot `apply_logging_level`, etc.), not just the field name, so reviewers do not have to reverse-engineer the boundary themselves. Tests parametrise four exception classes (`FileNotFoundError`, `PermissionError`, `ValueError`, `RuntimeError`) and assert 503 for each. Refs: bytedance/deer-flow#3107 (BUG-001)
This commit is contained in:
+11
-1
@@ -184,7 +184,17 @@ Setup: Copy `config.example.yaml` to `config.yaml` in the **project root** direc
|
||||
|
||||
**Config Caching**: `get_app_config()` caches the parsed config, but automatically reloads it when the resolved config path changes or the file's mtime increases. This keeps Gateway and LangGraph reads aligned with `config.yaml` edits without requiring a manual process restart.
|
||||
|
||||
**Config Hot-Reload Boundary**: Gateway dependencies route through `get_app_config()` on every request, so per-run fields like `models[*].max_tokens`, `summarization.*`, `title.*`, `memory.*`, `subagents.*`, `tools[*]`, and the agent system prompt pick up `config.yaml` edits on the next message. Infrastructure fields that the gateway captures once at startup are **restart-required**: `database.*`, `checkpointer.*` (including SQLite WAL/journal settings), `run_events.*`, `stream_bridge.*`, `sandbox.use`, `log_level`, and the `channels.*` IM platform credentials. Changing those without restart leaves the live process bound to the old engines.
|
||||
**Config Hot-Reload Boundary**: Gateway dependencies route through `get_app_config()` on every request, so per-run fields like `models[*].max_tokens`, `summarization.*`, `title.*`, `memory.*`, `subagents.*`, `tools[*]`, and the agent system prompt pick up `config.yaml` edits on the next message. Infrastructure fields that the gateway captures once at startup are **restart-required**:
|
||||
|
||||
| Field | Why a restart is required |
|
||||
|---|---|
|
||||
| `database.*` | `init_engine_from_config()` runs once during `langgraph_runtime()` startup; the SQLAlchemy engine holds the connection pool. |
|
||||
| `checkpointer.*` (including SQLite WAL/journal settings) | `make_checkpointer()` binds the persistent checkpointer once at startup. |
|
||||
| `run_events.*` | `make_run_event_store()` selects memory- vs. SQL-backed implementation at startup. |
|
||||
| `stream_bridge.*` | `make_stream_bridge()` constructs the bridge object once. |
|
||||
| `sandbox.use` | `get_sandbox_provider()` caches the provider singleton (`_default_sandbox_provider`); a new class path takes effect only on next process start. |
|
||||
| `log_level` | `apply_logging_level()` is called only in `app.py` startup; it mutates the root logger's level, and `get_app_config()` returning a fresh `AppConfig` does not retrigger it. |
|
||||
| `channels.*` IM platform credentials | `start_channel_service()` is invoked once during startup; live channels are not rebuilt on config change. |
|
||||
|
||||
Configuration priority:
|
||||
1. Explicit `config_path` argument
|
||||
|
||||
@@ -8,6 +8,7 @@ Initialization is handled directly in ``app.py`` via :class:`AsyncExitStack`.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from collections.abc import AsyncGenerator, Callable
|
||||
from contextlib import AsyncExitStack, asynccontextmanager
|
||||
from typing import TYPE_CHECKING, TypeVar, cast
|
||||
@@ -21,6 +22,8 @@ from deerflow.runtime import RunContext, RunManager, StreamBridge
|
||||
from deerflow.runtime.events.store.base import RunEventStore
|
||||
from deerflow.runtime.runs.store.base import RunStore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from app.gateway.auth.local_provider import LocalAuthProvider
|
||||
from app.gateway.auth.repositories.sqlite import SQLiteUserRepository
|
||||
@@ -30,7 +33,7 @@ if TYPE_CHECKING:
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
def get_config(request: Request) -> AppConfig: # noqa: ARG001 - kept for FastAPI Depends signature
|
||||
def get_config() -> AppConfig:
|
||||
"""Return the freshest ``AppConfig`` for the current request.
|
||||
|
||||
Routes through :func:`deerflow.config.app_config.get_app_config`, which
|
||||
@@ -41,10 +44,16 @@ def get_config(request: Request) -> AppConfig: # noqa: ARG001 - kept for FastAP
|
||||
Reading from ``get_app_config`` here closes the bytedance/deer-flow
|
||||
issue #3107 BUG-001 split-brain where the worker / lead-agent thread saw
|
||||
a stale startup snapshot.
|
||||
|
||||
Any failure to materialise the config (missing file, permission denied,
|
||||
YAML parse error, validation error) is reported as 503 — semantically
|
||||
"the gateway cannot serve requests without a usable configuration" — and
|
||||
logged with the original exception so operators have something to debug.
|
||||
"""
|
||||
try:
|
||||
return get_app_config()
|
||||
except FileNotFoundError as exc:
|
||||
except Exception as exc: # noqa: BLE001 - request boundary: log and degrade gracefully
|
||||
logger.exception("Failed to load AppConfig at request time")
|
||||
raise HTTPException(status_code=503, detail="Configuration not available") from exc
|
||||
|
||||
|
||||
@@ -151,7 +160,7 @@ def get_run_context(request: Request) -> RunContext:
|
||||
|
||||
Returns a *base* context with infrastructure dependencies.
|
||||
"""
|
||||
config = get_config(request)
|
||||
config = get_config()
|
||||
return RunContext(
|
||||
checkpointer=get_checkpointer(request),
|
||||
store=get_store(request),
|
||||
|
||||
@@ -20,6 +20,7 @@ import pytest
|
||||
from fastapi import Depends, FastAPI
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from app.gateway import deps as gateway_deps
|
||||
from app.gateway.deps import get_config
|
||||
from deerflow.config.app_config import (
|
||||
AppConfig,
|
||||
@@ -108,3 +109,35 @@ def test_get_config_respects_test_set_app_config():
|
||||
app = _build_app()
|
||||
client = TestClient(app)
|
||||
assert client.get("/probe").json() == {"log_level": "warning"}
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"exception",
|
||||
[
|
||||
FileNotFoundError("config.yaml not found"),
|
||||
PermissionError("config.yaml not readable"),
|
||||
ValueError("invalid config"),
|
||||
RuntimeError("yaml parse error"),
|
||||
],
|
||||
)
|
||||
def test_get_config_returns_503_on_any_load_failure(monkeypatch, exception):
|
||||
"""Any failure to materialise the config must surface as 503, not 500.
|
||||
|
||||
Bytedance/deer-flow issue #3107 BUG-001 review: the original snapshot
|
||||
contract returned 503 when ``app.state.config is None``. The first cut of
|
||||
this fix only mapped ``FileNotFoundError`` to 503, which left
|
||||
``PermissionError`` / ``yaml.YAMLError`` / ``ValidationError`` etc. bubbling
|
||||
up as 500. Catch every load failure at the request boundary.
|
||||
"""
|
||||
|
||||
def _broken_get_app_config():
|
||||
raise exception
|
||||
|
||||
monkeypatch.setattr(gateway_deps, "get_app_config", _broken_get_app_config)
|
||||
|
||||
app = _build_app()
|
||||
client = TestClient(app, raise_server_exceptions=False)
|
||||
response = client.get("/probe")
|
||||
|
||||
assert response.status_code == 503
|
||||
assert response.json() == {"detail": "Configuration not available"}
|
||||
|
||||
Reference in New Issue
Block a user