mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-24 17:06:00 +00:00
fix(gateway): route get_config through get_app_config for mtime hot reload
`get_config(request)` returned the `app.state.config` snapshot captured at startup. The worker / lead-agent path then threaded that frozen `AppConfig` through `RunContext` and `agent_factory`, so per-run fields edited in `config.yaml` (notably `max_tokens`) were ignored until the gateway process was restarted — even though `get_app_config()` already does mtime-based reload at the bottom layer. Route the request dependency through `get_app_config()` directly. Runtime `ContextVar` overrides (`push_current_app_config`) and test-injected singletons (`set_app_config`) keep working; `app.state.config` is now only read at startup for one-shot bootstrap (logging level, IM channels, `langgraph_runtime` engines). `tests/test_gateway_deps_config.py` encoded the old snapshot contract and is removed; `tests/test_gateway_config_freshness.py` replaces it with mtime, ContextVar, and `set_app_config` coverage. `test_skills_custom_router.py` and `test_uploads_router.py` now inject test configs via FastAPI `dependency_overrides[get_config]` instead of mutating `app.state.config`. Document the hot-reload boundary in `backend/CLAUDE.md` so reviewers know which fields are picked up on the next request vs. which still require a restart (`database`, `checkpointer`, `run_events`, `stream_bridge`, `sandbox.use`, `log_level`, `channels.*`). Refs: bytedance/deer-flow#3107 (BUG-001)
This commit is contained in:
@@ -184,6 +184,8 @@ Setup: Copy `config.example.yaml` to `config.yaml` in the **project root** direc
|
|||||||
|
|
||||||
**Config Caching**: `get_app_config()` caches the parsed config, but automatically reloads it when the resolved config path changes or the file's mtime increases. This keeps Gateway and LangGraph reads aligned with `config.yaml` edits without requiring a manual process restart.
|
**Config Caching**: `get_app_config()` caches the parsed config, but automatically reloads it when the resolved config path changes or the file's mtime increases. This keeps Gateway and LangGraph reads aligned with `config.yaml` edits without requiring a manual process restart.
|
||||||
|
|
||||||
|
**Config Hot-Reload Boundary**: Gateway dependencies route through `get_app_config()` on every request, so per-run fields like `models[*].max_tokens`, `summarization.*`, `title.*`, `memory.*`, `subagents.*`, `tools[*]`, and the agent system prompt pick up `config.yaml` edits on the next message. Infrastructure fields that the gateway captures once at startup are **restart-required**: `database.*`, `checkpointer.*` (including SQLite WAL/journal settings), `run_events.*`, `stream_bridge.*`, `sandbox.use`, `log_level`, and the `channels.*` IM platform credentials. Changing those without restart leaves the live process bound to the old engines.
|
||||||
|
|
||||||
Configuration priority:
|
Configuration priority:
|
||||||
1. Explicit `config_path` argument
|
1. Explicit `config_path` argument
|
||||||
2. `DEER_FLOW_CONFIG_PATH` environment variable
|
2. `DEER_FLOW_CONFIG_PATH` environment variable
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ from typing import TYPE_CHECKING, TypeVar, cast
|
|||||||
from fastapi import FastAPI, HTTPException, Request
|
from fastapi import FastAPI, HTTPException, Request
|
||||||
from langgraph.types import Checkpointer
|
from langgraph.types import Checkpointer
|
||||||
|
|
||||||
from deerflow.config.app_config import AppConfig
|
from deerflow.config.app_config import AppConfig, get_app_config
|
||||||
from deerflow.persistence.feedback import FeedbackRepository
|
from deerflow.persistence.feedback import FeedbackRepository
|
||||||
from deerflow.runtime import RunContext, RunManager, StreamBridge
|
from deerflow.runtime import RunContext, RunManager, StreamBridge
|
||||||
from deerflow.runtime.events.store.base import RunEventStore
|
from deerflow.runtime.events.store.base import RunEventStore
|
||||||
@@ -30,12 +30,22 @@ if TYPE_CHECKING:
|
|||||||
T = TypeVar("T")
|
T = TypeVar("T")
|
||||||
|
|
||||||
|
|
||||||
def get_config(request: Request) -> AppConfig:
|
def get_config(request: Request) -> AppConfig: # noqa: ARG001 - kept for FastAPI Depends signature
|
||||||
"""Return the app-scoped ``AppConfig`` stored on ``app.state``."""
|
"""Return the freshest ``AppConfig`` for the current request.
|
||||||
config = getattr(request.app.state, "config", None)
|
|
||||||
if config is None:
|
Routes through :func:`deerflow.config.app_config.get_app_config`, which
|
||||||
raise HTTPException(status_code=503, detail="Configuration not available")
|
honours runtime ``ContextVar`` overrides and reloads ``config.yaml`` from
|
||||||
return config
|
disk when its mtime changes. ``app.state.config`` is no longer consulted
|
||||||
|
on the request hot path — it is set at startup only for one-shot infra
|
||||||
|
bootstrap (logging level, IM channels, ``langgraph_runtime`` engines).
|
||||||
|
Reading from ``get_app_config`` here closes the bytedance/deer-flow
|
||||||
|
issue #3107 BUG-001 split-brain where the worker / lead-agent thread saw
|
||||||
|
a stale startup snapshot.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
return get_app_config()
|
||||||
|
except FileNotFoundError as exc:
|
||||||
|
raise HTTPException(status_code=503, detail="Configuration not available") from exc
|
||||||
|
|
||||||
|
|
||||||
@asynccontextmanager
|
@asynccontextmanager
|
||||||
|
|||||||
@@ -0,0 +1,110 @@
|
|||||||
|
"""Regression tests for gateway config freshness on the request hot path.
|
||||||
|
|
||||||
|
Bytedance/deer-flow issue #3107 BUG-001: the worker and lead-agent path
|
||||||
|
captured ``app.state.config`` at gateway startup. ``config.yaml`` edits during
|
||||||
|
runtime were therefore ignored — ``get_app_config()``'s mtime-based reload
|
||||||
|
existed but was bypassed because the snapshot object was passed through
|
||||||
|
explicitly.
|
||||||
|
|
||||||
|
These tests pin the desired behaviour: a request-time ``get_config`` call must
|
||||||
|
observe the most recent on-disk ``config.yaml`` (mtime reload), and the
|
||||||
|
runtime ``ContextVar`` override must keep working for per-request injection.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from fastapi import Depends, FastAPI
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
from app.gateway.deps import get_config
|
||||||
|
from deerflow.config.app_config import (
|
||||||
|
AppConfig,
|
||||||
|
pop_current_app_config,
|
||||||
|
push_current_app_config,
|
||||||
|
reset_app_config,
|
||||||
|
set_app_config,
|
||||||
|
)
|
||||||
|
from deerflow.config.sandbox_config import SandboxConfig
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def _isolate_app_config_singleton():
|
||||||
|
"""Ensure each test starts with a clean module-level cache."""
|
||||||
|
reset_app_config()
|
||||||
|
yield
|
||||||
|
reset_app_config()
|
||||||
|
|
||||||
|
|
||||||
|
def _write_config_yaml(path: Path, *, log_level: str) -> None:
|
||||||
|
path.write_text(
|
||||||
|
f"""
|
||||||
|
sandbox:
|
||||||
|
use: deerflow.sandbox.local.provider:LocalSandboxProvider
|
||||||
|
log_level: {log_level}
|
||||||
|
""".strip()
|
||||||
|
+ "\n",
|
||||||
|
encoding="utf-8",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_app() -> FastAPI:
|
||||||
|
app = FastAPI()
|
||||||
|
|
||||||
|
@app.get("/probe")
|
||||||
|
def probe(cfg: AppConfig = Depends(get_config)):
|
||||||
|
return {"log_level": cfg.log_level}
|
||||||
|
|
||||||
|
return app
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_config_reflects_file_mtime_reload(tmp_path, monkeypatch):
|
||||||
|
"""Editing config.yaml at runtime must be visible to /probe without restart.
|
||||||
|
|
||||||
|
This is the literal repro for the issue: the gateway must not freeze the
|
||||||
|
config to whatever was on disk when the process started.
|
||||||
|
"""
|
||||||
|
config_file = tmp_path / "config.yaml"
|
||||||
|
_write_config_yaml(config_file, log_level="info")
|
||||||
|
monkeypatch.setenv("DEER_FLOW_CONFIG_PATH", str(config_file))
|
||||||
|
|
||||||
|
app = _build_app()
|
||||||
|
client = TestClient(app)
|
||||||
|
assert client.get("/probe").json() == {"log_level": "info"}
|
||||||
|
|
||||||
|
# Edit the file and bump its mtime — simulating a maintainer changing
|
||||||
|
# max_tokens / model settings in production while the gateway is live.
|
||||||
|
_write_config_yaml(config_file, log_level="debug")
|
||||||
|
future_mtime = config_file.stat().st_mtime + 5
|
||||||
|
os.utime(config_file, (future_mtime, future_mtime))
|
||||||
|
|
||||||
|
assert client.get("/probe").json() == {"log_level": "debug"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_config_respects_runtime_context_override(tmp_path, monkeypatch):
|
||||||
|
"""Per-request ``push_current_app_config`` injection must still win."""
|
||||||
|
config_file = tmp_path / "config.yaml"
|
||||||
|
_write_config_yaml(config_file, log_level="info")
|
||||||
|
monkeypatch.setenv("DEER_FLOW_CONFIG_PATH", str(config_file))
|
||||||
|
|
||||||
|
override = AppConfig(sandbox=SandboxConfig(use="test"), log_level="trace")
|
||||||
|
push_current_app_config(override)
|
||||||
|
try:
|
||||||
|
app = _build_app()
|
||||||
|
client = TestClient(app)
|
||||||
|
assert client.get("/probe").json() == {"log_level": "trace"}
|
||||||
|
finally:
|
||||||
|
pop_current_app_config()
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_config_respects_test_set_app_config():
|
||||||
|
"""``set_app_config`` (used by upload/skills router tests) keeps working."""
|
||||||
|
injected = AppConfig(sandbox=SandboxConfig(use="test"), log_level="warning")
|
||||||
|
set_app_config(injected)
|
||||||
|
|
||||||
|
app = _build_app()
|
||||||
|
client = TestClient(app)
|
||||||
|
assert client.get("/probe").json() == {"log_level": "warning"}
|
||||||
@@ -1,41 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
from fastapi import Depends, FastAPI
|
|
||||||
from fastapi.testclient import TestClient
|
|
||||||
|
|
||||||
from app.gateway.deps import get_config
|
|
||||||
from deerflow.config.app_config import AppConfig
|
|
||||||
from deerflow.config.sandbox_config import SandboxConfig
|
|
||||||
|
|
||||||
|
|
||||||
def test_get_config_returns_app_state_config():
|
|
||||||
"""get_config should return the exact AppConfig stored on app.state."""
|
|
||||||
app = FastAPI()
|
|
||||||
config = AppConfig(sandbox=SandboxConfig(use="test"))
|
|
||||||
app.state.config = config
|
|
||||||
|
|
||||||
@app.get("/probe")
|
|
||||||
def probe(cfg: AppConfig = Depends(get_config)):
|
|
||||||
return {"same_identity": cfg is config, "log_level": cfg.log_level}
|
|
||||||
|
|
||||||
client = TestClient(app)
|
|
||||||
response = client.get("/probe")
|
|
||||||
|
|
||||||
assert response.status_code == 200
|
|
||||||
assert response.json() == {"same_identity": True, "log_level": "info"}
|
|
||||||
|
|
||||||
|
|
||||||
def test_get_config_reads_updated_app_state():
|
|
||||||
"""Swapping app.state.config should be visible to the dependency."""
|
|
||||||
app = FastAPI()
|
|
||||||
app.state.config = AppConfig(sandbox=SandboxConfig(use="test"), log_level="info")
|
|
||||||
|
|
||||||
@app.get("/log-level")
|
|
||||||
def log_level(cfg: AppConfig = Depends(get_config)):
|
|
||||||
return {"level": cfg.log_level}
|
|
||||||
|
|
||||||
client = TestClient(app)
|
|
||||||
assert client.get("/log-level").json() == {"level": "info"}
|
|
||||||
|
|
||||||
app.state.config = app.state.config.model_copy(update={"log_level": "debug"})
|
|
||||||
assert client.get("/log-level").json() == {"level": "debug"}
|
|
||||||
@@ -7,6 +7,7 @@ from types import SimpleNamespace
|
|||||||
from fastapi import FastAPI
|
from fastapi import FastAPI
|
||||||
from fastapi.testclient import TestClient
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
from app.gateway.deps import get_config
|
||||||
from app.gateway.routers import skills as skills_router
|
from app.gateway.routers import skills as skills_router
|
||||||
from deerflow.skills.storage import get_or_new_skill_storage
|
from deerflow.skills.storage import get_or_new_skill_storage
|
||||||
from deerflow.skills.types import Skill
|
from deerflow.skills.types import Skill
|
||||||
@@ -38,7 +39,8 @@ def _make_skill(name: str, *, enabled: bool) -> Skill:
|
|||||||
|
|
||||||
def _make_test_app(config) -> FastAPI:
|
def _make_test_app(config) -> FastAPI:
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
app.state.config = config
|
app.state.config = config # kept for any startup-style reads
|
||||||
|
app.dependency_overrides[get_config] = lambda: config
|
||||||
app.include_router(skills_router.router)
|
app.include_router(skills_router.router)
|
||||||
return app
|
return app
|
||||||
|
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ from _router_auth_helpers import call_unwrapped, make_authed_test_app
|
|||||||
from fastapi import HTTPException, UploadFile
|
from fastapi import HTTPException, UploadFile
|
||||||
from fastapi.testclient import TestClient
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
from app.gateway.deps import get_config
|
||||||
from app.gateway.routers import uploads
|
from app.gateway.routers import uploads
|
||||||
|
|
||||||
|
|
||||||
@@ -631,6 +632,7 @@ def test_upload_limits_endpoint_requires_thread_access():
|
|||||||
cfg.uploads = {}
|
cfg.uploads = {}
|
||||||
app = make_authed_test_app(owner_check_passes=False)
|
app = make_authed_test_app(owner_check_passes=False)
|
||||||
app.state.config = cfg
|
app.state.config = cfg
|
||||||
|
app.dependency_overrides[get_config] = lambda: cfg
|
||||||
app.include_router(uploads.router)
|
app.include_router(uploads.router)
|
||||||
|
|
||||||
with TestClient(app) as client:
|
with TestClient(app) as client:
|
||||||
|
|||||||
Reference in New Issue
Block a user