diff --git a/backend/CLAUDE.md b/backend/CLAUDE.md index b951f919c..11e255777 100644 --- a/backend/CLAUDE.md +++ b/backend/CLAUDE.md @@ -184,6 +184,8 @@ Setup: Copy `config.example.yaml` to `config.yaml` in the **project root** direc **Config Caching**: `get_app_config()` caches the parsed config, but automatically reloads it when the resolved config path changes or the file's mtime increases. This keeps Gateway and LangGraph reads aligned with `config.yaml` edits without requiring a manual process restart. +**Config Hot-Reload Boundary**: Gateway dependencies route through `get_app_config()` on every request, so per-run fields like `models[*].max_tokens`, `summarization.*`, `title.*`, `memory.*`, `subagents.*`, `tools[*]`, and the agent system prompt pick up `config.yaml` edits on the next message. Infrastructure fields that the gateway captures once at startup are **restart-required**: `database.*`, `checkpointer.*` (including SQLite WAL/journal settings), `run_events.*`, `stream_bridge.*`, `sandbox.use`, `log_level`, and the `channels.*` IM platform credentials. Changing those without restart leaves the live process bound to the old engines. + Configuration priority: 1. Explicit `config_path` argument 2. `DEER_FLOW_CONFIG_PATH` environment variable diff --git a/backend/app/gateway/deps.py b/backend/app/gateway/deps.py index 96ea7c5ea..51363014b 100644 --- a/backend/app/gateway/deps.py +++ b/backend/app/gateway/deps.py @@ -15,7 +15,7 @@ from typing import TYPE_CHECKING, TypeVar, cast from fastapi import FastAPI, HTTPException, Request from langgraph.types import Checkpointer -from deerflow.config.app_config import AppConfig +from deerflow.config.app_config import AppConfig, get_app_config from deerflow.persistence.feedback import FeedbackRepository from deerflow.runtime import RunContext, RunManager, StreamBridge from deerflow.runtime.events.store.base import RunEventStore @@ -30,12 +30,22 @@ if TYPE_CHECKING: T = TypeVar("T") -def get_config(request: Request) -> AppConfig: - """Return the app-scoped ``AppConfig`` stored on ``app.state``.""" - config = getattr(request.app.state, "config", None) - if config is None: - raise HTTPException(status_code=503, detail="Configuration not available") - return config +def get_config(request: Request) -> AppConfig: # noqa: ARG001 - kept for FastAPI Depends signature + """Return the freshest ``AppConfig`` for the current request. + + Routes through :func:`deerflow.config.app_config.get_app_config`, which + honours runtime ``ContextVar`` overrides and reloads ``config.yaml`` from + disk when its mtime changes. ``app.state.config`` is no longer consulted + on the request hot path — it is set at startup only for one-shot infra + bootstrap (logging level, IM channels, ``langgraph_runtime`` engines). + Reading from ``get_app_config`` here closes the bytedance/deer-flow + issue #3107 BUG-001 split-brain where the worker / lead-agent thread saw + a stale startup snapshot. + """ + try: + return get_app_config() + except FileNotFoundError as exc: + raise HTTPException(status_code=503, detail="Configuration not available") from exc @asynccontextmanager diff --git a/backend/tests/test_gateway_config_freshness.py b/backend/tests/test_gateway_config_freshness.py new file mode 100644 index 000000000..9dc7d05cb --- /dev/null +++ b/backend/tests/test_gateway_config_freshness.py @@ -0,0 +1,110 @@ +"""Regression tests for gateway config freshness on the request hot path. + +Bytedance/deer-flow issue #3107 BUG-001: the worker and lead-agent path +captured ``app.state.config`` at gateway startup. ``config.yaml`` edits during +runtime were therefore ignored — ``get_app_config()``'s mtime-based reload +existed but was bypassed because the snapshot object was passed through +explicitly. + +These tests pin the desired behaviour: a request-time ``get_config`` call must +observe the most recent on-disk ``config.yaml`` (mtime reload), and the +runtime ``ContextVar`` override must keep working for per-request injection. +""" + +from __future__ import annotations + +import os +from pathlib import Path + +import pytest +from fastapi import Depends, FastAPI +from fastapi.testclient import TestClient + +from app.gateway.deps import get_config +from deerflow.config.app_config import ( + AppConfig, + pop_current_app_config, + push_current_app_config, + reset_app_config, + set_app_config, +) +from deerflow.config.sandbox_config import SandboxConfig + + +@pytest.fixture(autouse=True) +def _isolate_app_config_singleton(): + """Ensure each test starts with a clean module-level cache.""" + reset_app_config() + yield + reset_app_config() + + +def _write_config_yaml(path: Path, *, log_level: str) -> None: + path.write_text( + f""" +sandbox: + use: deerflow.sandbox.local.provider:LocalSandboxProvider +log_level: {log_level} +""".strip() + + "\n", + encoding="utf-8", + ) + + +def _build_app() -> FastAPI: + app = FastAPI() + + @app.get("/probe") + def probe(cfg: AppConfig = Depends(get_config)): + return {"log_level": cfg.log_level} + + return app + + +def test_get_config_reflects_file_mtime_reload(tmp_path, monkeypatch): + """Editing config.yaml at runtime must be visible to /probe without restart. + + This is the literal repro for the issue: the gateway must not freeze the + config to whatever was on disk when the process started. + """ + config_file = tmp_path / "config.yaml" + _write_config_yaml(config_file, log_level="info") + monkeypatch.setenv("DEER_FLOW_CONFIG_PATH", str(config_file)) + + app = _build_app() + client = TestClient(app) + assert client.get("/probe").json() == {"log_level": "info"} + + # Edit the file and bump its mtime — simulating a maintainer changing + # max_tokens / model settings in production while the gateway is live. + _write_config_yaml(config_file, log_level="debug") + future_mtime = config_file.stat().st_mtime + 5 + os.utime(config_file, (future_mtime, future_mtime)) + + assert client.get("/probe").json() == {"log_level": "debug"} + + +def test_get_config_respects_runtime_context_override(tmp_path, monkeypatch): + """Per-request ``push_current_app_config`` injection must still win.""" + config_file = tmp_path / "config.yaml" + _write_config_yaml(config_file, log_level="info") + monkeypatch.setenv("DEER_FLOW_CONFIG_PATH", str(config_file)) + + override = AppConfig(sandbox=SandboxConfig(use="test"), log_level="trace") + push_current_app_config(override) + try: + app = _build_app() + client = TestClient(app) + assert client.get("/probe").json() == {"log_level": "trace"} + finally: + pop_current_app_config() + + +def test_get_config_respects_test_set_app_config(): + """``set_app_config`` (used by upload/skills router tests) keeps working.""" + injected = AppConfig(sandbox=SandboxConfig(use="test"), log_level="warning") + set_app_config(injected) + + app = _build_app() + client = TestClient(app) + assert client.get("/probe").json() == {"log_level": "warning"} diff --git a/backend/tests/test_gateway_deps_config.py b/backend/tests/test_gateway_deps_config.py deleted file mode 100644 index 70f9124b6..000000000 --- a/backend/tests/test_gateway_deps_config.py +++ /dev/null @@ -1,41 +0,0 @@ -from __future__ import annotations - -from fastapi import Depends, FastAPI -from fastapi.testclient import TestClient - -from app.gateway.deps import get_config -from deerflow.config.app_config import AppConfig -from deerflow.config.sandbox_config import SandboxConfig - - -def test_get_config_returns_app_state_config(): - """get_config should return the exact AppConfig stored on app.state.""" - app = FastAPI() - config = AppConfig(sandbox=SandboxConfig(use="test")) - app.state.config = config - - @app.get("/probe") - def probe(cfg: AppConfig = Depends(get_config)): - return {"same_identity": cfg is config, "log_level": cfg.log_level} - - client = TestClient(app) - response = client.get("/probe") - - assert response.status_code == 200 - assert response.json() == {"same_identity": True, "log_level": "info"} - - -def test_get_config_reads_updated_app_state(): - """Swapping app.state.config should be visible to the dependency.""" - app = FastAPI() - app.state.config = AppConfig(sandbox=SandboxConfig(use="test"), log_level="info") - - @app.get("/log-level") - def log_level(cfg: AppConfig = Depends(get_config)): - return {"level": cfg.log_level} - - client = TestClient(app) - assert client.get("/log-level").json() == {"level": "info"} - - app.state.config = app.state.config.model_copy(update={"log_level": "debug"}) - assert client.get("/log-level").json() == {"level": "debug"} diff --git a/backend/tests/test_skills_custom_router.py b/backend/tests/test_skills_custom_router.py index ed93e5510..e8a86d8ab 100644 --- a/backend/tests/test_skills_custom_router.py +++ b/backend/tests/test_skills_custom_router.py @@ -7,6 +7,7 @@ from types import SimpleNamespace from fastapi import FastAPI from fastapi.testclient import TestClient +from app.gateway.deps import get_config from app.gateway.routers import skills as skills_router from deerflow.skills.storage import get_or_new_skill_storage from deerflow.skills.types import Skill @@ -38,7 +39,8 @@ def _make_skill(name: str, *, enabled: bool) -> Skill: def _make_test_app(config) -> FastAPI: app = FastAPI() - app.state.config = config + app.state.config = config # kept for any startup-style reads + app.dependency_overrides[get_config] = lambda: config app.include_router(skills_router.router) return app diff --git a/backend/tests/test_uploads_router.py b/backend/tests/test_uploads_router.py index 7846865b8..1bcdb2eb7 100644 --- a/backend/tests/test_uploads_router.py +++ b/backend/tests/test_uploads_router.py @@ -11,6 +11,7 @@ from _router_auth_helpers import call_unwrapped, make_authed_test_app from fastapi import HTTPException, UploadFile from fastapi.testclient import TestClient +from app.gateway.deps import get_config from app.gateway.routers import uploads @@ -631,6 +632,7 @@ def test_upload_limits_endpoint_requires_thread_access(): cfg.uploads = {} app = make_authed_test_app(owner_check_passes=False) app.state.config = cfg + app.dependency_overrides[get_config] = lambda: cfg app.include_router(uploads.router) with TestClient(app) as client: