Files
deer-flow/backend/tests/test_gateway_config_freshness.py
T
fancyboi999 c0bc7a0648 fix(gateway): drop app.state.config snapshot and freeze run_events_config
Address @ShenAC-SAC's BUG-001 review on #3131. The previous cut still
stored an ``AppConfig`` snapshot on ``app.state.config`` for startup
bootstrap. Two follow-on hazards from that:

1. Future code touching the gateway lifespan could accidentally start
   reading ``app.state.config`` again, silently regressing the request
   hot path back to a stale snapshot.
2. ``get_run_context()`` paired a freshly-reloaded ``AppConfig`` with the
   startup-bound ``event_store`` and a *live* ``run_events_config``
   field — so an operator who edited ``run_events.backend`` mid-flight
   would have produced a run context whose ``event_store`` and
   ``run_events_config`` referred to different backends.

Clean approach (aligned with the direction in PR #3128):

- ``lifespan()`` keeps a local ``startup_config`` variable and passes it
  explicitly into ``langgraph_runtime(app, startup_config)`` and into
  ``start_channel_service``. No ``app.state.config`` attribute is set at
  any point.
- ``langgraph_runtime`` now accepts ``startup_config`` as a required
  parameter, removing the ``getattr(app.state, "config", None)`` lookup
  and the "config not initialised" runtime error.
- The matching ``run_events_config`` is frozen onto ``app.state`` next
  to ``run_event_store`` so ``get_run_context`` reads the two from the
  same startup-time source. ``app_config`` continues to be resolved
  live via ``get_app_config()``.
- ``backend/CLAUDE.md`` boundary explanation updated to spell out the
  ``startup_config`` / ``get_app_config()`` split.

New regression test ``test_run_context_app_config_reflects_yaml_edit``
exercises the worker-feeding path: it asserts that ``ctx.app_config``
follows a mid-flight ``config.yaml`` edit while
``ctx.run_events_config`` stays frozen to the startup snapshot the
event store was built from.

Refs: bytedance/deer-flow#3107 (BUG-001), bytedance/deer-flow#3131 review
2026-05-21 18:51:24 +08:00

190 lines
6.5 KiB
Python

"""Regression tests for gateway config freshness on the request hot path.
Bytedance/deer-flow issue #3107 BUG-001: the worker and lead-agent path
captured ``app.state.config`` at gateway startup. ``config.yaml`` edits during
runtime were therefore ignored — ``get_app_config()``'s mtime-based reload
existed but was bypassed because the snapshot object was passed through
explicitly.
These tests pin the desired behaviour: a request-time ``get_config`` call must
observe the most recent on-disk ``config.yaml`` (mtime reload), and the
runtime ``ContextVar`` override must keep working for per-request injection.
"""
from __future__ import annotations
import os
from pathlib import Path
import pytest
from fastapi import Depends, FastAPI
from fastapi.testclient import TestClient
from app.gateway import deps as gateway_deps
from app.gateway.deps import get_config
from deerflow.config.app_config import (
AppConfig,
pop_current_app_config,
push_current_app_config,
reset_app_config,
set_app_config,
)
from deerflow.config.sandbox_config import SandboxConfig
@pytest.fixture(autouse=True)
def _isolate_app_config_singleton():
    """Reset the module-level app-config cache around every test.

    ``autouse=True`` applies this to each test in the module without the test
    having to request it. The cache is reset once before the test body runs
    and once more after it finishes.
    """
    reset_app_config()  # setup: begin from a clean cache
    yield
    reset_app_config()  # teardown: discard whatever the test left behind
def _write_config_yaml(path: Path, *, log_level: str) -> None:
    """Write a minimal ``config.yaml`` to *path*.

    The document contains a ``sandbox`` mapping with a single ``use`` entry
    and a top-level ``log_level`` key. Any existing file at *path* is
    overwritten.

    Args:
        path: Destination file.
        log_level: Value written for the top-level ``log_level`` key.
    """
    # ``use`` must sit *under* ``sandbox`` (two-space indent). At column 0 it
    # would parse as a separate top-level key and leave ``sandbox`` null,
    # which does not match the ``AppConfig(sandbox=SandboxConfig(use=...))``
    # shape the tests in this module construct by hand. Building the text
    # from explicit lines keeps the nesting independent of source formatting.
    document = "\n".join(
        (
            "sandbox:",
            "  use: deerflow.sandbox.local.provider:LocalSandboxProvider",
            f"log_level: {log_level}",
        )
    )
    # Normalise to exactly one trailing newline.
    path.write_text(document.strip() + "\n", encoding="utf-8")
def _build_app() -> FastAPI:
    """Return a fresh FastAPI app exposing a single ``GET /probe`` route.

    The route obtains an ``AppConfig`` through the ``get_config`` dependency
    and returns its ``log_level`` as ``{"log_level": ...}``.
    """
    probe_app = FastAPI()

    def probe(cfg: AppConfig = Depends(get_config)):
        return {"log_level": cfg.log_level}

    # Same registration the ``@app.get("/probe")`` decorator form performs.
    probe_app.get("/probe")(probe)
    return probe_app
def test_get_config_reflects_file_mtime_reload(tmp_path, monkeypatch):
    """A ``config.yaml`` edit made at runtime must reach ``/probe`` without restart.

    Literal repro for the issue: the value served must follow the file on
    disk rather than staying pinned to what was there when the process
    first read it.
    """
    yaml_path = tmp_path / "config.yaml"
    _write_config_yaml(yaml_path, log_level="info")
    monkeypatch.setenv("DEER_FLOW_CONFIG_PATH", str(yaml_path))

    client = TestClient(_build_app())

    before_edit = client.get("/probe").json()
    assert before_edit == {"log_level": "info"}

    # Simulate a maintainer editing the config while the gateway is live:
    # rewrite the file, then push its mtime five seconds past the value the
    # rewrite produced so the timestamp is guaranteed to differ.
    _write_config_yaml(yaml_path, log_level="debug")
    bumped_mtime = yaml_path.stat().st_mtime + 5
    os.utime(yaml_path, (bumped_mtime, bumped_mtime))

    after_edit = client.get("/probe").json()
    assert after_edit == {"log_level": "debug"}
def test_get_config_respects_runtime_context_override(tmp_path, monkeypatch):
    """A config pushed via ``push_current_app_config`` must beat the file on disk.

    The on-disk file says ``info``; the pushed override says ``trace``; the
    probe is expected to report ``trace``.
    """
    yaml_path = tmp_path / "config.yaml"
    _write_config_yaml(yaml_path, log_level="info")
    monkeypatch.setenv("DEER_FLOW_CONFIG_PATH", str(yaml_path))

    pushed = AppConfig(sandbox=SandboxConfig(use="test"), log_level="trace")
    push_current_app_config(pushed)
    try:
        response = TestClient(_build_app()).get("/probe")
        assert response.json() == {"log_level": "trace"}
    finally:
        # Always undo the push, even when the assertion fails.
        pop_current_app_config()
def test_get_config_respects_test_set_app_config():
    """A config installed with ``set_app_config`` is what ``/probe`` reports.

    Guards the injection path that the upload/skills router tests rely on.
    """
    set_app_config(
        AppConfig(sandbox=SandboxConfig(use="test"), log_level="warning")
    )

    payload = TestClient(_build_app()).get("/probe").json()

    assert payload == {"log_level": "warning"}
def test_run_context_app_config_reflects_yaml_edit(tmp_path, monkeypatch):
    """``RunContext.app_config`` must follow live ``config.yaml`` edits.

    BUG-001 review feedback: the run context feeding the worker / lead-agent
    factories has to see the same mtime reload that ``get_config()`` sees,
    otherwise stale config re-enters through the run path. At the same time
    ``run_events_config`` must keep returning the value placed on
    ``app.state``, unaffected by the edit.
    """
    from unittest.mock import MagicMock

    from app.gateway.deps import get_run_context

    yaml_path = tmp_path / "config.yaml"
    _write_config_yaml(yaml_path, log_level="info")
    monkeypatch.setenv("DEER_FLOW_CONFIG_PATH", str(yaml_path))

    app = FastAPI()
    # Placeholder wiring for the parts of the run context this test does not
    # inspect; only ``app_config`` and ``run_events_config`` are asserted on.
    for state_attr in ("checkpointer", "store", "run_event_store", "thread_store"):
        setattr(app.state, state_attr, MagicMock())
    app.state.run_events_config = {"frozen": "startup"}

    def probe(ctx=Depends(get_run_context)):
        return {
            "log_level": ctx.app_config.log_level,
            "run_events_config": ctx.run_events_config,
        }

    app.get("/run-ctx-log-level")(probe)
    client = TestClient(app)

    def read_run_context():
        return client.get("/run-ctx-log-level").json()

    assert read_run_context() == {
        "log_level": "info",
        "run_events_config": {"frozen": "startup"},
    }

    # Edit the file and move its mtime forward so the change is detectable.
    _write_config_yaml(yaml_path, log_level="debug")
    bumped_mtime = yaml_path.stat().st_mtime + 5
    os.utime(yaml_path, (bumped_mtime, bumped_mtime))

    # ``log_level`` tracks the edit; ``run_events_config`` is still exactly
    # what was written onto ``app.state`` above.
    assert read_run_context() == {
        "log_level": "debug",
        "run_events_config": {"frozen": "startup"},
    }
@pytest.mark.parametrize(
    "exception",
    [
        FileNotFoundError("config.yaml not found"),
        PermissionError("config.yaml not readable"),
        ValueError("invalid config"),
        RuntimeError("yaml parse error"),
    ],
)
def test_get_config_returns_503_on_any_load_failure(monkeypatch, exception):
    """Every config-load failure must surface as HTTP 503, never 500.

    bytedance/deer-flow issue #3107 BUG-001 review: the original snapshot
    contract answered 503 when ``app.state.config is None``. The first cut of
    the fix mapped only ``FileNotFoundError`` to 503, leaving
    ``PermissionError`` / ``yaml.YAMLError`` / ``ValidationError`` and similar
    to escape as 500. The request boundary has to catch all of them.
    """

    def _raise_load_failure():
        raise exception

    # Swap the loader that ``app.gateway.deps`` looks up for one that always
    # raises the parametrized exception.
    monkeypatch.setattr(gateway_deps, "get_app_config", _raise_load_failure)

    # ``raise_server_exceptions=False`` makes the client hand back a response
    # instead of re-raising, so a regression shows up as a status code.
    client = TestClient(_build_app(), raise_server_exceptions=False)
    response = client.get("/probe")

    assert response.status_code == 503
    assert response.json() == {"detail": "Configuration not available"}