diff --git a/backend/CLAUDE.md b/backend/CLAUDE.md index 6cc0aebb1..caa36f579 100644 --- a/backend/CLAUDE.md +++ b/backend/CLAUDE.md @@ -223,17 +223,9 @@ Setup: Copy `config.example.yaml` to `config.yaml` in the **project root** direc **Config Caching**: `get_app_config()` caches the parsed config, but automatically reloads it when the resolved config path changes or the file's mtime increases. This keeps Gateway and LangGraph reads aligned with `config.yaml` edits without requiring a manual process restart. -**Config Hot-Reload Boundary**: Gateway dependencies route through `get_app_config()` on every request, so per-run fields like `models[*].max_tokens`, `summarization.*`, `title.*`, `memory.*`, `subagents.*`, `tools[*]`, and the agent system prompt pick up `config.yaml` edits on the next message. `AppConfig` is intentionally **not** cached on `app.state` — `lifespan()` keeps a local `startup_config` variable for one-shot bootstrap work (logging level, channels, `langgraph_runtime` engines) and passes it explicitly to `langgraph_runtime(app, startup_config)`. Infrastructure fields are **restart-required**: +**Config Hot-Reload Boundary**: Gateway dependencies route through `get_app_config()` on every request, so per-run fields like `models[*].max_tokens`, `summarization.*`, `title.*`, `memory.*`, `subagents.*`, `tools[*]`, and the agent system prompt pick up `config.yaml` edits on the next message. `AppConfig` is intentionally **not** cached on `app.state` — `lifespan()` keeps a local `startup_config` variable for one-shot bootstrap work and passes it to `langgraph_runtime(app, startup_config)`. -| Field | Why a restart is required | -|---|---| -| `database.*` | `init_engine_from_config()` runs once during `langgraph_runtime()` startup; the SQLAlchemy engine holds the connection pool. | -| `checkpointer.*` (including SQLite WAL/journal settings) | `make_checkpointer()` binds the persistent checkpointer once at startup. | -| `run_events.*` | `make_run_event_store()` selects memory- vs. SQL-backed implementation at startup. | -| `stream_bridge.*` | `make_stream_bridge()` constructs the bridge object once. | -| `sandbox.use` | `get_sandbox_provider()` caches the provider singleton (`_default_sandbox_provider`); a new class path takes effect only on next process start. | -| `log_level` | `apply_logging_level()` is called only in `app.py` startup; it mutates the root logger's level, and `get_app_config()` returning a fresh `AppConfig` does not retrigger it. | -| `channels.*` IM platform credentials | `start_channel_service()` is invoked once during startup; live channels are not rebuilt on config change. | +Infrastructure fields are **restart-required**. The authoritative list lives in `packages/harness/deerflow/config/reload_boundary.py::STARTUP_ONLY_FIELDS` and is mirrored by the standardised `"startup-only:"` prefix on the corresponding `Field(description=...)` in `AppConfig`, so IDE hover on those fields surfaces the reason inline (no need to context-switch into this table). Currently registered: `database`, `checkpointer`, `run_events`, `stream_bridge`, `sandbox`, `log_level`, `channels`. Adding a new restart-required field requires updating the registry; drift is pinned by `tests/test_reload_boundary.py`. Configuration priority: 1. Explicit `config_path` argument diff --git a/backend/app/gateway/deps.py b/backend/app/gateway/deps.py index 24af1deeb..5739d217d 100644 --- a/backend/app/gateway/deps.py +++ b/backend/app/gateway/deps.py @@ -119,6 +119,16 @@ def get_config() -> AppConfig: split-brain where the worker / lead-agent thread saw a stale startup snapshot. + Hot-reload boundary: fields backed by startup-time singletons + (engines, sandbox provider, IM channels, logging handler) require a + process restart to change at runtime. The authoritative list lives in + :mod:`deerflow.config.reload_boundary` and is mirrored by the + standardised ``"startup-only:"`` prefix on the matching + ``Field(description=...)`` in :class:`AppConfig` — IDE hover on those + fields will surface the boundary inline. See + ``backend/CLAUDE.md`` "Config Hot-Reload Boundary" for the operator + summary. + Any failure to materialise the config (missing file, permission denied, YAML parse error, validation error) is reported as 503 — semantically "the gateway cannot serve requests without a usable configuration" — and diff --git a/backend/packages/harness/deerflow/config/app_config.py b/backend/packages/harness/deerflow/config/app_config.py index 8fcc564a8..842b49d7a 100644 --- a/backend/packages/harness/deerflow/config/app_config.py +++ b/backend/packages/harness/deerflow/config/app_config.py @@ -18,6 +18,7 @@ from deerflow.config.guardrails_config import GuardrailsConfig, load_guardrails_ from deerflow.config.loop_detection_config import LoopDetectionConfig from deerflow.config.memory_config import MemoryConfig, load_memory_config_from_dict from deerflow.config.model_config import ModelConfig +from deerflow.config.reload_boundary import format_field_description from deerflow.config.run_events_config import RunEventsConfig from deerflow.config.runtime_paths import existing_project_file from deerflow.config.safety_finish_reason_config import SafetyFinishReasonConfig @@ -85,10 +86,21 @@ def apply_logging_level(name: str | None) -> None: class AppConfig(BaseModel): """Config for the DeerFlow application""" - log_level: str = Field(default="info", description="Logging level for deerflow and app modules (debug/info/warning/error); third-party libraries are not affected") + log_level: str = Field( + default="info", + description=format_field_description( + "log_level", + field_doc="Logging level for deerflow and app modules (debug/info/warning/error); third-party libraries are not affected.", + ), + ) token_usage: TokenUsageConfig = Field(default_factory=TokenUsageConfig, description="Token usage tracking configuration") models: list[ModelConfig] = Field(default_factory=list, description="Available models") - sandbox: SandboxConfig = Field(description="Sandbox configuration") + sandbox: SandboxConfig = Field( + description=format_field_description( + "sandbox", + field_doc="Sandbox provider configuration (local filesystem or Docker-based aio sandbox).", + ), + ) tools: list[ToolConfig] = Field(default_factory=list, description="Available tools") tool_groups: list[ToolGroupConfig] = Field(default_factory=list, description="Available tool groups") skills: SkillsConfig = Field(default_factory=SkillsConfig, description="Skills configuration") @@ -107,10 +119,34 @@ class AppConfig(BaseModel): loop_detection: LoopDetectionConfig = Field(default_factory=LoopDetectionConfig, description="Loop detection middleware configuration") safety_finish_reason: SafetyFinishReasonConfig = Field(default_factory=SafetyFinishReasonConfig, description="Provider safety-filter finish_reason interception middleware configuration") model_config = ConfigDict(extra="allow") - database: DatabaseConfig = Field(default_factory=DatabaseConfig, description="Unified database backend configuration") - run_events: RunEventsConfig = Field(default_factory=RunEventsConfig, description="Run event storage configuration") - checkpointer: CheckpointerConfig | None = Field(default=None, description="Checkpointer configuration") - stream_bridge: StreamBridgeConfig | None = Field(default=None, description="Stream bridge configuration") + database: DatabaseConfig = Field( + default_factory=DatabaseConfig, + description=format_field_description( + "database", + field_doc="Unified database backend for run/feedback metadata (memory, sqlite, or postgres).", + ), + ) + run_events: RunEventsConfig = Field( + default_factory=RunEventsConfig, + description=format_field_description( + "run_events", + field_doc="Run-event store backend (memory for dev, db for production queries, jsonl for lightweight single-node persistence).", + ), + ) + checkpointer: CheckpointerConfig | None = Field( + default=None, + description=format_field_description( + "checkpointer", + field_doc="LangGraph state-persistence checkpointer configuration.", + ), + ) + stream_bridge: StreamBridgeConfig | None = Field( + default=None, + description=format_field_description( + "stream_bridge", + field_doc="Stream bridge connecting agent workers to SSE endpoints.", + ), + ) @classmethod def resolve_config_path(cls, config_path: str | None = None) -> Path: diff --git a/backend/packages/harness/deerflow/config/reload_boundary.py b/backend/packages/harness/deerflow/config/reload_boundary.py new file mode 100644 index 000000000..d39502776 --- /dev/null +++ b/backend/packages/harness/deerflow/config/reload_boundary.py @@ -0,0 +1,104 @@ +"""Single source of truth for the config hot-reload boundary. + +Bytedance/deer-flow issue #3144: gateway request dependencies resolve +``AppConfig`` through ``get_app_config()`` on every request, so per-run +fields take effect on the next message without restarting the gateway. +The fields listed in this module are the **infrastructure** subset that +the gateway captures once at startup — engines, singletons, IM clients, +the logging handler — and that therefore require a process restart to +change at runtime. + +The registry covers two kinds of entries: + +- Top-level ``AppConfig`` fields (``database``, ``checkpointer``, + ``run_events``, ``stream_bridge``, ``sandbox``, ``log_level``). For + these, :func:`format_field_description` produces the standardised + ``"startup-only: ..."`` prefix that the matching Pydantic + ``Field(description=...)`` carries, so the boundary surfaces in IDE + hover next to the field itself. +- Top-level ``config.yaml`` sections that are not part of the + ``AppConfig`` schema (``channels``). These cannot be standardised at + the schema level, so the registry is their only canonical location. + +Any future "needs restart" scanner — operator tooling, lint hooks, doc +generators — should drive off this registry rather than re-parsing +prose. +""" + +from __future__ import annotations + +from collections.abc import Iterator + +#: The standardised prefix every restart-required field description starts +#: with. ``test_reload_boundary`` enforces both directions: registered +#: fields must use this prefix in the schema, and any schema field using +#: this prefix must be in the registry. +STARTUP_ONLY_PREFIX = "startup-only:" + + +#: Restart-required field paths mapped to the human-readable reason. +#: +#: The reason text is what surfaces in ``Field(description=...)``, so it +#: must explain *what* code captures the snapshot — not just that the +#: field is restart-required — so an operator changing the value knows +#: which subsystem to restart. +STARTUP_ONLY_FIELDS: dict[str, str] = { + "database": ("init_engine_from_config() runs once during langgraph_runtime() startup; the SQLAlchemy engine holds the connection pool and is not rebuilt on config.yaml edits."), + "checkpointer": ("make_checkpointer() binds the persistent checkpointer once at startup, including SQLite WAL / busy_timeout settings."), + "run_events": ("make_run_event_store() picks the memory- vs SQL-backed implementation at startup and is frozen onto app.state.run_events_config to stay paired with the underlying event store."), + "stream_bridge": ("make_stream_bridge() constructs the stream-bridge singleton once during startup."), + "sandbox": ("get_sandbox_provider() caches the provider singleton (``_default_sandbox_provider``); a different ``sandbox.use`` class path only takes effect on next process start."), + "log_level": ( + "apply_logging_level() runs only during app.py startup; it sets the deerflow/app logger levels and may lower root handler thresholds so configured messages can propagate. A freshly reloaded AppConfig does not retrigger it." + ), + # Not part of the AppConfig Pydantic schema — channel credentials are + # consumed directly by ``start_channel_service()`` once at lifespan + # startup and the live channel clients are not rebuilt on + # config.yaml edits. + "channels": ("start_channel_service() is invoked once during startup; the live IM channel clients (Feishu, Slack, Telegram, DingTalk) are not rebuilt when channels.* changes."), +} + + +def iter_startup_only_field_paths() -> Iterator[str]: + """Yield every registered restart-required field path.""" + return iter(STARTUP_ONLY_FIELDS) + + +def is_startup_only_field(field_path: str) -> bool: + """Return ``True`` when *field_path* is registered as restart-required. + + Accepts only top-level paths (``"database"``, ``"sandbox"`` etc.); + nested keys like ``"database.url"`` are not modelled here because the + boundary is per-section, not per-leaf. + """ + return field_path in STARTUP_ONLY_FIELDS + + +def format_field_description(field_path: str, *, field_doc: str | None = None) -> str: + """Build the standardised description for a registered field. + + Used inside ``AppConfig`` ``Field(description=...)`` so the hover + text in IDEs matches the registry and the drift tests can pin one + side against the other. + + Args: + field_path: A registered top-level field path (e.g. ``"log_level"``). + field_doc: Optional human-facing description for the field itself + (allowed values, semantics, etc.). When supplied, it is + appended after the ``startup-only:`` marker block separated by + a blank line so IDE hover shows both the restart-required + reason *and* the field's normal documentation. Composition + keeps the marker as the leading token machine-readable tooling + pivots on while restoring the prose that ``Field(description=)`` + used to carry before the registry took over. + + Raises: + KeyError: when *field_path* is not registered. This is deliberate + — silently returning a placeholder would let a typo bypass + the drift coverage. + """ + reason = STARTUP_ONLY_FIELDS[field_path] + header = f"{STARTUP_ONLY_PREFIX} {reason}" + if field_doc is None: + return header + return f"{header}\n\n{field_doc.strip()}" diff --git a/backend/tests/test_reload_boundary.py b/backend/tests/test_reload_boundary.py new file mode 100644 index 000000000..5610ccafb --- /dev/null +++ b/backend/tests/test_reload_boundary.py @@ -0,0 +1,140 @@ +"""Regression tests for the config reload boundary registry. + +Bytedance/deer-flow issue #3144: the hot-reload boundary is the contract +between gateway dependencies that resolve ``AppConfig`` every request and the +infrastructure that captures the snapshot once at startup. The registry in +``deerflow.config.reload_boundary`` is the machine-readable source of truth; +these tests pin the registry against the actual Pydantic schema so a future +field rename / addition / boundary change cannot silently drift. +""" + +from __future__ import annotations + +import pytest + +from deerflow.config.app_config import AppConfig +from deerflow.config.reload_boundary import ( + STARTUP_ONLY_FIELDS, + STARTUP_ONLY_PREFIX, + format_field_description, + is_startup_only_field, + iter_startup_only_field_paths, +) + + +def test_registry_has_a_reason_for_every_field(): + """Every registry entry must explain *why* the field is restart-required. + + The reason text is what surfaces in IDE hover and in the AppConfig schema + description, so an empty / placeholder value would defeat the purpose. + """ + for field_path, reason in STARTUP_ONLY_FIELDS.items(): + assert reason.strip(), f"empty reason for {field_path}" + assert len(reason) > 20, f"reason for {field_path} too short to be useful: {reason!r}" + + +def test_iter_startup_only_field_paths_matches_registry(): + """Iterator stays in sync with the registry mapping.""" + assert sorted(iter_startup_only_field_paths()) == sorted(STARTUP_ONLY_FIELDS) + + +def test_is_startup_only_field_recognises_registered_fields(): + """The membership helper accepts every registered field path.""" + for field_path in STARTUP_ONLY_FIELDS: + assert is_startup_only_field(field_path) + assert not is_startup_only_field("memory") # hot-reloadable + assert not is_startup_only_field("models") + assert not is_startup_only_field("nonexistent_field") + + +def test_format_field_description_prefixes_with_marker(): + """The formatter produces a description that machine-readable tooling can + pivot on (drift tests, future "needs-restart" scanners).""" + for field_path in STARTUP_ONLY_FIELDS: + text = format_field_description(field_path) + assert text.startswith(STARTUP_ONLY_PREFIX), text + # The reason is appended after the prefix; the formatter must not + # silently drop it. + assert STARTUP_ONLY_FIELDS[field_path] in text + + +def test_format_field_description_rejects_unknown_field(): + with pytest.raises(KeyError): + format_field_description("not_in_registry") + + +def test_format_field_description_appends_optional_field_doc(): + """The formatter composes the startup-only marker with the field's own + human-facing description when supplied. + + The original ``Field(description=)`` used to document allowed values + (e.g. ``log_level`` listed ``debug/info/warning/error``); registry + adoption must not drop that. The composed output keeps the marker as + the leading token so machine-readable tooling still pivots on it, + then appends the prose after a blank line. + """ + text = format_field_description("log_level", field_doc="Logging level (debug/info/warning/error).") + assert text.startswith(STARTUP_ONLY_PREFIX) + assert STARTUP_ONLY_FIELDS["log_level"] in text + assert "debug/info/warning/error" in text + + +def test_appconfig_descriptions_retain_original_field_documentation(): + """``AppConfig.model_fields[name].description`` for restart-required + fields should still carry the original human-facing field doc so IDE + hover documents what the field is *and* why a restart is needed.""" + descriptions = { + "log_level": "debug/info/warning/error", + "database": "memory, sqlite, or postgres", + "sandbox": "Sandbox provider", + "run_events": "memory for dev", + "checkpointer": "state-persistence checkpointer", + "stream_bridge": "Stream bridge", + } + for field_name, expected_substring in descriptions.items(): + description = AppConfig.model_fields[field_name].description or "" + assert description.startswith(STARTUP_ONLY_PREFIX), f"AppConfig.{field_name} missing startup-only marker" + assert expected_substring in description, f"AppConfig.{field_name} description lost original field doc; got {description!r}" + + +def test_appconfig_schema_marks_registered_fields_with_prefix(): + """Every registry entry that corresponds to a top-level AppConfig field + must carry the standardized ``startup-only:`` prefix in its Pydantic + ``Field(description=...)``. This is the contract IDE hover relies on. + """ + schema_fields = AppConfig.model_fields + for field_path in STARTUP_ONLY_FIELDS: + if field_path not in schema_fields: + # Some entries (e.g. ``channels``) live outside the AppConfig + # schema. The registry still owns them, but the schema-prefix + # assertion does not apply. + continue + description = schema_fields[field_path].description or "" + assert description.startswith(STARTUP_ONLY_PREFIX), f"AppConfig.{field_path} should have Field(description=) starting with {STARTUP_ONLY_PREFIX!r}, got {description!r}" + + +def test_no_appconfig_field_uses_prefix_without_registration(): + """Reverse drift check: if a future schema edit adds the + ``startup-only:`` prefix to a new field, the registry must list it. + + This catches the silent-drift case where someone marks a field + restart-required in the schema but forgets to update the registry + that the operator-facing scanners and docs consume. + """ + for name, info in AppConfig.model_fields.items(): + description = info.description or "" + if not description.startswith(STARTUP_ONLY_PREFIX): + continue + assert name in STARTUP_ONLY_FIELDS, f"AppConfig.{name} schema description starts with {STARTUP_ONLY_PREFIX!r} but the field is not listed in reload_boundary.STARTUP_ONLY_FIELDS — update the registry." + + +def test_pydantic_field_descriptions_are_introspectable_at_runtime(): + """``AppConfig.model_fields[name].description`` is the IDE-hover source. + + If this read ever breaks (e.g. Pydantic deprecation, schema swap), the + IDE-hover guarantee #3144 promises silently regresses. Pin it. + """ + assert "database" in AppConfig.model_fields + description = AppConfig.model_fields["database"].description + assert description is not None + assert description.startswith(STARTUP_ONLY_PREFIX)