mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-23 00:16:48 +00:00
Merge branch 'main' into rayhpeng/persistence-scaffold
# Conflicts: # .env.example # backend/packages/harness/deerflow/agents/middlewares/title_middleware.py
This commit is contained in:
@@ -3,7 +3,13 @@ from .extensions_config import ExtensionsConfig, get_extensions_config
|
||||
from .memory_config import MemoryConfig, get_memory_config
|
||||
from .paths import Paths, get_paths
|
||||
from .skills_config import SkillsConfig
|
||||
from .tracing_config import get_tracing_config, is_tracing_enabled
|
||||
from .tracing_config import (
|
||||
get_enabled_tracing_providers,
|
||||
get_explicitly_enabled_tracing_providers,
|
||||
get_tracing_config,
|
||||
is_tracing_enabled,
|
||||
validate_enabled_tracing_providers,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"get_app_config",
|
||||
@@ -15,5 +21,8 @@ __all__ = [
|
||||
"MemoryConfig",
|
||||
"get_memory_config",
|
||||
"get_tracing_config",
|
||||
"get_explicitly_enabled_tracing_providers",
|
||||
"get_enabled_tracing_providers",
|
||||
"is_tracing_enabled",
|
||||
"validate_enabled_tracing_providers",
|
||||
]
|
||||
|
||||
@@ -22,6 +22,11 @@ class AgentConfig(BaseModel):
|
||||
description: str = ""
|
||||
model: str | None = None
|
||||
tool_groups: list[str] | None = None
|
||||
# skills controls which skills are loaded into the agent's prompt:
|
||||
# - None (or omitted): load all enabled skills (default fallback behavior)
|
||||
# - [] (explicit empty list): disable all skills
|
||||
# - ["skill1", "skill2"]: load only the specified skills
|
||||
skills: list[str] | None = None
|
||||
|
||||
|
||||
def load_agent_config(name: str | None) -> AgentConfig | None:
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import logging
|
||||
import os
|
||||
from contextvars import ContextVar
|
||||
from pathlib import Path
|
||||
from typing import Any, Self
|
||||
|
||||
@@ -11,16 +12,16 @@ from deerflow.config.acp_config import load_acp_config_from_dict
|
||||
from deerflow.config.checkpointer_config import CheckpointerConfig, load_checkpointer_config_from_dict
|
||||
from deerflow.config.database_config import DatabaseConfig
|
||||
from deerflow.config.extensions_config import ExtensionsConfig
|
||||
from deerflow.config.guardrails_config import load_guardrails_config_from_dict
|
||||
from deerflow.config.memory_config import load_memory_config_from_dict
|
||||
from deerflow.config.guardrails_config import GuardrailsConfig, load_guardrails_config_from_dict
|
||||
from deerflow.config.memory_config import MemoryConfig, load_memory_config_from_dict
|
||||
from deerflow.config.model_config import ModelConfig
|
||||
from deerflow.config.run_events_config import RunEventsConfig
|
||||
from deerflow.config.sandbox_config import SandboxConfig
|
||||
from deerflow.config.skills_config import SkillsConfig
|
||||
from deerflow.config.stream_bridge_config import StreamBridgeConfig, load_stream_bridge_config_from_dict
|
||||
from deerflow.config.subagents_config import load_subagents_config_from_dict
|
||||
from deerflow.config.summarization_config import load_summarization_config_from_dict
|
||||
from deerflow.config.title_config import load_title_config_from_dict
|
||||
from deerflow.config.subagents_config import SubagentsAppConfig, load_subagents_config_from_dict
|
||||
from deerflow.config.summarization_config import SummarizationConfig, load_summarization_config_from_dict
|
||||
from deerflow.config.title_config import TitleConfig, load_title_config_from_dict
|
||||
from deerflow.config.token_usage_config import TokenUsageConfig
|
||||
from deerflow.config.tool_config import ToolConfig, ToolGroupConfig
|
||||
from deerflow.config.tool_search_config import ToolSearchConfig, load_tool_search_config_from_dict
|
||||
@@ -30,6 +31,13 @@ load_dotenv()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _default_config_candidates() -> tuple[Path, ...]:
    """List the default ``config.yaml`` locations, derived from this module's path.

    The search roots are computed from ``__file__`` instead of the process
    working directory, so the result does not change with the directory the
    program was launched from.

    Returns:
        Two candidate paths in lookup order: ``config.yaml`` under the fifth
        ancestor of this file (``parents[4]``), then ``config.yaml`` under that
        directory's parent.
    """
    module_path = Path(__file__).resolve()
    # NOTE(review): parents[4] is presumably the ``backend/`` directory given the
    # package layout; its parent would then be the repository root - confirm.
    backend_root = module_path.parents[4]
    search_roots = (backend_root, backend_root.parent)
    return tuple(root / "config.yaml" for root in search_roots)
|
||||
|
||||
|
||||
class AppConfig(BaseModel):
|
||||
"""Config for the DeerFlow application"""
|
||||
|
||||
@@ -42,6 +50,11 @@ class AppConfig(BaseModel):
|
||||
skills: SkillsConfig = Field(default_factory=SkillsConfig, description="Skills configuration")
|
||||
extensions: ExtensionsConfig = Field(default_factory=ExtensionsConfig, description="Extensions configuration (MCP servers and skills state)")
|
||||
tool_search: ToolSearchConfig = Field(default_factory=ToolSearchConfig, description="Tool search / deferred loading configuration")
|
||||
title: TitleConfig = Field(default_factory=TitleConfig, description="Automatic title generation configuration")
|
||||
summarization: SummarizationConfig = Field(default_factory=SummarizationConfig, description="Conversation summarization configuration")
|
||||
memory: MemoryConfig = Field(default_factory=MemoryConfig, description="Memory subsystem configuration")
|
||||
subagents: SubagentsAppConfig = Field(default_factory=SubagentsAppConfig, description="Subagent runtime configuration")
|
||||
guardrails: GuardrailsConfig = Field(default_factory=GuardrailsConfig, description="Guardrail middleware configuration")
|
||||
model_config = ConfigDict(extra="allow", frozen=False)
|
||||
database: DatabaseConfig = Field(default_factory=DatabaseConfig, description="Unified database backend configuration")
|
||||
run_events: RunEventsConfig = Field(default_factory=RunEventsConfig, description="Run event storage configuration")
|
||||
@@ -55,7 +68,7 @@ class AppConfig(BaseModel):
|
||||
Priority:
|
||||
1. If provided `config_path` argument, use it.
|
||||
2. If provided `DEER_FLOW_CONFIG_PATH` environment variable, use it.
|
||||
3. Otherwise, first check the `config.yaml` in the current directory, then fallback to `config.yaml` in the parent directory.
|
||||
3. Otherwise, search deterministic backend/repository-root defaults from `_default_config_candidates()`.
|
||||
"""
|
||||
if config_path:
|
||||
path = Path(config_path)
|
||||
@@ -68,14 +81,10 @@ class AppConfig(BaseModel):
|
||||
raise FileNotFoundError(f"Config file specified by environment variable `DEER_FLOW_CONFIG_PATH` not found at {path}")
|
||||
return path
|
||||
else:
|
||||
# Check if the config.yaml is in the current directory
|
||||
path = Path(os.getcwd()) / "config.yaml"
|
||||
if not path.exists():
|
||||
# Check if the config.yaml is in the parent directory of CWD
|
||||
path = Path(os.getcwd()).parent / "config.yaml"
|
||||
if not path.exists():
|
||||
raise FileNotFoundError("`config.yaml` file not found at the current directory nor its parent directory")
|
||||
return path
|
||||
for path in _default_config_candidates():
|
||||
if path.exists():
|
||||
return path
|
||||
raise FileNotFoundError("`config.yaml` file not found at the default backend or repository root locations")
|
||||
|
||||
@classmethod
|
||||
def from_file(cls, config_path: str | None = None) -> Self:
|
||||
@@ -248,6 +257,8 @@ _app_config: AppConfig | None = None
|
||||
_app_config_path: Path | None = None
|
||||
_app_config_mtime: float | None = None
|
||||
_app_config_is_custom = False
|
||||
_current_app_config: ContextVar[AppConfig | None] = ContextVar("deerflow_current_app_config", default=None)
|
||||
_current_app_config_stack: ContextVar[tuple[AppConfig | None, ...]] = ContextVar("deerflow_current_app_config_stack", default=())
|
||||
|
||||
|
||||
def _get_config_mtime(config_path: Path) -> float | None:
|
||||
@@ -280,6 +291,10 @@ def get_app_config() -> AppConfig:
|
||||
"""
|
||||
global _app_config, _app_config_path, _app_config_mtime
|
||||
|
||||
runtime_override = _current_app_config.get()
|
||||
if runtime_override is not None:
|
||||
return runtime_override
|
||||
|
||||
if _app_config is not None and _app_config_is_custom:
|
||||
return _app_config
|
||||
|
||||
@@ -341,3 +356,26 @@ def set_app_config(config: AppConfig) -> None:
|
||||
_app_config_path = None
|
||||
_app_config_mtime = None
|
||||
_app_config_is_custom = True
|
||||
|
||||
|
||||
def peek_current_app_config() -> AppConfig | None:
    """Return the runtime-scoped AppConfig override, if one is active.

    Returns:
        The value currently held by the ``_current_app_config`` context
        variable, or ``None`` when no override has been installed.
    """
    active_override = _current_app_config.get()
    return active_override
|
||||
|
||||
|
||||
def push_current_app_config(config: AppConfig) -> None:
    """Install ``config`` as the runtime-scoped override for the current context.

    The override that was active before the call (possibly ``None``) is
    appended to the per-context stack so that ``pop_current_app_config`` can
    restore it later.

    Args:
        config: The AppConfig to make active.
    """
    displaced = _current_app_config.get()
    saved = _current_app_config_stack.get()
    # The stack is an immutable tuple, so a new tuple is stored rather than
    # mutating the existing one in place.
    _current_app_config_stack.set(saved + (displaced,))
    _current_app_config.set(config)
|
||||
|
||||
|
||||
def pop_current_app_config() -> None:
    """Undo the most recent ``push_current_app_config`` in the current context.

    When the saved-override stack is empty the active override is simply
    cleared to ``None``. Otherwise the most recently saved value becomes the
    active override again and is removed from the stack.
    """
    saved = _current_app_config_stack.get()
    if saved:
        remaining, restored = saved[:-1], saved[-1]
        _current_app_config_stack.set(remaining)
        _current_app_config.set(restored)
    else:
        # Nothing was pushed (or everything was already popped): clear the override.
        _current_app_config.set(None)
|
||||
|
||||
@@ -80,6 +80,12 @@ class ExtensionsConfig(BaseModel):
|
||||
Args:
|
||||
config_path: Optional path to extensions config file.
|
||||
|
||||
Resolution order:
|
||||
1. If provided `config_path` argument, use it.
|
||||
2. If provided `DEER_FLOW_EXTENSIONS_CONFIG_PATH` environment variable, use it.
|
||||
3. Otherwise, search backend/repository-root defaults for
|
||||
`extensions_config.json`, then legacy `mcp_config.json`.
|
||||
|
||||
Returns:
|
||||
Path to the extensions config file if found, otherwise None.
|
||||
"""
|
||||
@@ -94,24 +100,16 @@ class ExtensionsConfig(BaseModel):
|
||||
raise FileNotFoundError(f"Extensions config file specified by environment variable `DEER_FLOW_EXTENSIONS_CONFIG_PATH` not found at {path}")
|
||||
return path
|
||||
else:
|
||||
# Check if the extensions_config.json is in the current directory
|
||||
path = Path(os.getcwd()) / "extensions_config.json"
|
||||
if path.exists():
|
||||
return path
|
||||
|
||||
# Check if the extensions_config.json is in the parent directory of CWD
|
||||
path = Path(os.getcwd()).parent / "extensions_config.json"
|
||||
if path.exists():
|
||||
return path
|
||||
|
||||
# Backward compatibility: check for mcp_config.json
|
||||
path = Path(os.getcwd()) / "mcp_config.json"
|
||||
if path.exists():
|
||||
return path
|
||||
|
||||
path = Path(os.getcwd()).parent / "mcp_config.json"
|
||||
if path.exists():
|
||||
return path
|
||||
backend_dir = Path(__file__).resolve().parents[4]
|
||||
repo_root = backend_dir.parent
|
||||
for path in (
|
||||
backend_dir / "extensions_config.json",
|
||||
repo_root / "extensions_config.json",
|
||||
backend_dir / "mcp_config.json",
|
||||
repo_root / "mcp_config.json",
|
||||
):
|
||||
if path.exists():
|
||||
return path
|
||||
|
||||
# Extensions are optional, so return None if not found
|
||||
return None
|
||||
|
||||
@@ -9,6 +9,12 @@ VIRTUAL_PATH_PREFIX = "/mnt/user-data"
|
||||
_SAFE_THREAD_ID_RE = re.compile(r"^[A-Za-z0-9_\-]+$")
|
||||
|
||||
|
||||
def _default_local_base_dir() -> Path:
    """Return the repo-local DeerFlow state directory without relying on cwd.

    Returns:
        ``.deer-flow`` under the fifth ancestor of this file (``parents[4]``).
    """
    # NOTE(review): parents[4] is presumably the ``backend/`` directory for the
    # current package layout - confirm if the module is ever relocated.
    module_path = Path(__file__).resolve()
    return module_path.parents[4].joinpath(".deer-flow")
|
||||
|
||||
|
||||
def _validate_thread_id(thread_id: str) -> str:
|
||||
"""Validate a thread ID before using it in filesystem paths."""
|
||||
if not _SAFE_THREAD_ID_RE.match(thread_id):
|
||||
@@ -67,8 +73,7 @@ class Paths:
|
||||
BaseDir resolution (in priority order):
|
||||
1. Constructor argument `base_dir`
|
||||
2. DEER_FLOW_HOME environment variable
|
||||
3. Local dev fallback: cwd/.deer-flow (when cwd is the backend/ dir)
|
||||
4. Default: $HOME/.deer-flow
|
||||
3. Repo-local fallback derived from this module path: `{backend_dir}/.deer-flow`
|
||||
"""
|
||||
|
||||
def __init__(self, base_dir: str | Path | None = None) -> None:
|
||||
@@ -104,11 +109,7 @@ class Paths:
|
||||
if env_home := os.getenv("DEER_FLOW_HOME"):
|
||||
return Path(env_home).resolve()
|
||||
|
||||
cwd = Path.cwd()
|
||||
if cwd.name == "backend" or (cwd / "pyproject.toml").exists():
|
||||
return cwd / ".deer-flow"
|
||||
|
||||
return Path.home() / ".deer-flow"
|
||||
return _default_local_base_dir()
|
||||
|
||||
@property
|
||||
def memory_file(self) -> Path:
|
||||
|
||||
@@ -64,4 +64,15 @@ class SandboxConfig(BaseModel):
|
||||
description="Environment variables to inject into the sandbox container. Values starting with $ will be resolved from host environment variables.",
|
||||
)
|
||||
|
||||
bash_output_max_chars: int = Field(
|
||||
default=20000,
|
||||
ge=0,
|
||||
description="Maximum characters to keep from bash tool output. Output exceeding this limit is middle-truncated (head + tail), preserving the first and last half. Set to 0 to disable truncation.",
|
||||
)
|
||||
read_file_output_max_chars: int = Field(
|
||||
default=50000,
|
||||
ge=0,
|
||||
description="Maximum characters to keep from read_file tool output. Output exceeding this limit is head-truncated. Set to 0 to disable truncation.",
|
||||
)
|
||||
|
||||
model_config = ConfigDict(extra="allow")
|
||||
|
||||
@@ -3,6 +3,11 @@ from pathlib import Path
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
def _default_repo_root() -> Path:
    """Resolve the repo root without relying on the current working directory.

    Returns:
        The sixth ancestor of this file (``parents[5]``) after resolving the
        module path to an absolute location.
    """
    # NOTE(review): the index assumes this module sits five directories below
    # the repository root - confirm if the package layout changes.
    module_path = Path(__file__).resolve()
    return module_path.parents[5]
|
||||
|
||||
|
||||
class SkillsConfig(BaseModel):
|
||||
"""Configuration for skills system"""
|
||||
|
||||
@@ -26,8 +31,8 @@ class SkillsConfig(BaseModel):
|
||||
# Use configured path (can be absolute or relative)
|
||||
path = Path(self.path)
|
||||
if not path.is_absolute():
|
||||
# If relative, resolve from current working directory
|
||||
path = Path.cwd() / path
|
||||
# If relative, resolve from the repo root for deterministic behavior.
|
||||
path = _default_repo_root() / path
|
||||
return path.resolve()
|
||||
else:
|
||||
# Default: ../skills relative to backend directory
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
_config_lock = threading.Lock()
|
||||
|
||||
|
||||
class TracingConfig(BaseModel):
|
||||
class LangSmithTracingConfig(BaseModel):
|
||||
"""Configuration for LangSmith tracing."""
|
||||
|
||||
enabled: bool = Field(...)
|
||||
@@ -18,9 +16,69 @@ class TracingConfig(BaseModel):
|
||||
|
||||
@property
|
||||
def is_configured(self) -> bool:
|
||||
"""Check if tracing is fully configured (enabled and has API key)."""
|
||||
return self.enabled and bool(self.api_key)
|
||||
|
||||
def validate(self) -> None:
|
||||
if self.enabled and not self.api_key:
|
||||
raise ValueError("LangSmith tracing is enabled but LANGSMITH_API_KEY (or LANGCHAIN_API_KEY) is not set.")
|
||||
|
||||
|
||||
class LangfuseTracingConfig(BaseModel):
    """Configuration for Langfuse tracing."""

    # All four fields are required (``Field(...)``); the caller supplies them
    # explicitly, including ``None`` for an absent key.
    enabled: bool = Field(...)
    public_key: str | None = Field(...)
    secret_key: str | None = Field(...)
    host: str = Field(...)

    @property
    def is_configured(self) -> bool:
        """Whether Langfuse is enabled and both keys are non-empty."""
        has_credentials = bool(self.public_key) and bool(self.secret_key)
        return self.enabled and has_credentials

    # NOTE(review): this instance method shares its name with pydantic's
    # deprecated ``BaseModel.validate`` classmethod and shadows it - confirm
    # nothing relies on the inherited one.
    def validate(self) -> None:
        """Ensure an enabled Langfuse config has both keys.

        Does nothing when ``enabled`` is false.

        Raises:
            ValueError: If enabled and the public and/or secret key is empty;
                the message names each missing setting.
        """
        if not self.enabled:
            return
        required = {
            "LANGFUSE_PUBLIC_KEY": self.public_key,
            "LANGFUSE_SECRET_KEY": self.secret_key,
        }
        # Dict order keeps the public key ahead of the secret key in the message.
        missing = [setting for setting, value in required.items() if not value]
        if missing:
            raise ValueError(f"Langfuse tracing is enabled but required settings are missing: {', '.join(missing)}")
|
||||
|
||||
|
||||
class TracingConfig(BaseModel):
    """Tracing configuration for supported providers."""

    # One required sub-config per provider.
    langsmith: LangSmithTracingConfig = Field(...)
    langfuse: LangfuseTracingConfig = Field(...)

    @property
    def is_configured(self) -> bool:
        """Whether at least one provider is enabled and fully configured."""
        return len(self.enabled_providers) > 0

    @property
    def explicitly_enabled_providers(self) -> list[str]:
        """Names of providers whose ``enabled`` flag is set, complete or not."""
        flags = (
            ("langsmith", self.langsmith.enabled),
            ("langfuse", self.langfuse.enabled),
        )
        return [name for name, switched_on in flags if switched_on]

    @property
    def enabled_providers(self) -> list[str]:
        """Names of providers whose ``is_configured`` property is true."""
        states = (
            ("langsmith", self.langsmith.is_configured),
            ("langfuse", self.langfuse.is_configured),
        )
        return [name for name, ready in states if ready]

    def validate_enabled(self) -> None:
        """Run each provider's ``validate()``, LangSmith first, then Langfuse.

        Any exception raised by a provider's ``validate()`` propagates to the
        caller; a failure in LangSmith validation means Langfuse is not checked.
        """
        for provider in (self.langsmith, self.langfuse):
            provider.validate()
|
||||
|
||||
|
||||
_tracing_config: TracingConfig | None = None
|
||||
|
||||
@@ -29,12 +87,7 @@ _TRUTHY_VALUES = {"1", "true", "yes", "on"}
|
||||
|
||||
|
||||
def _env_flag_preferred(*names: str) -> bool:
|
||||
"""Return the boolean value of the first env var that is present and non-empty.
|
||||
|
||||
Accepted truthy values (case-insensitive): ``1``, ``true``, ``yes``, ``on``.
|
||||
Any other non-empty value is treated as falsy. If none of the named
|
||||
variables is set, returns ``False``.
|
||||
"""
|
||||
"""Return the boolean value of the first env var that is present and non-empty."""
|
||||
for name in names:
|
||||
value = os.environ.get(name)
|
||||
if value is not None and value.strip():
|
||||
@@ -52,43 +105,45 @@ def _first_env_value(*names: str) -> str | None:
|
||||
|
||||
|
||||
def get_tracing_config() -> TracingConfig:
    """Get the current tracing configuration from environment variables.

    The configuration is built on the first call and cached in the
    module-level ``_tracing_config``; later calls return the cached object
    without re-reading the environment.

    Environment variables consulted, in the order they are handed to the
    lookup helpers (``_env_flag_preferred`` uses the first variable that is
    present and non-empty):

    LangSmith (legacy ``LANGCHAIN_*`` names are still accepted)
        enabled  : LANGSMITH_TRACING, LANGCHAIN_TRACING_V2, LANGCHAIN_TRACING
        api_key  : LANGSMITH_API_KEY, LANGCHAIN_API_KEY
        project  : LANGSMITH_PROJECT, LANGCHAIN_PROJECT (default: "deer-flow")
        endpoint : LANGSMITH_ENDPOINT, LANGCHAIN_ENDPOINT
                   (default: https://api.smith.langchain.com)

    Langfuse
        enabled    : LANGFUSE_TRACING
        public_key : LANGFUSE_PUBLIC_KEY
        secret_key : LANGFUSE_SECRET_KEY
        host       : LANGFUSE_BASE_URL (default: https://cloud.langfuse.com)

    Returns:
        TracingConfig with current settings.
    """
    global _tracing_config
    # Fast path: already built, no need to take the lock.
    if _tracing_config is not None:
        return _tracing_config
    with _config_lock:
        if _tracing_config is not None:  # Double-check after acquiring lock
            return _tracing_config
        _tracing_config = TracingConfig(
            # Keep compatibility with both legacy LANGCHAIN_* and newer LANGSMITH_* variables.
            langsmith=LangSmithTracingConfig(
                enabled=_env_flag_preferred("LANGSMITH_TRACING", "LANGCHAIN_TRACING_V2", "LANGCHAIN_TRACING"),
                api_key=_first_env_value("LANGSMITH_API_KEY", "LANGCHAIN_API_KEY"),
                # ``or`` applies the default when the helper yields None or an empty value.
                project=_first_env_value("LANGSMITH_PROJECT", "LANGCHAIN_PROJECT") or "deer-flow",
                endpoint=_first_env_value("LANGSMITH_ENDPOINT", "LANGCHAIN_ENDPOINT") or "https://api.smith.langchain.com",
            ),
            langfuse=LangfuseTracingConfig(
                enabled=_env_flag_preferred("LANGFUSE_TRACING"),
                public_key=_first_env_value("LANGFUSE_PUBLIC_KEY"),
                secret_key=_first_env_value("LANGFUSE_SECRET_KEY"),
                host=_first_env_value("LANGFUSE_BASE_URL") or "https://cloud.langfuse.com",
            ),
        )
        return _tracing_config
|
||||
|
||||
|
||||
def get_enabled_tracing_providers() -> list[str]:
    """Return the configured tracing providers that are enabled and complete.

    Returns:
        The ``enabled_providers`` list of the config from ``get_tracing_config()``.
    """
    tracing = get_tracing_config()
    return tracing.enabled_providers
|
||||
|
||||
|
||||
def get_explicitly_enabled_tracing_providers() -> list[str]:
    """Return tracing providers explicitly enabled by config, even if incomplete.

    Returns:
        The ``explicitly_enabled_providers`` list of the config from
        ``get_tracing_config()``.
    """
    tracing = get_tracing_config()
    return tracing.explicitly_enabled_providers
|
||||
|
||||
|
||||
def validate_enabled_tracing_providers() -> None:
    """Validate that any explicitly enabled providers are fully configured.

    Delegates to ``validate_enabled()`` on the config from
    ``get_tracing_config()``; whatever that call raises propagates to the caller.
    """
    tracing = get_tracing_config()
    tracing.validate_enabled()
|
||||
|
||||
|
||||
def is_tracing_enabled() -> bool:
    """Check if any tracing provider is enabled and fully configured.

    Returns:
        The ``is_configured`` value of the config from ``get_tracing_config()``.
    """
    return get_tracing_config().is_configured
|
||||
|
||||
Reference in New Issue
Block a user