feat(subagents): support per-subagent skill loading and custom subagent types (#2253)

* feat(subagents): support per-subagent skill loading and custom subagent types (#2230) Add per-subagent skill configuration and custom subagent type registration, aligned with Codex's role-based config layering and per-session skill injection. Backend: - SubagentConfig gains `skills` field (None=all, []=none, list=whitelist) - New CustomSubagentConfig for user-defined subagent types in config.yaml - SubagentsAppConfig gains `custom_agents` section and `get_skills_for()` - Registry resolves custom agents with three-layer config precedence - SubagentExecutor loads skills per-session as conversation items (Codex pattern) - task_tool no longer appends skills to system_prompt - Lead agent system prompt dynamically lists all registered subagent types - setup_agent tool accepts optional skills parameter - Gateway agents API transparently passes skills in CRUD operations Frontend: - Agent/CreateAgentRequest/UpdateAgentRequest types include skills field - Agent card displays skills as badges alongside tool_groups Config: - config.example.yaml documents custom_agents and per-agent skills override Tests: - 40 new tests covering all skill config, custom agents, and registry logic - Existing tests updated for new get_skills_prompt_section signature Closes #2230 * fix: address review feedback on skills PR - Remove stale get_skills_prompt_section monkeypatches from test_task_tool_core_logic.py (task_tool no longer imports this function after skill injection moved to executor) - Add key prefixes (tg:/sk:) to agent-card badges to prevent React key collisions between tool_groups and skills * fix(ci): resolve lint and test failures - Format agent-card.tsx with prettier (lint-frontend) - Remove stale "Skills Appendix" system_prompt assertion — skills are now loaded per-session by SubagentExecutor, not appended to system_prompt * fix(ci): sort imports in test_subagent_skills_config.py (ruff I001) * fix(ci): use nullish coalescing in agent-card badge condition (eslint) * fix: address 
review feedback on skills PR - Use model_fields_set in AgentUpdateRequest to distinguish "field omitted" from "explicitly set to null" — fixes skills=None ambiguity where None means "inherit all" but was treated as "don't change" - Move lazy import of get_subagent_config outside loop in _build_available_subagents_description to avoid repeated import overhead --------- Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
2026-05-23 16:35:59 +00:00 · 2026-04-23 23:59:47 +08:00
parent 4e72410154
commit 30d619de08
14 changed files with 962 additions and 72 deletions
@@ -164,6 +164,36 @@ Skip simple one-off tasks.
 """


+def _build_available_subagents_description(available_names: list[str], bash_available: bool) -> str:
+    """Build the bullet list of subagent types shown in the lead agent prompt.
+
+    Built-in types use the curated descriptions below; every other name is
+    resolved through the subagent registry and listed with the first line of
+    its configured description. Mirrors Codex's pattern where
+    agent_type_description is dynamically generated from all registered roles,
+    so the LLM knows about every available type.
+
+    Args:
+        available_names: Subagent names to list, in display order.
+        bash_available: Whether the bash subagent can be used. When False the
+            bash entry explains that it is unavailable instead of describing it.
+
+    Returns:
+        One "- **name**: description" line per subagent, joined with newlines.
+    """
+    # Built-in descriptions (kept for backward compatibility with existing prompt quality)
+    builtin_descriptions = {
+        "general-purpose": "For ANY non-trivial task - web research, code exploration, file operations, analysis, etc.",
+        "bash": (
+            "For command execution (git, build, test, deploy operations)" if bash_available else "Not available in the current sandbox configuration. Use direct file/web tools or switch to AioSandboxProvider for isolated shell access."
+        ),
+    }
+
+    # Function-scope import, done once before the loop rather than per iteration.
+    from deerflow.subagents.registry import get_subagent_config
+
+    lines = []
+    for name in available_names:
+        if name in builtin_descriptions:
+            lines.append(f"- **{name}**: {builtin_descriptions[name]}")
+        else:
+            config = get_subagent_config(name)
+            if config is not None:
+                desc = config.description.split("\n")[0].strip()  # First line only for brevity
+                lines.append(f"- **{name}**: {desc}")
+
+    # When bash is unavailable it is normally absent from available_names, so the
+    # loop above never reaches it. Still tell the LLM explicitly that bash cannot
+    # be used (and what to do instead), as the previous hard-coded prompt did.
+    if not bash_available and "bash" not in available_names:
+        lines.append(f"- **bash**: {builtin_descriptions['bash']}")
+
+    return "\n".join(lines)
+
+
+
+
 def _build_subagent_section(max_concurrent: int) -> str:
    """Build the subagent system prompt section with dynamic concurrency limit.

@@ -174,13 +204,12 @@ def _build_subagent_section(max_concurrent: int) -> str:
        Formatted subagent section string.
    """
    n = max_concurrent
-    bash_available = "bash" in get_available_subagent_names()
-    available_subagents = (
-        "- **general-purpose**: For ANY non-trivial task - web research, code exploration, file operations, analysis, etc.\n- **bash**: For command execution (git, build, test, deploy operations)"
-        if bash_available
-        else "- **general-purpose**: For ANY non-trivial task - web research, code exploration, file operations, analysis, etc.\n"
-        "- **bash**: Not available in the current sandbox configuration. Use direct file/web tools or switch to AioSandboxProvider for isolated shell access."
-    )
+    available_names = get_available_subagent_names()
+    bash_available = "bash" in available_names
+
+    # Dynamically build subagent type descriptions from registry (aligned with Codex's
+    # agent_type_description pattern where all registered roles are listed in the tool spec).
+    available_subagents = _build_available_subagents_description(available_names, bash_available)
    direct_tool_examples = "bash, ls, read_file, web_search, etc." if bash_available else "ls, read_file, web_search, etc."
    direct_execution_example = (
        '# User asks: "Run the tests"\n# Thinking: Cannot decompose into parallel sub-tasks\n# → Execute directly\n\nbash("npm test")  # Direct execution, not task()'
@@ -25,6 +25,47 @@ class SubagentOverrideConfig(BaseModel):
        min_length=1,
        description="Model name for this subagent (None = inherit from parent agent)",
    )
+    skills: list[str] | None = Field(
+        default=None,
+        description="Skill names whitelist for this subagent (None = inherit all enabled skills, [] = no skills)",
+    )
+
+
+class CustomSubagentConfig(BaseModel):
+    """User-defined subagent type declared in config.yaml.
+
+    Each entry of the `custom_agents` mapping is validated against this model.
+    `description` and `system_prompt` are required; every other field has a
+    default.
+    """
+
+    # Required: tells the lead agent when this subagent is the right choice.
+    description: str = Field(
+        description="When the lead agent should delegate to this subagent",
+    )
+    # Required: the subagent's own system prompt.
+    system_prompt: str = Field(
+        description="System prompt that guides the subagent's behavior",
+    )
+    # Allow-list of tool names; None means no restriction beyond the parent's tools.
+    tools: list[str] | None = Field(
+        default=None,
+        description="Tool names whitelist (None = inherit all tools from parent)",
+    )
+    # Deny-list of tool names; default_factory gives each instance its own list.
+    disallowed_tools: list[str] | None = Field(
+        default_factory=lambda: ["task", "ask_clarification", "present_files"],
+        description="Tool names to deny",
+    )
+    # Tri-state: None = all enabled skills, [] = no skills, list = whitelist.
+    skills: list[str] | None = Field(
+        default=None,
+        description="Skill names whitelist (None = inherit all enabled skills, [] = no skills)",
+    )
+    # min_length=1 rejects an empty model name at config-load time, consistent
+    # with the per-agent override model field.
+    model: str = Field(
+        default="inherit",
+        min_length=1,
+        description="Model to use - 'inherit' uses parent's model",
+    )
+    max_turns: int = Field(
+        default=50,
+        ge=1,
+        description="Maximum number of agent turns before stopping",
+    )
+    timeout_seconds: int = Field(
+        default=900,
+        ge=1,
+        description="Maximum execution time in seconds",
+    )


 class SubagentsAppConfig(BaseModel):
@@ -44,6 +85,10 @@ class SubagentsAppConfig(BaseModel):
        default_factory=dict,
        description="Per-agent configuration overrides keyed by agent name",
    )
+    custom_agents: dict[str, CustomSubagentConfig] = Field(
+        default_factory=dict,
+        description="User-defined subagent types keyed by agent name",
+    )

    def get_timeout_for(self, agent_name: str) -> int:
        """Get the effective timeout for a specific agent.
@@ -82,6 +127,20 @@ class SubagentsAppConfig(BaseModel):
            return self.max_turns
        return builtin_default

+    def get_skills_for(self, agent_name: str) -> list[str] | None:
+        """Return the per-agent skills whitelist configured for `agent_name`.
+
+        Args:
+            agent_name: The name of the subagent.
+
+        Returns:
+            The skill names from the agent's override entry, or None when the
+            agent has no override entry or the entry leaves `skills` unset
+            (the subagent then inherits all enabled skills).
+        """
+        agent_override = self.agents.get(agent_name)
+        if agent_override is None:
+            return None
+        # `skills` is itself None when the override entry does not set it.
+        return agent_override.skills
+
+

 _subagents_config: SubagentsAppConfig = SubagentsAppConfig()

@@ -105,15 +164,20 @@ def load_subagents_config_from_dict(config_dict: dict) -> None:
            parts.append(f"max_turns={override.max_turns}")
        if override.model is not None:
            parts.append(f"model={override.model}")
+        if override.skills is not None:
+            parts.append(f"skills={override.skills}")
        if parts:
            overrides_summary[name] = ", ".join(parts)

-    if overrides_summary:
+    custom_agents_names = list(_subagents_config.custom_agents.keys())
+
+    if overrides_summary or custom_agents_names:
        logger.info(
-            "Subagents config loaded: default timeout=%ss, default max_turns=%s, per-agent overrides=%s",
+            "Subagents config loaded: default timeout=%ss, default max_turns=%s, per-agent overrides=%s, custom_agents=%s",
            _subagents_config.timeout_seconds,
            _subagents_config.max_turns,
-            overrides_summary,
+            overrides_summary or "none",
+            custom_agents_names or "none",
        )
    else:
        logger.info(
@@ -13,6 +13,8 @@ class SubagentConfig:
        system_prompt: The system prompt that guides the subagent's behavior.
        tools: Optional list of tool names to allow. If None, inherits all tools.
        disallowed_tools: Optional list of tool names to deny.
+        skills: Optional list of skill names to load. If None, inherits all enabled skills.
+                If an empty list, no skills are loaded.
        model: Model to use - 'inherit' uses parent's model.
        max_turns: Maximum number of agent turns before stopping.
        timeout_seconds: Maximum execution time in seconds (default: 900 = 15 minutes).
@@ -23,6 +25,7 @@ class SubagentConfig:
    system_prompt: str
    tools: list[str] | None = None
    disallowed_tools: list[str] | None = field(default_factory=lambda: ["task"])
+    skills: list[str] | None = None
    model: str = "inherit"
    max_turns: int = 50
    timeout_seconds: int = 900
@@ -13,7 +13,7 @@ from typing import Any

 from langchain.agents import create_agent
 from langchain.tools import BaseTool
-from langchain_core.messages import AIMessage, HumanMessage
+from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
 from langchain_core.runnables import RunnableConfig

 from deerflow.agents.thread_state import SandboxState, ThreadDataState, ThreadState
@@ -184,7 +184,63 @@ class SubagentExecutor:
            state_schema=ThreadState,
        )

-    def _build_initial_state(self, task: str) -> dict[str, Any]:
+    async def _load_skill_messages(self) -> list[SystemMessage]:
+        """Load skill content as conversation items based on config.skills.
+
+        Aligned with Codex's pattern: each subagent loads its own skills
+        per-session and injects them as conversation items (developer messages),
+        not as system prompt text. The config.skills whitelist controls which
+        skills are loaded:
+        - None: load all enabled skills
+        - []: no skills
+        - ["skill-a", "skill-b"]: only these skills
+
+        Loading is best-effort: a failure to list skills yields an empty result,
+        and a skill file that cannot be read is skipped. Both are logged as
+        warnings, as are whitelisted names that match no enabled skill.
+
+        Returns:
+            List of SystemMessages, one per non-empty skill file, each wrapping
+            the file content in a `<skill name="...">` element.
+        """
+        whitelist = self.config.skills
+        if whitelist is not None and not whitelist:
+            logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} skills=[] — skipping skill loading")
+            return []
+
+        try:
+            from deerflow.skills.loader import load_skills
+
+            # Use asyncio.to_thread to avoid blocking the event loop (LangGraph ASGI requirement)
+            all_skills = await asyncio.to_thread(load_skills, enabled_only=True)
+            logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} loaded {len(all_skills)} enabled skills from disk")
+        except Exception:
+            logger.warning(f"[trace={self.trace_id}] Failed to load skills for subagent {self.config.name}", exc_info=True)
+            return []
+
+        if not all_skills:
+            logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} no enabled skills found")
+            return []
+
+        # Filter by config.skills whitelist
+        if whitelist is not None:
+            allowed = set(whitelist)
+            skills = [s for s in all_skills if s.name in allowed]
+            # Surface misspelled or disabled names instead of dropping them silently.
+            missing = sorted(allowed - {s.name for s in skills})
+            if missing:
+                logger.warning(f"[trace={self.trace_id}] Subagent {self.config.name} requested skills not found among enabled skills: {missing}")
+        else:
+            skills = all_skills
+
+        if not skills:
+            return []
+
+        # Read each skill's SKILL.md content and create conversation items
+        messages: list[SystemMessage] = []
+        for skill in skills:
+            try:
+                content = await asyncio.to_thread(skill.skill_file.read_text, encoding="utf-8")
+                content = content.strip()
+                if content:
+                    messages.append(SystemMessage(content=f'<skill name="{skill.name}">\n{content}\n</skill>'))
+                    logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} loaded skill: {skill.name}")
+            except Exception:
+                # Skip this skill but keep going; warn so the gap is visible at default log levels.
+                logger.warning(f"[trace={self.trace_id}] Failed to read skill {skill.name}", exc_info=True)
+
+        return messages
+
+
+    async def _build_initial_state(self, task: str) -> dict[str, Any]:
        """Build the initial state for agent execution.

        Args:
@@ -193,8 +249,17 @@ class SubagentExecutor:
        Returns:
            Initial state dictionary.
        """
+        # Load skills as conversation items (Codex pattern)
+        skill_messages = await self._load_skill_messages()
+
+        messages: list = []
+        # Skill content injected as developer/system messages before the task
+        messages.extend(skill_messages)
+        # Then the actual task
+        messages.append(HumanMessage(content=task))
+
        state: dict[str, Any] = {
-            "messages": [HumanMessage(content=task)],
+            "messages": messages,
        }

        # Pass through sandbox and thread data from parent
@@ -230,7 +295,7 @@ class SubagentExecutor:

        try:
            agent = self._create_agent()
-            state = self._build_initial_state(task)
+            state = await self._build_initial_state(task)

            # Build config with thread_id for sandbox access and recursion limit
            run_config: RunnableConfig = {
@@ -10,53 +10,100 @@ from deerflow.subagents.config import SubagentConfig
 logger = logging.getLogger(__name__)


+def _build_custom_subagent_config(name: str) -> SubagentConfig | None:
+    """Translate a config.yaml `custom_agents` entry into a SubagentConfig.
+
+    Args:
+        name: The name of the custom subagent.
+
+    Returns:
+        A SubagentConfig carrying the entry's values under the given name, or
+        None when no custom agent with that name is configured.
+    """
+    from deerflow.config.subagents_config import get_subagents_app_config
+
+    definition = get_subagents_app_config().custom_agents.get(name)
+    if definition is None:
+        return None
+
+    # Fields copied one-to-one from the custom agent definition.
+    copied_fields = (
+        "description",
+        "system_prompt",
+        "tools",
+        "disallowed_tools",
+        "skills",
+        "model",
+        "max_turns",
+        "timeout_seconds",
+    )
+    field_values = {field_name: getattr(definition, field_name) for field_name in copied_fields}
+    return SubagentConfig(name=name, **field_values)
+
+
 def get_subagent_config(name: str) -> SubagentConfig | None:
    """Get a subagent configuration by name, with config.yaml overrides applied.

+    Resolution order (mirrors Codex's config layering):
+    1. Built-in subagents (general-purpose, bash)
+    2. Custom subagents from config.yaml custom_agents section
+    3. Per-agent overrides from config.yaml agents section (timeout, max_turns, model, skills)
+
    Args:
        name: The name of the subagent.

    Returns:
        SubagentConfig if found (with any config.yaml overrides applied), None otherwise.
    """
+    # Step 1: Look up built-in, then fall back to custom_agents
    config = BUILTIN_SUBAGENTS.get(name)
+    if config is None:
+        config = _build_custom_subagent_config(name)
    if config is None:
        return None

-    # Apply runtime overrides (timeout, max_turns, model) from config.yaml
+    # Step 2: Apply per-agent overrides from config.yaml agents section.
+    # Only explicit per-agent overrides are applied here. Global defaults
+    # (timeout_seconds, max_turns at the top level) apply to built-in agents
+    # but must NOT override custom agents' own values — custom agents define
+    # their own defaults in the custom_agents section.
    # Lazy import to avoid circular deps.
    from deerflow.config.subagents_config import get_subagents_app_config

    app_config = get_subagents_app_config()
-    effective_timeout = app_config.get_timeout_for(name)
-    effective_max_turns = app_config.get_max_turns_for(name, config.max_turns)
+    is_builtin = name in BUILTIN_SUBAGENTS
+    agent_override = app_config.agents.get(name)

    overrides = {}
-    if effective_timeout != config.timeout_seconds:
-        logger.debug(
-            "Subagent '%s': timeout overridden by config.yaml (%ss -> %ss)",
-            name,
-            config.timeout_seconds,
-            effective_timeout,
-        )
-        overrides["timeout_seconds"] = effective_timeout
-    if effective_max_turns != config.max_turns:
-        logger.debug(
-            "Subagent '%s': max_turns overridden by config.yaml (%s -> %s)",
-            name,
-            config.max_turns,
-            effective_max_turns,
-        )
-        overrides["max_turns"] = effective_max_turns
+
+    # Timeout: per-agent override > global default (builtins only) > config's own value
+    if agent_override is not None and agent_override.timeout_seconds is not None:
+        if agent_override.timeout_seconds != config.timeout_seconds:
+            logger.debug("Subagent '%s': timeout overridden (%ss -> %ss)", name, config.timeout_seconds, agent_override.timeout_seconds)
+            overrides["timeout_seconds"] = agent_override.timeout_seconds
+    elif is_builtin and app_config.timeout_seconds != config.timeout_seconds:
+        logger.debug("Subagent '%s': timeout from global default (%ss -> %ss)", name, config.timeout_seconds, app_config.timeout_seconds)
+        overrides["timeout_seconds"] = app_config.timeout_seconds
+
+    # Max turns: per-agent override > global default (builtins only) > config's own value
+    if agent_override is not None and agent_override.max_turns is not None:
+        if agent_override.max_turns != config.max_turns:
+            logger.debug("Subagent '%s': max_turns overridden (%s -> %s)", name, config.max_turns, agent_override.max_turns)
+            overrides["max_turns"] = agent_override.max_turns
+    elif is_builtin and app_config.max_turns is not None and app_config.max_turns != config.max_turns:
+        logger.debug("Subagent '%s': max_turns from global default (%s -> %s)", name, config.max_turns, app_config.max_turns)
+        overrides["max_turns"] = app_config.max_turns
+
+    # Model: per-agent override only (no global default for model)
    effective_model = app_config.get_model_for(name)
    if effective_model is not None and effective_model != config.model:
-        logger.debug(
-            "Subagent '%s': model overridden by config.yaml (%s -> %s)",
-            name,
-            config.model,
-            effective_model,
-        )
+        logger.debug("Subagent '%s': model overridden (%s -> %s)", name, config.model, effective_model)
        overrides["model"] = effective_model
+
+    # Skills: per-agent override only (no global default for skills)
+    effective_skills = app_config.get_skills_for(name)
+    if effective_skills is not None and effective_skills != config.skills:
+        logger.debug("Subagent '%s': skills overridden (%s -> %s)", name, config.skills, effective_skills)
+        overrides["skills"] = effective_skills
+
    if overrides:
        config = replace(config, **overrides)

@@ -67,18 +114,33 @@ def list_subagents() -> list[SubagentConfig]:
    """List all available subagent configurations (with config.yaml overrides applied).

    Returns:
-        List of all registered SubagentConfig instances.
+        List of all registered SubagentConfig instances (built-in + custom).
    """
-    return [get_subagent_config(name) for name in BUILTIN_SUBAGENTS]
+    configs = []
+    for name in get_subagent_names():
+        config = get_subagent_config(name)
+        if config is not None:
+            configs.append(config)
+    return configs


 def get_subagent_names() -> list[str]:
-    """Get all available subagent names.
+    """Get all available subagent names (built-in + custom).

    Returns:
        List of subagent names.
    """
-    return list(BUILTIN_SUBAGENTS.keys())
+    names = list(BUILTIN_SUBAGENTS.keys())
+
+    # Merge custom_agents from config.yaml
+    from deerflow.config.subagents_config import get_subagents_app_config
+
+    app_config = get_subagents_app_config()
+    for custom_name in app_config.custom_agents:
+        if custom_name not in names:
+            names.append(custom_name)
+
+    return names


 def get_available_subagent_names() -> list[str]:
@@ -87,11 +149,11 @@ def get_available_subagent_names() -> list[str]:
    Returns:
        List of subagent names visible to the current sandbox configuration.
    """
-    names = list(BUILTIN_SUBAGENTS.keys())
+    names = get_subagent_names()
    try:
        host_bash_allowed = is_host_bash_allowed()
    except Exception:
-        logger.debug("Could not determine host bash availability; exposing all built-in subagents")
+        logger.debug("Could not determine host bash availability; exposing all subagents")
        return names

    if not host_bash_allowed:
@@ -17,12 +17,14 @@ def setup_agent(
    soul: str,
    description: str,
    runtime: ToolRuntime,
+    skills: list[str] | None = None,
 ) -> Command:
    """Setup the custom DeerFlow agent.

    Args:
        soul: Full SOUL.md content defining the agent's personality and behavior.
        description: One-line description of what the agent does.
+        skills: Optional list of skill names this agent should use. None means use all enabled skills, empty list means no skills.
    """

    agent_name: str | None = runtime.context.get("agent_name") if runtime.context else None
@@ -41,6 +43,8 @@ def setup_agent(
            config_data: dict = {"name": agent_name}
            if description:
                config_data["description"] = description
+            if skills is not None:
+                config_data["skills"] = skills

            config_file = agent_dir / "config.yaml"
            with open(config_file, "w", encoding="utf-8") as f:
@@ -10,7 +10,6 @@ from langchain.tools import InjectedToolCallId, ToolRuntime, tool
 from langgraph.config import get_stream_writer
 from langgraph.typing import ContextT

-from deerflow.agents.lead_agent.prompt import get_skills_prompt_section
 from deerflow.agents.thread_state import ThreadState
 from deerflow.sandbox.security import LOCAL_BASH_SUBAGENT_DISABLED_MESSAGE, is_host_bash_allowed
 from deerflow.subagents import SubagentExecutor, get_available_subagent_names, get_subagent_config
@@ -35,7 +34,7 @@ async def task_tool(
    - Handle complex multi-step tasks autonomously
    - Execute commands or operations in isolated contexts

-    Available subagent types depend on the active sandbox configuration:
+    Built-in subagent types:
    - **general-purpose**: A capable agent for complex, multi-step tasks that require
      both exploration and action. Use when the task requires complex reasoning,
      multiple dependent steps, or would benefit from isolated context.
@@ -43,6 +42,11 @@ async def task_tool(
      available when host bash is explicitly allowed or when using an isolated shell
      sandbox such as `AioSandboxProvider`.

+    Additional custom subagent types may be defined in config.yaml under
+    `subagents.custom_agents`. Each custom type can have its own system prompt,
+    tools, skills, model, and timeout configuration. If an unknown subagent_type
+    is provided, the error message will list all available types.
+
    When to use this tool:
    - Complex tasks requiring multiple steps or tools
    - Tasks that produce verbose output
@@ -72,9 +76,9 @@ async def task_tool(
    # Build config overrides
    overrides: dict = {}

-    skills_section = get_skills_prompt_section()
-    if skills_section:
-        overrides["system_prompt"] = config.system_prompt + "\n\n" + skills_section
+    # Skills are loaded by SubagentExecutor per-session (aligned with Codex's pattern:
+    # each subagent loads its own skills based on config, injected as conversation items).
+    # No longer appended to system_prompt here.

    if max_turns is not None:
        overrides["max_turns"] = max_turns