fix(skills): enforce allowed-tools metadata (#2626)

* fix(skills): parse allowed-tools frontmatter * fix(skills): validate allowed-tools metadata * fix(skills): add shared allowed-tools policy * fix(subagents): enforce skill allowed-tools * fix(agent): enforce skill allowed-tools * refactor(skills): dedupe TypeVar and reuse cached enabled skills - Drop redundant module-level TypeVar in tool_policy; rely on PEP 695 syntax. - Expose get_cached_enabled_skills() and have the lead agent reuse it instead of synchronously rescanning skills on every request. * fix(agent): expose config-scoped skill cache * fix(subagents): pass filtered tools explicitly * fix(skills): clean allowed-tools policy feedback
2026-05-24 17:06:00 +00:00 · 2026-05-07 08:34:43 +08:00
parent 2b0e62f679
commit cef4224381
12 changed files with 553 additions and 55 deletions
@@ -23,6 +23,8 @@ from deerflow.agents.thread_state import SandboxState, ThreadDataState, ThreadSt
 from deerflow.config import get_app_config
 from deerflow.config.app_config import AppConfig
 from deerflow.models import create_chat_model
+from deerflow.skills.tool_policy import filter_tools_by_skill_allowed_tools
+from deerflow.skills.types import Skill
 from deerflow.subagents.config import SubagentConfig, resolve_subagent_model_name

 logger = logging.getLogger(__name__)
@@ -260,16 +262,16 @@ class SubagentExecutor:
        # Generate trace_id if not provided (for top-level calls)
        self.trace_id = trace_id or str(uuid.uuid4())[:8]

-        # Filter tools based on config
-        self.tools = _filter_tools(
+        self._base_tools = _filter_tools(
            tools,
            config.tools,
            config.disallowed_tools,
        )
+        self.tools = self._base_tools

        logger.info(f"[trace={self.trace_id}] SubagentExecutor initialized: {config.name} with {len(self.tools)} tools")

-    def _create_agent(self):
+    def _create_agent(self, tools: list[BaseTool] | None = None):
        """Create the agent instance."""
        app_config = self.app_config or get_app_config()
        if self.model_name is None:
@@ -283,26 +285,14 @@ class SubagentExecutor:

        return create_agent(
            model=model,
-            tools=self.tools,
+            tools=tools if tools is not None else self.tools,
            middleware=middlewares,
            system_prompt=self.config.system_prompt,
            state_schema=ThreadState,
        )

-    async def _load_skill_messages(self) -> list[SystemMessage]:
-        """Load skill content as conversation items based on config.skills.
-
-        Aligned with Codex's pattern: each subagent loads its own skills
-        per-session and injects them as conversation items (developer messages),
-        not as system prompt text. The config.skills whitelist controls which
-        skills are loaded:
-        - None: load all enabled skills
-        - []: no skills
-        - ["skill-a", "skill-b"]: only these skills
-
-        Returns:
-            List of SystemMessages containing skill content.
-        """
+    async def _load_skills(self) -> list[Skill]:
+        """Load enabled skill metadata based on config.skills."""
        if self.config.skills is not None and len(self.config.skills) == 0:
            logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} skills=[] — skipping skill loading")
            return []
@@ -316,8 +306,8 @@ class SubagentExecutor:
            all_skills = await asyncio.to_thread(storage.load_skills, enabled_only=True)
            logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} loaded {len(all_skills)} enabled skills from disk")
        except Exception:
-            logger.warning(f"[trace={self.trace_id}] Failed to load skills for subagent {self.config.name}", exc_info=True)
-            return []
+            logger.exception(f"[trace={self.trace_id}] Failed to load skills for subagent {self.config.name}")
+            raise

        if not all_skills:
            logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} no enabled skills found")
@@ -326,10 +316,26 @@ class SubagentExecutor:
        # Filter by config.skills whitelist
        if self.config.skills is not None:
            allowed = set(self.config.skills)
-            skills = [s for s in all_skills if s.name in allowed]
-        else:
-            skills = all_skills
+            return [s for s in all_skills if s.name in allowed]
+        return all_skills

+    def _apply_skill_allowed_tools(self, skills: list[Skill]) -> list[BaseTool]:
+        return filter_tools_by_skill_allowed_tools(self._base_tools, skills)
+
+    async def _load_skill_messages(self, skills: list[Skill]) -> list[SystemMessage]:
+        """Load skill content as conversation items based on config.skills.
+
+        Aligned with Codex's pattern: each subagent loads its own skills
+        per-session and injects them as conversation items (developer messages),
+        not as system prompt text. The config.skills whitelist controls which
+        skills are loaded:
+        - None: load all enabled skills
+        - []: no skills
+        - ["skill-a", "skill-b"]: only these skills
+
+        Returns:
+            List of SystemMessages containing skill content.
+        """
        if not skills:
            return []

@@ -347,19 +353,21 @@ class SubagentExecutor:

        return messages

-    async def _build_initial_state(self, task: str) -> dict[str, Any]:
+    async def _build_initial_state(self, task: str) -> tuple[dict[str, Any], list[BaseTool]]:
        """Build the initial state for agent execution.

        Args:
            task: The task description.

        Returns:
-            Initial state dictionary.
+            Initial state dictionary and tools filtered by loaded skill metadata.
        """
        # Load skills as conversation items (Codex pattern)
-        skill_messages = await self._load_skill_messages()
+        skills = await self._load_skills()
+        filtered_tools = self._apply_skill_allowed_tools(skills)
+        skill_messages = await self._load_skill_messages(skills)

-        messages: list = []
+        messages: list[Any] = []
        # Skill content injected as developer/system messages before the task
        messages.extend(skill_messages)
        # Then the actual task
@@ -375,7 +383,7 @@ class SubagentExecutor:
        if self.thread_data is not None:
            state["thread_data"] = self.thread_data

-        return state
+        return state, filtered_tools

    async def _aexecute(self, task: str, result_holder: SubagentResult | None = None) -> SubagentResult:
        """Execute a task asynchronously.
@@ -405,8 +413,8 @@ class SubagentExecutor:
            result.ai_messages = ai_messages

        try:
-            agent = self._create_agent()
-            state = await self._build_initial_state(task)
+            state, filtered_tools = await self._build_initial_state(task)
+            agent = self._create_agent(filtered_tools)

            # Build config with thread_id for sandbox access and recursion limit
            run_config: RunnableConfig = {