fix(skills): enforce allowed-tools metadata (#2626)

* fix(skills): parse allowed-tools frontmatter

* fix(skills): validate allowed-tools metadata

* fix(skills): add shared allowed-tools policy

* fix(subagents): enforce skill allowed-tools

* fix(agent): enforce skill allowed-tools

* refactor(skills): dedupe TypeVar and reuse cached enabled skills

- Drop redundant module-level TypeVar in tool_policy; rely on PEP 695 syntax.
- Expose get_cached_enabled_skills() and have the lead agent reuse it
  instead of synchronously rescanning skills on every request.

* fix(agent): expose config-scoped skill cache

* fix(subagents): pass filtered tools explicitly

* fix(skills): clean allowed-tools policy feedback
This commit is contained in:
AochenShen99
2026-05-07 08:34:43 +08:00
committed by GitHub
parent 2b0e62f679
commit cef4224381
12 changed files with 553 additions and 55 deletions
+141 -4
View File
@@ -17,11 +17,14 @@ import asyncio
import sys
import threading
from datetime import datetime
from pathlib import Path
from types import ModuleType, SimpleNamespace
from unittest.mock import MagicMock, patch
import pytest
from deerflow.skills.types import Skill
# Module names that need to be mocked to break circular imports
_MOCKED_MODULE_NAMES = [
"deerflow.agents",
@@ -32,14 +35,15 @@ _MOCKED_MODULE_NAMES = [
"deerflow.sandbox.middleware",
"deerflow.sandbox.security",
"deerflow.models",
"deerflow.skills.storage",
]
@pytest.fixture(scope="session", autouse=True)
@pytest.fixture(autouse=True)
def _setup_executor_classes():
"""Set up mocked modules and import real executor classes.
This fixture runs once per session and yields the executor classes.
This fixture runs once per test and yields the executor classes.
It handles module cleanup to avoid affecting other test files.
"""
# Save original modules
@@ -53,6 +57,9 @@ def _setup_executor_classes():
# Set up mocks
for name in _MOCKED_MODULE_NAMES:
sys.modules[name] = MagicMock()
storage_module = ModuleType("deerflow.skills.storage")
storage_module.get_or_new_skill_storage = lambda **kwargs: SimpleNamespace(load_skills=lambda *, enabled_only: [])
sys.modules["deerflow.skills.storage"] = storage_module
# Import real classes inside fixture
from langchain_core.messages import AIMessage, HumanMessage
@@ -117,6 +124,26 @@ class MockAIMessage:
return msg
class NamedTool:
    """Minimal stand-in for a tool object that exposes only a ``name``.

    The allowed-tools tests in this file only read ``tool.name`` from the
    objects they pass around, so no other tool behaviour is modelled here.
    """

    def __init__(self, name: str):
        self.name = name

    def __repr__(self) -> str:
        # Makes assertion diffs and mock call dumps show the tool name.
        return f"{type(self).__name__}({self.name!r})"
def _skill(name: str, allowed_tools: list[str] | None) -> Skill:
    """Build an enabled ``Skill`` in the ``"custom"`` category for tests.

    Args:
        name: Skill name; also used for the description, the relative path
            and the ``/tmp/<name>`` skill directory.
        allowed_tools: Value stored on ``Skill.allowed_tools``. The tests in
            this file pass ``None`` for skills without allowed-tools
            metadata, ``[]`` for an explicitly empty declaration, and a
            list of tool names otherwise.

    Returns:
        A ``Skill`` whose ``skill_file`` is ``<skill_dir>/SKILL.md``. Nothing
        is created on disk by this helper.
    """
    skill_dir = Path(f"/tmp/{name}")
    return Skill(
        name=name,
        description=f"{name} skill",
        license=None,
        skill_dir=skill_dir,
        skill_file=skill_dir / "SKILL.md",
        relative_path=Path(name),
        category="custom",
        allowed_tools=allowed_tools,
        enabled=True,
    )
async def async_iterator(items):
"""Helper to create an async iterator from a list."""
for item in items:
@@ -288,7 +315,7 @@ class TestAgentConstruction:
captured["app_config"] = app_config
return SimpleNamespace(load_skills=lambda *, enabled_only: [SimpleNamespace(name="demo-skill", skill_file=skill_file)])
monkeypatch.setattr("deerflow.skills.storage.get_or_new_skill_storage", fake_get_or_new_skill_storage)
monkeypatch.setattr(sys.modules["deerflow.skills.storage"], "get_or_new_skill_storage", fake_get_or_new_skill_storage)
executor = SubagentExecutor(
config=base_config,
@@ -297,7 +324,8 @@ class TestAgentConstruction:
thread_id="test-thread",
)
messages = await executor._load_skill_messages()
skills = await executor._load_skills()
messages = await executor._load_skill_messages(skills)
assert captured["app_config"] is app_config
assert len(messages) == 1
@@ -487,6 +515,115 @@ class TestAsyncExecutionPath:
assert "Task" in result.result
class TestSkillAllowedTools:
    """Tests for how skills' ``allowed_tools`` restrict the tools given to the agent.

    Every test patches ``_load_skills`` and ``_create_agent`` on a
    ``SubagentExecutor`` and then inspects the first positional argument
    passed to ``_create_agent`` (the tool list) as well as ``executor.tools``.
    """

    async def _run_with_skills(self, classes, base_config, mock_agent, msg, skills):
        """Run ``_aexecute("Task")`` with ``skills`` returned by ``_load_skills``.

        The executor is built with three tools named ``bash``, ``read_file``
        and ``web_search``; ``mock_agent.astream`` yields a single final state.

        Returns:
            ``(executor, create_agent_mock)`` so callers can assert on the
            tools passed to ``_create_agent`` and on ``executor.tools``.
        """
        SubagentExecutor = classes["SubagentExecutor"]

        final_state = {"messages": [msg.human("Task"), msg.ai("Done", "msg-1")]}
        mock_agent.astream = lambda *args, **kwargs: async_iterator([final_state])

        tools = [NamedTool("bash"), NamedTool("read_file"), NamedTool("web_search")]
        executor = SubagentExecutor(config=base_config, tools=tools, thread_id="test-thread")

        async def load_skills():
            return skills

        with (
            patch.object(executor, "_load_skills", load_skills),
            patch.object(executor, "_create_agent", return_value=mock_agent) as create_agent_mock,
        ):
            await executor._aexecute("Task")

        return executor, create_agent_mock

    @pytest.mark.anyio
    async def test_skill_allowed_tools_union_filters_agent_tools(self, classes, base_config, mock_agent, msg):
        """Two restricted skills: the agent gets the union of their tools only."""
        skills = [_skill("a", ["bash"]), _skill("b", ["read_file"])]

        executor, create_agent_mock = await self._run_with_skills(classes, base_config, mock_agent, msg, skills)

        create_agent_mock.assert_called_once()
        assert [tool.name for tool in create_agent_mock.call_args.args[0]] == ["bash", "read_file"]
        # The executor's own tool list must not be narrowed by the filtering.
        assert [tool.name for tool in executor.tools] == ["bash", "read_file", "web_search"]

    @pytest.mark.anyio
    async def test_all_missing_allowed_tools_preserves_legacy_allow_all(self, classes, base_config, mock_agent, msg):
        """When no skill sets ``allowed_tools``, every tool reaches the agent."""
        skills = [_skill("legacy-a", None), _skill("legacy-b", None)]

        executor, create_agent_mock = await self._run_with_skills(classes, base_config, mock_agent, msg, skills)

        assert [tool.name for tool in create_agent_mock.call_args.args[0]] == ["bash", "read_file", "web_search"]
        assert [tool.name for tool in executor.tools] == ["bash", "read_file", "web_search"]

    @pytest.mark.anyio
    async def test_mixed_missing_allowed_tools_does_not_disable_explicit_restrictions(self, classes, base_config, mock_agent, msg):
        """A skill without ``allowed_tools`` listed first does not lift another skill's restriction."""
        skills = [_skill("legacy", None), _skill("restricted", ["bash"])]

        executor, create_agent_mock = await self._run_with_skills(classes, base_config, mock_agent, msg, skills)

        assert [tool.name for tool in create_agent_mock.call_args.args[0]] == ["bash"]
        assert [tool.name for tool in executor.tools] == ["bash", "read_file", "web_search"]

    @pytest.mark.anyio
    async def test_mixed_missing_allowed_tools_order_does_not_disable_explicit_restrictions(self, classes, base_config, mock_agent, msg):
        """Same as the mixed case, with the restricted skill listed first."""
        skills = [_skill("restricted", ["bash"]), _skill("legacy", None)]

        executor, create_agent_mock = await self._run_with_skills(classes, base_config, mock_agent, msg, skills)

        assert [tool.name for tool in create_agent_mock.call_args.args[0]] == ["bash"]
        assert [tool.name for tool in executor.tools] == ["bash", "read_file", "web_search"]

    @pytest.mark.anyio
    async def test_empty_allowed_tools_contributes_no_tools(self, classes, base_config, mock_agent, msg, caplog):
        """An empty ``allowed_tools`` list adds nothing and is reported in the log."""
        skills = [_skill("empty", []), _skill("reader", ["read_file"])]

        # Capture INFO-and-above records emitted while the executor runs.
        with caplog.at_level("INFO"):
            executor, create_agent_mock = await self._run_with_skills(classes, base_config, mock_agent, msg, skills)

        assert [tool.name for tool in create_agent_mock.call_args.args[0]] == ["read_file"]
        assert [tool.name for tool in executor.tools] == ["bash", "read_file", "web_search"]
        assert "declared empty allowed-tools" in caplog.text

    @pytest.mark.anyio
    async def test_skill_load_failure_fails_without_creating_agent(self, classes, base_config, mock_agent):
        """If ``_load_skills`` raises, the result is FAILED and no agent is created."""
        SubagentExecutor = classes["SubagentExecutor"]
        executor = SubagentExecutor(config=base_config, tools=[NamedTool("bash")], thread_id="test-thread")

        async def load_skills():
            raise RuntimeError("skill storage unavailable")

        with (
            patch.object(executor, "_load_skills", load_skills),
            patch.object(executor, "_create_agent", return_value=mock_agent) as create_agent_mock,
        ):
            result = await executor._aexecute("Task")

        assert result.status == classes["SubagentStatus"].FAILED
        assert result.error == "skill storage unavailable"
        create_agent_mock.assert_not_called()
# -----------------------------------------------------------------------------
# Sync Execution Path Tests
# -----------------------------------------------------------------------------