Implement skill self-evolution and skill_manage flow (#1874)

* chore: ignore .worktrees directory

* Add skill_manage self-evolution flow

* Fix CI regressions for skill_manage

* Address PR review feedback for skill evolution

* fix(skill-evolution): preserve history on delete

* fix(skill-evolution): tighten scanner fallbacks

* docs: add skill_manage e2e evidence screenshot

* fix(skill-manage): avoid blocking fs ops in session runtime

---------

Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
This commit is contained in:
DanielWalnut
2026-04-06 22:07:11 +08:00
committed by GitHub
parent 055e4df049
commit 888f7bfb9d
20 changed files with 1164 additions and 59 deletions
+48
View File
@@ -1,4 +1,5 @@
from pathlib import Path
from types import SimpleNamespace
from deerflow.agents.lead_agent.prompt import get_skills_prompt_section
from deerflow.config.agents_config import AgentConfig
@@ -41,6 +42,7 @@ def test_get_skills_prompt_section_returns_skills(monkeypatch):
result = get_skills_prompt_section(available_skills={"skill1"})
assert "skill1" in result
assert "skill2" not in result
assert "[built-in]" in result
def test_get_skills_prompt_section_returns_all_when_available_skills_is_none(monkeypatch):
@@ -52,6 +54,52 @@ def test_get_skills_prompt_section_returns_all_when_available_skills_is_none(mon
assert "skill2" in result
def test_get_skills_prompt_section_includes_self_evolution_rules(monkeypatch):
    """The self-evolution heading appears when the feature is enabled and one skill is loaded."""
    loaded_skills = [_make_skill("skill1")]

    def _fake_load_skills(enabled_only):
        return loaded_skills

    def _fake_app_config():
        # A fresh namespace per call, mirroring a config lookup with evolution switched on.
        return SimpleNamespace(
            skills=SimpleNamespace(container_path="/mnt/skills"),
            skill_evolution=SimpleNamespace(enabled=True),
        )

    monkeypatch.setattr("deerflow.agents.lead_agent.prompt.load_skills", _fake_load_skills)
    monkeypatch.setattr("deerflow.config.get_app_config", _fake_app_config)

    prompt_section = get_skills_prompt_section(available_skills=None)

    assert "Skill Self-Evolution" in prompt_section
def test_get_skills_prompt_section_includes_self_evolution_rules_without_skills(monkeypatch):
    """The self-evolution heading is still emitted when the skill loader returns nothing."""

    def _fake_load_skills(enabled_only):
        return []

    def _fake_app_config():
        return SimpleNamespace(
            skills=SimpleNamespace(container_path="/mnt/skills"),
            skill_evolution=SimpleNamespace(enabled=True),
        )

    monkeypatch.setattr("deerflow.agents.lead_agent.prompt.load_skills", _fake_load_skills)
    monkeypatch.setattr("deerflow.config.get_app_config", _fake_app_config)

    prompt_section = get_skills_prompt_section(available_skills=None)

    assert "Skill Self-Evolution" in prompt_section
def test_get_skills_prompt_section_cache_respects_skill_evolution_toggle(monkeypatch):
    """Flipping ``skill_evolution.enabled`` between two calls changes the rendered section."""
    loaded_skills = [_make_skill("skill1")]

    # One shared, mutable config object so the toggle below is visible to the code under test.
    app_config = SimpleNamespace(
        skills=SimpleNamespace(container_path="/mnt/skills"),
        skill_evolution=SimpleNamespace(enabled=True),
    )

    def _fake_load_skills(enabled_only):
        return loaded_skills

    monkeypatch.setattr("deerflow.agents.lead_agent.prompt.load_skills", _fake_load_skills)
    monkeypatch.setattr("deerflow.config.get_app_config", lambda: app_config)

    section_when_enabled = get_skills_prompt_section(available_skills=None)
    assert "Skill Self-Evolution" in section_when_enabled

    app_config.skill_evolution.enabled = False

    section_when_disabled = get_skills_prompt_section(available_skills=None)
    assert "Skill Self-Evolution" not in section_when_disabled
def test_make_lead_agent_empty_skills_passed_correctly(monkeypatch):
from unittest.mock import MagicMock
+17
View File
@@ -0,0 +1,17 @@
from types import SimpleNamespace
import pytest
from deerflow.skills.security_scanner import scan_skill_content
@pytest.mark.anyio
async def test_scan_skill_content_blocks_when_model_unavailable(monkeypatch):
    """The scanner reports a ``block`` decision when building the chat model raises."""

    def _failing_model_factory(**kwargs):
        # Stand-in for create_chat_model that always fails.
        raise RuntimeError("boom")

    app_config = SimpleNamespace(skill_evolution=SimpleNamespace(moderation_model_name=None))
    monkeypatch.setattr("deerflow.skills.security_scanner.get_app_config", lambda: app_config)
    monkeypatch.setattr("deerflow.skills.security_scanner.create_chat_model", _failing_model_factory)

    skill_markdown = "---\nname: demo-skill\ndescription: demo\n---\n"
    scan = await scan_skill_content(skill_markdown, executable=False)

    assert scan.decision == "block"
    assert "manual review required" in scan.reason
+163
View File
@@ -0,0 +1,163 @@
import importlib
from types import SimpleNamespace
import anyio
import pytest
skill_manage_module = importlib.import_module("deerflow.tools.skill_manage_tool")
def _skill_content(name: str, description: str = "Demo skill") -> str:
return f"---\nname: {name}\ndescription: {description}\n---\n\n# {name}\n"
async def _async_result(decision: str, reason: str):
    """Coroutine that builds a ``ScanResult`` from the given decision and reason."""
    # Imported lazily, inside the coroutine, exactly as the test module does it.
    from deerflow.skills.security_scanner import ScanResult

    scan_fields = {"decision": decision, "reason": reason}
    return ScanResult(**scan_fields)
def test_skill_manage_create_and_patch(monkeypatch, tmp_path):
    """Create a custom skill via the tool coroutine, patch it, and check SKILL.md on disk."""
    skills_root = tmp_path / "skills"
    config = SimpleNamespace(
        skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"),
        skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None),
    )

    def _allow_scan(*args, **kwargs):
        return _async_result("allow", "ok")

    # Every module that looks up the app config gets the same stub.
    for config_target in (
        "deerflow.config.get_app_config",
        "deerflow.skills.manager.get_app_config",
        "deerflow.skills.security_scanner.get_app_config",
    ):
        monkeypatch.setattr(config_target, lambda: config)
    monkeypatch.setattr(skill_manage_module, "clear_skills_system_prompt_cache", lambda: None)
    monkeypatch.setattr(skill_manage_module, "scan_skill_content", _allow_scan)

    runtime = SimpleNamespace(
        context={"thread_id": "thread-1"},
        config={"configurable": {"thread_id": "thread-1"}},
    )
    tool_coroutine = skill_manage_module.skill_manage_tool.coroutine

    create_args = (runtime, "create", "demo-skill", _skill_content("demo-skill"))
    create_message = anyio.run(tool_coroutine, *create_args)
    assert "Created custom skill" in create_message

    # NOTE(review): the trailing positional slots are presumably content, path, find,
    # replace and an expected count — confirm against the tool signature.
    patch_args = (runtime, "patch", "demo-skill", None, None, "Demo skill", "Patched skill", 1)
    patch_message = anyio.run(tool_coroutine, *patch_args)
    assert "Patched custom skill" in patch_message

    skill_file = skills_root / "custom" / "demo-skill" / "SKILL.md"
    assert "Patched skill" in skill_file.read_text(encoding="utf-8")
def test_skill_manage_patch_replaces_single_occurrence_by_default(monkeypatch, tmp_path):
    """Without an explicit count, a patch rewrites one of two matches and reports both numbers."""
    skills_root = tmp_path / "skills"
    config = SimpleNamespace(
        skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"),
        skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None),
    )

    def _allow_scan(*args, **kwargs):
        return _async_result("allow", "ok")

    for config_target in (
        "deerflow.config.get_app_config",
        "deerflow.skills.manager.get_app_config",
        "deerflow.skills.security_scanner.get_app_config",
    ):
        monkeypatch.setattr(config_target, lambda: config)
    monkeypatch.setattr(skill_manage_module, "clear_skills_system_prompt_cache", lambda: None)
    monkeypatch.setattr(skill_manage_module, "scan_skill_content", _allow_scan)

    runtime = SimpleNamespace(
        context={"thread_id": "thread-1"},
        config={"configurable": {"thread_id": "thread-1"}},
    )
    tool_coroutine = skill_manage_module.skill_manage_tool.coroutine

    # The phrase "Demo skill" occurs twice: once in the front matter, once in the extra line.
    content = _skill_content("demo-skill", "Demo skill") + "\nRepeated: Demo skill\n"
    anyio.run(tool_coroutine, runtime, "create", "demo-skill", content)

    patch_args = (runtime, "patch", "demo-skill", None, None, "Demo skill", "Patched skill")
    patch_message = anyio.run(tool_coroutine, *patch_args)

    skill_file = skills_root / "custom" / "demo-skill" / "SKILL.md"
    patched_text = skill_file.read_text(encoding="utf-8")

    assert "1 replacement(s) applied, 2 match(es) found" in patch_message
    assert patched_text.count("Patched skill") == 1
    assert patched_text.count("Demo skill") == 1
def test_skill_manage_rejects_public_skill_patch(monkeypatch, tmp_path):
    """Patching a skill stored under ``public/`` raises ``ValueError`` mentioning a built-in skill."""
    skills_root = tmp_path / "skills"
    builtin_dir = skills_root / "public" / "deep-research"
    builtin_dir.mkdir(parents=True, exist_ok=True)
    (builtin_dir / "SKILL.md").write_text(_skill_content("deep-research"), encoding="utf-8")

    config = SimpleNamespace(
        skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"),
        skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None),
    )
    for config_target in (
        "deerflow.config.get_app_config",
        "deerflow.skills.manager.get_app_config",
    ):
        monkeypatch.setattr(config_target, lambda: config)

    runtime = SimpleNamespace(context={}, config={"configurable": {}})
    tool_coroutine = skill_manage_module.skill_manage_tool.coroutine
    patch_args = (runtime, "patch", "deep-research", None, None, "Demo skill", "Patched")

    with pytest.raises(ValueError, match="built-in skill"):
        anyio.run(tool_coroutine, *patch_args)
def test_skill_manage_sync_wrapper_supported(monkeypatch, tmp_path):
    """The tool's synchronous ``func`` entry point can create a custom skill."""
    skills_root = tmp_path / "skills"
    config = SimpleNamespace(
        skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"),
        skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None),
    )

    def _allow_scan(*args, **kwargs):
        return _async_result("allow", "ok")

    for config_target in (
        "deerflow.config.get_app_config",
        "deerflow.skills.manager.get_app_config",
    ):
        monkeypatch.setattr(config_target, lambda: config)
    monkeypatch.setattr(skill_manage_module, "clear_skills_system_prompt_cache", lambda: None)
    monkeypatch.setattr(skill_manage_module, "scan_skill_content", _allow_scan)

    runtime = SimpleNamespace(
        context={"thread_id": "thread-sync"},
        config={"configurable": {"thread_id": "thread-sync"}},
    )
    call_kwargs = {
        "runtime": runtime,
        "action": "create",
        "name": "sync-skill",
        "content": _skill_content("sync-skill"),
    }

    message = skill_manage_module.skill_manage_tool.func(**call_kwargs)

    assert "Created custom skill" in message
def test_skill_manage_rejects_support_path_traversal(monkeypatch, tmp_path):
    """``write_file`` with a ``..`` segment in the support path raises ``ValueError``."""
    skills_root = tmp_path / "skills"
    config = SimpleNamespace(
        skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"),
        skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None),
    )

    def _allow_scan(*args, **kwargs):
        return _async_result("allow", "ok")

    for config_target in (
        "deerflow.config.get_app_config",
        "deerflow.skills.manager.get_app_config",
        "deerflow.skills.security_scanner.get_app_config",
    ):
        monkeypatch.setattr(config_target, lambda: config)
    monkeypatch.setattr(skill_manage_module, "clear_skills_system_prompt_cache", lambda: None)
    monkeypatch.setattr(skill_manage_module, "scan_skill_content", _allow_scan)

    runtime = SimpleNamespace(
        context={"thread_id": "thread-1"},
        config={"configurable": {"thread_id": "thread-1"}},
    )
    tool_coroutine = skill_manage_module.skill_manage_tool.coroutine

    # The skill has to exist before a support file can be written into it.
    anyio.run(tool_coroutine, runtime, "create", "demo-skill", _skill_content("demo-skill"))

    # The path climbs out of references/ and targets the skill's own SKILL.md.
    traversal_args = (
        runtime,
        "write_file",
        "demo-skill",
        "malicious overwrite",
        "references/../SKILL.md",
    )
    with pytest.raises(ValueError, match="parent-directory traversal|selected support directory"):
        anyio.run(tool_coroutine, *traversal_args)
+132
View File
@@ -0,0 +1,132 @@
import json
from types import SimpleNamespace
from fastapi import FastAPI
from fastapi.testclient import TestClient
from app.gateway.routers import skills as skills_router
from deerflow.skills.manager import get_skill_history_file
def _skill_content(name: str, description: str = "Demo skill") -> str:
return f"---\nname: {name}\ndescription: {description}\n---\n\n# {name}\n"
async def _async_scan(decision: str, reason: str):
    """Coroutine returning a ``ScanResult`` carrying the supplied decision and reason."""
    # Imported lazily, inside the coroutine, exactly as the test module does it.
    from deerflow.skills.security_scanner import ScanResult

    scan_fields = {"decision": decision, "reason": reason}
    return ScanResult(**scan_fields)
def test_custom_skills_router_lifecycle(monkeypatch, tmp_path):
    """Walk a custom skill through list, get, update, history and rollback over the HTTP router."""
    skills_root = tmp_path / "skills"
    skill_dir = skills_root / "custom" / "demo-skill"
    skill_dir.mkdir(parents=True, exist_ok=True)
    (skill_dir / "SKILL.md").write_text(_skill_content("demo-skill"), encoding="utf-8")

    config = SimpleNamespace(
        skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"),
        skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None),
    )

    def _allow_scan(*args, **kwargs):
        return _async_scan("allow", "ok")

    for config_target in (
        "deerflow.config.get_app_config",
        "deerflow.skills.manager.get_app_config",
    ):
        monkeypatch.setattr(config_target, lambda: config)
    monkeypatch.setattr("app.gateway.routers.skills.scan_skill_content", _allow_scan)
    monkeypatch.setattr("app.gateway.routers.skills.clear_skills_system_prompt_cache", lambda: None)

    app = FastAPI()
    app.include_router(skills_router.router)

    with TestClient(app) as client:
        # List: the pre-seeded skill is the first entry.
        listing = client.get("/api/skills/custom")
        assert listing.status_code == 200
        assert listing.json()["skills"][0]["name"] == "demo-skill"

        # Read: the stored markdown is returned verbatim enough to contain its heading.
        fetched = client.get("/api/skills/custom/demo-skill")
        assert fetched.status_code == 200
        assert "# demo-skill" in fetched.json()["content"]

        # Update: a human edit changes the description.
        edited_payload = {"content": _skill_content("demo-skill", "Edited skill")}
        updated = client.put("/api/skills/custom/demo-skill", json=edited_payload)
        assert updated.status_code == 200
        assert updated.json()["description"] == "Edited skill"

        # History: the most recent record is that edit.
        history = client.get("/api/skills/custom/demo-skill/history")
        assert history.status_code == 200
        assert history.json()["history"][-1]["action"] == "human_edit"

        # Rollback of the latest record restores the original description.
        rolled_back = client.post("/api/skills/custom/demo-skill/rollback", json={"history_index": -1})
        assert rolled_back.status_code == 200
        assert rolled_back.json()["description"] == "Demo skill"
def test_custom_skill_rollback_blocked_by_scanner(monkeypatch, tmp_path):
    """A rollback the scanner blocks returns HTTP 400 and the block shows up in the history."""
    skills_root = tmp_path / "skills"
    skill_dir = skills_root / "custom" / "demo-skill"
    skill_dir.mkdir(parents=True, exist_ok=True)

    original_content = _skill_content("demo-skill")
    edited_content = _skill_content("demo-skill", "Edited skill")
    # The skill on disk is already in its edited state.
    (skill_dir / "SKILL.md").write_text(edited_content, encoding="utf-8")

    config = SimpleNamespace(
        skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"),
        skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None),
    )
    for config_target in (
        "deerflow.config.get_app_config",
        "deerflow.skills.manager.get_app_config",
    ):
        monkeypatch.setattr(config_target, lambda: config)

    # Seed one history record describing the edit. Compact separators keep the
    # serialized line free of padding spaces.
    history_record = {
        "action": "human_edit",
        "prev_content": original_content,
        "new_content": edited_content,
    }
    history_line = json.dumps(history_record, separators=(",", ":")) + "\n"
    get_skill_history_file("demo-skill").write_text(history_line, encoding="utf-8")

    monkeypatch.setattr("app.gateway.routers.skills.clear_skills_system_prompt_cache", lambda: None)

    async def _scan(*args, **kwargs):
        from deerflow.skills.security_scanner import ScanResult

        return ScanResult(decision="block", reason="unsafe rollback")

    monkeypatch.setattr("app.gateway.routers.skills.scan_skill_content", _scan)

    app = FastAPI()
    app.include_router(skills_router.router)

    with TestClient(app) as client:
        rollback = client.post("/api/skills/custom/demo-skill/rollback", json={"history_index": -1})
        assert rollback.status_code == 400
        assert "unsafe rollback" in rollback.json()["detail"]

        history = client.get("/api/skills/custom/demo-skill/history")
        assert history.status_code == 200
        assert history.json()["history"][-1]["scanner"]["decision"] == "block"
def test_custom_skill_delete_preserves_history_and_allows_restore(monkeypatch, tmp_path):
    """Deleting a custom skill keeps its history, and a rollback brings the file back unchanged."""
    skills_root = tmp_path / "skills"
    skill_dir = skills_root / "custom" / "demo-skill"
    skill_dir.mkdir(parents=True, exist_ok=True)
    skill_file = skill_dir / "SKILL.md"

    original_content = _skill_content("demo-skill")
    skill_file.write_text(original_content, encoding="utf-8")

    config = SimpleNamespace(
        skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"),
        skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None),
    )

    def _allow_scan(*args, **kwargs):
        return _async_scan("allow", "ok")

    for config_target in (
        "deerflow.config.get_app_config",
        "deerflow.skills.manager.get_app_config",
    ):
        monkeypatch.setattr(config_target, lambda: config)
    monkeypatch.setattr("app.gateway.routers.skills.scan_skill_content", _allow_scan)
    monkeypatch.setattr("app.gateway.routers.skills.clear_skills_system_prompt_cache", lambda: None)

    app = FastAPI()
    app.include_router(skills_router.router)

    with TestClient(app) as client:
        # Delete removes the markdown file from disk.
        deleted = client.delete("/api/skills/custom/demo-skill")
        assert deleted.status_code == 200
        assert not skill_file.exists()

        # The history endpoint still answers, and its last record is the deletion.
        history = client.get("/api/skills/custom/demo-skill/history")
        assert history.status_code == 200
        assert history.json()["history"][-1]["action"] == "human_delete"

        # Rolling back that record restores the exact original content.
        restored = client.post("/api/skills/custom/demo-skill/rollback", json={"history_index": -1})
        assert restored.status_code == 200
        assert restored.json()["description"] == "Demo skill"
        assert skill_file.read_text(encoding="utf-8") == original_content
+12
View File
@@ -62,3 +62,15 @@ def test_load_skills_skips_hidden_directories(tmp_path: Path):
assert "ok-skill" in names
assert "secret-skill" not in names
def test_load_skills_prefers_custom_over_public_with_same_name(tmp_path: Path):
    """When public and custom skills share a name, the loaded entry is the custom one."""
    skills_root = tmp_path / "skills"

    # Same skill name in both categories; only the description tells them apart.
    for category, description in (("public", "Public version"), ("custom", "Custom version")):
        _write_skill(skills_root / category / "shared-skill", "shared-skill", description)

    loaded = load_skills(skills_path=skills_root, use_config=False, enabled_only=False)
    winner = next(entry for entry in loaded if entry.name == "shared-skill")

    assert winner.category == "custom"
    assert winner.description == "Custom version"