feat(agent): add custom-agent self-updates with user isolation (#2713)

* feat(agent): add update_agent tool for in-chat custom-agent self-updates (#2616)

Custom agents had no built-in way to persist updates to their own SOUL.md /
config.yaml from a normal chat — `setup_agent` was only bound during the
bootstrap flow, so when the user asked the agent to refine its description
or personality, the agent would shell out via bash/write_file and the edits
landed in a temporary sandbox/tool workspace instead of
`{base_dir}/agents/{agent_name}/`.

Changes:
- New `update_agent` builtin tool with partial-update semantics (only the
  fields you pass are written) and atomic temp-file + os.replace writes so
  a failed update never corrupts existing SOUL.md / config.yaml.
- Lead agent now binds `update_agent` in the non-bootstrap path whenever
  `agent_name` is set in the runtime context. Default agent (no
  agent_name) and bootstrap flow are unchanged.
- New `<self_update>` system-prompt section is injected for custom agents,
  instructing them to use `update_agent` — and explicitly NOT bash /
  write_file — to persist self-updates.
- Tests: 11 new cases in `tests/test_update_agent_tool.py` covering
  validation (missing/invalid agent_name, unknown agent, no fields),
  partial updates (soul-only, description-only, skills=[] vs omitted),
  no-op detection, atomic-write safety, and AgentConfig round-tripping;
  plus 2 new cases in `tests/test_lead_agent_prompt.py` covering the
  self-update prompt section.
- Docs: updated backend/CLAUDE.md builtin tools list and tools.mdx
  (en/zh) with the new tool description.

* feat(agent): isolate custom agents per user

Store custom agent definitions under the effective user, keep legacy agents readable until migration, and cover API/tool/migration behavior with tests.

Co-authored-by: Cursor <cursoragent@cursor.com>

* feat: consistent write/delete targets & add --user-id to migration

---------

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
yangzheli
2026-05-05 23:17:42 +08:00
committed by GitHub
parent e8675f266d
commit 59c4a3f0a4
18 changed files with 955 additions and 60 deletions
+16 -2
View File
@@ -537,7 +537,10 @@ class TestAgentsAPI:
def test_create_persists_files_on_disk(self, agent_client, tmp_path):
agent_client.post("/api/agents", json={"name": "disk-check", "soul": "disk soul"})
agent_dir = tmp_path / "agents" / "disk-check"
# tests/conftest.py installs an autouse fixture that sets the
# contextvar to "test-user-autouse", so the agent is persisted under
# users/test-user-autouse/agents/ rather than the legacy shared dir.
agent_dir = tmp_path / "users" / "test-user-autouse" / "agents" / "disk-check"
assert agent_dir.exists()
assert (agent_dir / "config.yaml").exists()
assert (agent_dir / "SOUL.md").exists()
@@ -545,12 +548,23 @@ class TestAgentsAPI:
def test_delete_removes_files_from_disk(self, agent_client, tmp_path):
agent_client.post("/api/agents", json={"name": "remove-me", "soul": "bye"})
agent_dir = tmp_path / "agents" / "remove-me"
agent_dir = tmp_path / "users" / "test-user-autouse" / "agents" / "remove-me"
assert agent_dir.exists()
agent_client.delete("/api/agents/remove-me")
assert not agent_dir.exists()
def test_create_rejects_legacy_name_collision(self, agent_client, tmp_path):
"""An unmigrated legacy agent must still block name collision so that
running the migration script later won't shadow the legacy entry."""
legacy_dir = tmp_path / "agents" / "legacy-agent"
legacy_dir.mkdir(parents=True)
(legacy_dir / "config.yaml").write_text("name: legacy-agent\n", encoding="utf-8")
(legacy_dir / "SOUL.md").write_text("legacy soul", encoding="utf-8")
response = agent_client.post("/api/agents", json={"name": "legacy-agent", "soul": "x"})
assert response.status_code == 409
# ===========================================================================
# 9. Gateway API User Profile endpoints
+12
View File
@@ -17,6 +17,18 @@ def _set_skills_cache_state(*, skills=None, active=False, version=0):
prompt_module._enabled_skills_refresh_event.clear()
def test_build_self_update_section_empty_for_default_agent():
assert prompt_module._build_self_update_section(None) == ""
def test_build_self_update_section_present_for_custom_agent():
section = prompt_module._build_self_update_section("my-agent")
assert "<self_update>" in section
assert "my-agent" in section
assert "update_agent" in section
def test_build_custom_mounts_section_returns_empty_when_no_mounts(monkeypatch):
config = SimpleNamespace(sandbox=SimpleNamespace(mounts=[]))
monkeypatch.setattr("deerflow.config.get_app_config", lambda: config)
@@ -125,3 +125,68 @@ class TestMigrateMemory:
from scripts.migrate_user_isolation import migrate_memory
migrate_memory(paths, user_id="default") # should not raise
class TestMigrateAgents:
@staticmethod
def _seed_legacy_agent(paths: Paths, name: str, *, soul: str = "soul", description: str = "d") -> Path:
legacy_dir = paths.agents_dir / name
legacy_dir.mkdir(parents=True, exist_ok=True)
(legacy_dir / "config.yaml").write_text(f"name: {name}\ndescription: {description}\n", encoding="utf-8")
(legacy_dir / "SOUL.md").write_text(soul, encoding="utf-8")
return legacy_dir
def test_moves_legacy_into_user_layout(self, base_dir: Path, paths: Paths):
self._seed_legacy_agent(paths, "agent-a", soul="soul-a")
self._seed_legacy_agent(paths, "agent-b", soul="soul-b")
from scripts.migrate_user_isolation import migrate_agents
report = migrate_agents(paths, user_id="default")
assert {entry["agent"] for entry in report} == {"agent-a", "agent-b"}
for entry in report:
assert entry["user_id"] == "default"
assert "moved -> " in entry["action"]
for name, soul in [("agent-a", "soul-a"), ("agent-b", "soul-b")]:
dest = paths.user_agent_dir("default", name)
assert dest.exists(), f"{name} should have moved into the per-user layout"
assert (dest / "SOUL.md").read_text() == soul
# Legacy agents/ root is cleaned up once empty.
assert not paths.agents_dir.exists()
def test_dry_run_does_not_move(self, base_dir: Path, paths: Paths):
legacy_dir = self._seed_legacy_agent(paths, "agent-a")
from scripts.migrate_user_isolation import migrate_agents
report = migrate_agents(paths, user_id="default", dry_run=True)
assert len(report) == 1
assert legacy_dir.exists(), "dry-run must not touch the filesystem"
assert not paths.user_agent_dir("default", "agent-a").exists()
def test_existing_destination_is_treated_as_conflict(self, base_dir: Path, paths: Paths):
self._seed_legacy_agent(paths, "agent-a", soul="legacy soul")
dest = paths.user_agent_dir("default", "agent-a")
dest.mkdir(parents=True)
(dest / "SOUL.md").write_text("preexisting", encoding="utf-8")
from scripts.migrate_user_isolation import migrate_agents
report = migrate_agents(paths, user_id="default")
assert report[0]["action"].startswith("conflict -> ")
# Per-user destination must be left untouched.
assert (dest / "SOUL.md").read_text() == "preexisting"
# Legacy copy lands under migration-conflicts/agents/.
conflicts_dir = paths.base_dir / "migration-conflicts" / "agents" / "agent-a"
assert (conflicts_dir / "SOUL.md").read_text() == "legacy soul"
def test_no_legacy_dir_is_noop(self, base_dir: Path, paths: Paths):
from scripts.migrate_user_isolation import migrate_agents
report = migrate_agents(paths, user_id="default")
assert report == []
@@ -50,6 +50,21 @@ class TestUserAgentMemoryFile:
assert paths.user_agent_memory_file("bob", "MyAgent") == expected
class TestUserAgentDir:
def test_user_agents_dir(self, paths: Paths):
assert paths.user_agents_dir("alice") == paths.base_dir / "users" / "alice" / "agents"
def test_user_agent_dir(self, paths: Paths):
assert paths.user_agent_dir("alice", "code-reviewer") == paths.base_dir / "users" / "alice" / "agents" / "code-reviewer"
def test_user_agent_dir_lowercases_name(self, paths: Paths):
assert paths.user_agent_dir("alice", "CodeReviewer") == paths.base_dir / "users" / "alice" / "agents" / "codereviewer"
def test_user_agent_dir_validates_user_id(self, paths: Paths):
with pytest.raises(ValueError, match="Invalid user_id"):
paths.user_agent_dir("../escape", "myagent")
class TestUserThreadDir:
def test_user_thread_dir(self, paths: Paths):
expected = paths.base_dir / "users" / "u1" / "threads" / "t1"
+7 -6
View File
@@ -27,6 +27,7 @@ def _make_paths_mock(tmp_path: Path):
paths = MagicMock()
paths.base_dir = tmp_path
paths.agent_dir = lambda name: tmp_path / "agents" / name
paths.user_agent_dir = lambda user_id, name: tmp_path / "users" / user_id / "agents" / name
return paths
@@ -54,7 +55,7 @@ def test_setup_agent_rejects_invalid_agent_name_before_writing(tmp_path, monkeyp
messages = result.update["messages"]
assert len(messages) == 1
assert "Invalid agent name" in messages[0].content
assert not (tmp_path / "agents").exists()
assert not (tmp_path / "users" / "test-user-autouse" / "agents").exists()
assert not (outside_dir / "evil" / "SOUL.md").exists()
@@ -68,7 +69,7 @@ def test_setup_agent_rejects_absolute_agent_name_before_writing(tmp_path, monkey
messages = result.update["messages"]
assert len(messages) == 1
assert "Invalid agent name" in messages[0].content
assert not (tmp_path / "agents").exists()
assert not (tmp_path / "users" / "test-user-autouse" / "agents").exists()
assert not (Path(absolute_agent) / "SOUL.md").exists()
@@ -81,10 +82,10 @@ class TestSetupAgentNoDataLoss:
def test_existing_agent_dir_preserved_on_failure(self, tmp_path: Path):
"""If the agent directory already exists and setup fails,
the directory and its contents must NOT be deleted."""
agent_dir = tmp_path / "agents" / "test-agent"
agent_dir = tmp_path / "users" / "test-user-autouse" / "agents" / "test-agent"
agent_dir.mkdir(parents=True)
old_soul = agent_dir / "SOUL.md"
old_soul.write_text("original soul content")
old_soul.write_text("original soul content", encoding="utf-8")
with patch("deerflow.tools.builtins.setup_agent_tool.get_paths", return_value=_make_paths_mock(tmp_path)):
# Force soul_file.write_text to raise after directory already exists
@@ -103,7 +104,7 @@ class TestSetupAgentNoDataLoss:
def test_new_agent_dir_cleaned_up_on_failure(self, tmp_path: Path):
"""If the agent directory is newly created and setup fails,
the directory should be cleaned up."""
agent_dir = tmp_path / "agents" / "test-agent"
agent_dir = tmp_path / "users" / "test-user-autouse" / "agents" / "test-agent"
assert not agent_dir.exists()
with patch("deerflow.tools.builtins.setup_agent_tool.get_paths", return_value=_make_paths_mock(tmp_path)):
@@ -121,7 +122,7 @@ class TestSetupAgentNoDataLoss:
"""Happy path: setup_agent creates config.yaml and SOUL.md."""
_call_setup_agent(tmp_path, soul="# My Agent", description="A test agent")
agent_dir = tmp_path / "agents" / "test-agent"
agent_dir = tmp_path / "users" / "test-user-autouse" / "agents" / "test-agent"
assert agent_dir.exists()
assert (agent_dir / "SOUL.md").read_text() == "# My Agent"
assert (agent_dir / "config.yaml").exists()
+310
View File
@@ -0,0 +1,310 @@
"""Tests for update_agent tool — partial updates, atomic writes, and validation.
Resolves issue #2616: a custom agent must be able to persist updates to its
own SOUL.md / config.yaml from inside a normal chat (not only from bootstrap).
The tool writes per-user (``{base_dir}/users/{user_id}/agents/{name}/``) so
that one user's update cannot mutate another user's agent.
"""
from __future__ import annotations
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
import pytest
import yaml
from deerflow.config.agents_config import AgentConfig
from deerflow.tools.builtins.update_agent_tool import update_agent
DEFAULT_USER = "test-user-autouse" # matches the autouse fixture in tests/conftest.py
class _DummyRuntime(SimpleNamespace):
context: dict
tool_call_id: str
def _runtime(agent_name: str | None = "test-agent", tool_call_id: str = "call_1") -> _DummyRuntime:
return _DummyRuntime(context={"agent_name": agent_name} if agent_name is not None else {}, tool_call_id=tool_call_id)
def _make_paths_mock(tmp_path: Path) -> MagicMock:
paths = MagicMock()
paths.base_dir = tmp_path
paths.agent_dir = lambda name: tmp_path / "agents" / name
paths.agents_dir = tmp_path / "agents"
paths.user_agent_dir = lambda user_id, name: tmp_path / "users" / user_id / "agents" / name
paths.user_agents_dir = lambda user_id: tmp_path / "users" / user_id / "agents"
return paths
def _user_agent_dir(tmp_path: Path, name: str = "test-agent", user_id: str = DEFAULT_USER) -> Path:
return tmp_path / "users" / user_id / "agents" / name
def _seed_agent(
tmp_path: Path,
name: str = "test-agent",
*,
description: str = "old desc",
soul: str = "old soul",
skills: list[str] | None = None,
user_id: str = DEFAULT_USER,
) -> Path:
"""Create a baseline agent dir with config.yaml and SOUL.md for tests to mutate."""
agent_dir = _user_agent_dir(tmp_path, name, user_id=user_id)
agent_dir.mkdir(parents=True, exist_ok=True)
cfg: dict = {"name": name, "description": description}
if skills is not None:
cfg["skills"] = skills
(agent_dir / "config.yaml").write_text(yaml.safe_dump(cfg, sort_keys=False), encoding="utf-8")
(agent_dir / "SOUL.md").write_text(soul, encoding="utf-8")
return agent_dir
@pytest.fixture()
def patched_paths(tmp_path: Path):
paths_mock = _make_paths_mock(tmp_path)
with patch("deerflow.tools.builtins.update_agent_tool.get_paths", return_value=paths_mock):
# load_agent_config also calls get_paths(); patch the same target it uses.
with patch("deerflow.config.agents_config.get_paths", return_value=paths_mock):
yield paths_mock
@pytest.fixture()
def stub_app_config():
"""Stub get_app_config so model validation accepts only known names."""
fake = MagicMock()
fake.get_model_config.side_effect = lambda name: object() if name in {"gpt-known", "m1"} else None
with patch("deerflow.tools.builtins.update_agent_tool.get_app_config", return_value=fake):
yield fake
# --- Validation tests ---
def test_update_agent_rejects_missing_agent_name(patched_paths):
result = update_agent.func(runtime=_runtime(agent_name=None), soul="new soul")
msg = result.update["messages"][0]
assert "only available inside a custom agent's chat" in msg.content
def test_update_agent_rejects_invalid_agent_name(patched_paths):
result = update_agent.func(runtime=_runtime(agent_name="../../etc/passwd"), soul="x")
msg = result.update["messages"][0]
assert "Invalid agent name" in msg.content
def test_update_agent_rejects_unknown_agent(tmp_path, patched_paths):
result = update_agent.func(runtime=_runtime(agent_name="ghost"), soul="x")
msg = result.update["messages"][0]
assert "does not exist" in msg.content
assert not _user_agent_dir(tmp_path, "ghost").exists()
def test_update_agent_requires_at_least_one_field(tmp_path, patched_paths):
_seed_agent(tmp_path)
result = update_agent.func(runtime=_runtime())
msg = result.update["messages"][0]
assert "No fields provided" in msg.content
def test_update_agent_rejects_unknown_model(tmp_path, patched_paths, stub_app_config):
"""Copilot review: model must be validated against configured models before
being persisted; otherwise _resolve_model_name silently falls back to the
default and the user gets repeated warnings on every later turn."""
_seed_agent(tmp_path)
result = update_agent.func(runtime=_runtime(), model="not-in-config")
msg = result.update["messages"][0]
assert "Unknown model" in msg.content
cfg = yaml.safe_load((_user_agent_dir(tmp_path) / "config.yaml").read_text())
assert "model" not in cfg, "Invalid model must not have been written to config.yaml"
def test_update_agent_accepts_known_model(tmp_path, patched_paths, stub_app_config):
_seed_agent(tmp_path)
result = update_agent.func(runtime=_runtime(), model="gpt-known")
cfg = yaml.safe_load((_user_agent_dir(tmp_path) / "config.yaml").read_text())
assert cfg["model"] == "gpt-known"
assert "model" in result.update["messages"][0].content
# --- Partial update tests ---
def test_update_agent_updates_soul_only(tmp_path, patched_paths):
agent_dir = _seed_agent(tmp_path, description="keep me", soul="old soul")
result = update_agent.func(runtime=_runtime(), soul="brand new soul")
assert (agent_dir / "SOUL.md").read_text() == "brand new soul"
cfg = yaml.safe_load((agent_dir / "config.yaml").read_text())
assert cfg["description"] == "keep me", "description must be preserved"
assert "soul" in result.update["messages"][0].content
def test_update_agent_updates_description_only(tmp_path, patched_paths):
agent_dir = _seed_agent(tmp_path, description="old desc", soul="keep this soul")
result = update_agent.func(runtime=_runtime(), description="new desc")
cfg = yaml.safe_load((agent_dir / "config.yaml").read_text())
assert cfg["description"] == "new desc"
assert (agent_dir / "SOUL.md").read_text() == "keep this soul", "SOUL.md must be preserved"
assert "description" in result.update["messages"][0].content
def test_update_agent_skills_empty_list_disables_all(tmp_path, patched_paths):
agent_dir = _seed_agent(tmp_path, skills=["a", "b"])
result = update_agent.func(runtime=_runtime(), skills=[])
cfg = yaml.safe_load((agent_dir / "config.yaml").read_text())
assert cfg["skills"] == [], "empty list must persist as empty list (not be omitted)"
assert "skills" in result.update["messages"][0].content
def test_update_agent_skills_omitted_keeps_existing(tmp_path, patched_paths):
agent_dir = _seed_agent(tmp_path, skills=["alpha", "beta"])
update_agent.func(runtime=_runtime(), description="bumped")
cfg = yaml.safe_load((agent_dir / "config.yaml").read_text())
assert cfg["skills"] == ["alpha", "beta"], "omitting skills must preserve the existing whitelist"
def test_update_agent_no_op_when_values_match_existing(tmp_path, patched_paths):
_seed_agent(tmp_path, description="same")
result = update_agent.func(runtime=_runtime(), description="same")
assert "No changes applied" in result.update["messages"][0].content
def test_update_agent_forces_name_to_directory(tmp_path, patched_paths):
"""Copilot review: if the existing config.yaml has a drifted ``name`` field,
update_agent must rewrite it to match the directory name so on-disk state
stays consistent with the runtime context."""
agent_dir = _user_agent_dir(tmp_path)
agent_dir.mkdir(parents=True)
(agent_dir / "config.yaml").write_text(yaml.safe_dump({"name": "drifted-name", "description": "old"}, sort_keys=False), encoding="utf-8")
(agent_dir / "SOUL.md").write_text("soul", encoding="utf-8")
update_agent.func(runtime=_runtime(), description="bumped")
cfg = yaml.safe_load((agent_dir / "config.yaml").read_text())
assert cfg["name"] == "test-agent", "config.yaml name must follow the directory name, not legacy yaml content"
# --- Atomicity tests ---
def test_update_agent_failure_preserves_existing_files(tmp_path, patched_paths):
agent_dir = _seed_agent(tmp_path, soul="original soul")
real_replace = Path.replace
def _explode(self, target):
if str(target).endswith("SOUL.md"):
raise OSError("disk full")
return real_replace(self, target)
with patch.object(Path, "replace", _explode):
result = update_agent.func(runtime=_runtime(), soul="poisoned content")
assert (agent_dir / "SOUL.md").read_text() == "original soul", "atomic write must not corrupt existing SOUL.md"
assert "Error" in result.update["messages"][0].content
leftover_tmps = list(agent_dir.glob("*.tmp"))
assert leftover_tmps == [], "temp files must be cleaned up on failure"
def test_update_agent_soul_failure_does_not_replace_config(tmp_path, patched_paths):
"""Copilot review: if both config.yaml and SOUL.md are scheduled to be
written and SOUL.md staging fails *before* any rename, config.yaml must
NOT be replaced. The fix stages every temp file first and only renames
after all temps exist on disk."""
agent_dir = _seed_agent(tmp_path, description="original-desc", soul="original soul")
real_named_temp_file = __import__("tempfile").NamedTemporaryFile
call_count = {"n": 0}
def _explode_on_soul(*args, **kwargs):
# Inspect target dir + suffix; the SOUL temp file is the second one we stage.
call_count["n"] += 1
if call_count["n"] >= 2:
raise OSError("disk full while staging SOUL.md")
return real_named_temp_file(*args, **kwargs)
with patch("deerflow.tools.builtins.update_agent_tool.tempfile.NamedTemporaryFile", side_effect=_explode_on_soul):
result = update_agent.func(runtime=_runtime(), description="new-desc", soul="new soul")
cfg = yaml.safe_load((agent_dir / "config.yaml").read_text())
assert cfg["description"] == "original-desc", "config.yaml must not be replaced when SOUL.md staging fails"
assert (agent_dir / "SOUL.md").read_text() == "original soul"
assert "Error" in result.update["messages"][0].content
assert list(agent_dir.glob("*.tmp")) == [], "staged config.yaml temp must be cleaned up on SOUL.md failure"
# --- Per-user isolation ---
def test_update_agent_only_writes_under_current_user(tmp_path, patched_paths):
"""An update from user 'alice' must never touch user 'bob's agent files."""
from deerflow.runtime.user_context import reset_current_user, set_current_user
# Seed an agent for both users with the same name.
alice_dir = _seed_agent(tmp_path, name="shared", description="alice-desc", soul="alice soul", user_id="alice")
bob_dir = _seed_agent(tmp_path, name="shared", description="bob-desc", soul="bob soul", user_id="bob")
# Override the autouse contextvar so update_agent runs as Alice.
token = set_current_user(SimpleNamespace(id="alice"))
try:
update_agent.func(runtime=_runtime(agent_name="shared"), description="alice-bumped")
finally:
reset_current_user(token)
alice_cfg = yaml.safe_load((alice_dir / "config.yaml").read_text())
bob_cfg = yaml.safe_load((bob_dir / "config.yaml").read_text())
assert alice_cfg["description"] == "alice-bumped"
assert bob_cfg["description"] == "bob-desc", "bob's config.yaml must not have been touched"
assert (bob_dir / "SOUL.md").read_text() == "bob soul"
# --- Loader passthrough sanity check ---
def test_update_agent_round_trips_known_fields(tmp_path, patched_paths):
"""update_agent reads through load_agent_config so all fields the loader
knows about (name, description, model, tool_groups, skills) round-trip
on a partial update.
Note: ``load_agent_config`` strips unknown fields before constructing
AgentConfig, so legacy/extra YAML keys are NOT preserved across
updates — by design.
"""
_seed_agent(tmp_path, description="legacy")
fake_cfg = AgentConfig(name="test-agent", description="legacy", skills=["s1"], tool_groups=["g1"], model="m1")
fake_app_config = MagicMock()
fake_app_config.get_model_config.return_value = object()
with patch("deerflow.tools.builtins.update_agent_tool.load_agent_config", return_value=fake_cfg):
with patch("deerflow.tools.builtins.update_agent_tool.get_app_config", return_value=fake_app_config):
update_agent.func(runtime=_runtime(), description="bumped")
cfg = yaml.safe_load((_user_agent_dir(tmp_path) / "config.yaml").read_text())
assert cfg["description"] == "bumped"
assert cfg["skills"] == ["s1"]
assert cfg["tool_groups"] == ["g1"]
assert cfg["model"] == "m1"