fix(agents): sync agent_name across context/configurable and reject empty soul (#3549) (#3553)

* fix(agents): sync agent_name across context/configurable and reject empty soul (#3549)

Two independent issues caused custom agent creation to silently fail:

1. build_run_config only wrote agent_name into one container (configurable
   or context), so setup_agent — which reads ToolRuntime.context exclusively
   since LangGraph >=1.1.9 — saw agent_name=None and wrote SOUL.md to the
   global base_dir instead of users/{user_id}/agents/{name}/. Mirror the
   dual-write pattern already used by merge_run_context_overrides and
   naming.py so both containers always carry the same value.

2. setup_agent persisted whatever soul string it received, including empty
   or whitespace-only content, and still reported success. The frontend
   then surfaced an unusable agent and the global default SOUL.md could be
   silently overwritten with empty content. Reject empty soul before any
   filesystem operation so the model can retry.

Tests:
- test_gateway_services.py: dual-write regressions for both configurable
  and context entry paths, explicit-agent-name precedence on both sides,
  and a shape-parity test against merge_run_context_overrides.
- test_setup_agent_tool.py: empty/whitespace soul rejection, plus
  no-overwrite guarantees for existing global and per-agent SOUL.md.

* Update services.py
This commit is contained in:
Huixin615
2026-06-14 10:40:16 +08:00
committed by GitHub
parent 47e9570d86
commit f43aa78107
4 changed files with 158 additions and 19 deletions
+21 -14
View File
@@ -211,11 +211,14 @@ def build_run_config(
When *assistant_id* refers to a custom agent (anything other than
``"lead_agent"`` / ``None``), the name is forwarded as ``agent_name`` in
whichever runtime options container is active: ``context`` for
LangGraph >= 0.6.0 requests, otherwise ``configurable``.
``make_lead_agent`` reads this key to load the matching
``agents/<name>/SOUL.md`` and per-agent config — without it the agent
silently runs as the default lead agent.
both ``configurable`` and ``context`` so it is visible to legacy
configurable readers and to LangGraph ``ToolRuntime.context`` consumers
(e.g. the ``setup_agent`` tool, which since LangGraph >=1.1.9 no longer
falls back from ``context`` to ``configurable``). An explicit
``agent_name`` in either container takes precedence over the value
derived from ``assistant_id``. ``make_lead_agent`` reads this key to
load the matching ``agents/<name>/SOUL.md`` and per-agent config —
without it the agent silently runs as the default lead agent.
This mirrors the channel manager's ``_resolve_run_params`` logic so that
the LangGraph Platform-compatible HTTP API and the IM channel path behave
@@ -253,19 +256,23 @@ def build_run_config(
config["configurable"] = {"thread_id": thread_id}
# Inject custom agent name when the caller specified a non-default assistant.
# Honour an explicit agent_name in the active runtime options container.
# Honour an explicit agent_name in either runtime options container.
if assistant_id and assistant_id != _DEFAULT_ASSISTANT_ID:
normalized = assistant_id.strip().lower().replace("_", "-")
if not normalized or not re.fullmatch(r"[a-z0-9-]+", normalized):
raise ValueError(f"Invalid assistant_id {assistant_id!r}: must contain only letters, digits, and hyphens after normalization.")
if "configurable" in config:
target = config["configurable"]
elif "context" in config:
target = config["context"]
else:
target = config.setdefault("configurable", {})
if target is not None and "agent_name" not in target:
target["agent_name"] = normalized
configurable = config.setdefault("configurable", {})
runtime_context = config.setdefault("context", {})
explicit_agent_name: str | None = None
if isinstance(configurable, dict) and isinstance(configurable.get("agent_name"), str):
explicit_agent_name = configurable["agent_name"]
elif isinstance(runtime_context, dict) and isinstance(runtime_context.get("agent_name"), str):
explicit_agent_name = runtime_context["agent_name"]
effective_agent_name = explicit_agent_name or normalized
if isinstance(configurable, dict):
configurable["agent_name"] = effective_agent_name
if isinstance(runtime_context, dict):
runtime_context["agent_name"] = effective_agent_name
config.setdefault("run_name", resolve_root_run_name(config, normalized))
if metadata:
config.setdefault("metadata", {}).update(metadata)
@@ -28,6 +28,25 @@ def setup_agent(
skills: Optional list of skill names this agent should use. None means use all enabled skills, empty list means no skills.
"""
# Reject empty / whitespace-only soul before touching the filesystem.
# Without this guard the tool would happily persist an empty SOUL.md and
# still report success, which caused the frontend to enter the "agent
# created" state for an unusable agent (issue #3549). Failing loud lets
# the model retry instead of silently producing a broken artifact and,
# together with the upstream agent_name fix, prevents the global default
# SOUL.md from being overwritten with empty content.
if not soul or not soul.strip():
return Command(
update={
"messages": [
ToolMessage(
content="Error: soul content is empty; refusing to create agent with an empty SOUL.md",
tool_call_id=runtime.tool_call_id,
)
]
}
)
agent_name: str | None = runtime.context.get("agent_name") if runtime.context else None
agent_dir = None
is_new_dir = False
+63 -5
View File
@@ -252,11 +252,17 @@ def test_build_run_config_explicit_agent_name_not_overwritten():
assistant_id="other-agent",
)
assert config["configurable"]["agent_name"] == "explicit-agent"
assert config["context"]["agent_name"] == "explicit-agent"
assert config["run_name"] == "explicit-agent"
def test_build_run_config_context_custom_agent_injects_agent_name():
"""Custom assistant_id must be forwarded as context['agent_name'] in context mode."""
"""Custom assistant_id must be forwarded as ``agent_name`` in both
``context`` and ``configurable`` (issue #3549). Previously only the
active container was populated, so when the caller sent context-only the
setup_agent tool — which reads ``ToolRuntime.context`` — saw
``agent_name=None`` and wrote SOUL.md to the global base_dir.
"""
from app.gateway.services import build_run_config
config = build_run_config(
@@ -267,7 +273,7 @@ def test_build_run_config_context_custom_agent_injects_agent_name():
)
assert config["context"]["agent_name"] == "finalis"
assert "configurable" not in config
assert config["configurable"]["agent_name"] == "finalis"
def test_resolve_agent_factory_returns_make_lead_agent():
@@ -281,6 +287,56 @@ def test_resolve_agent_factory_returns_make_lead_agent():
assert resolve_agent_factory("custom-agent-123") is make_lead_agent
def test_build_run_config_configurable_custom_agent_dual_writes_agent_name():
"""Regression for issue #3549: even when the caller uses the legacy
``configurable`` path, ``agent_name`` must also land in
``config['context']`` so LangGraph >=1.1.9 ``ToolRuntime.context`` consumers
(e.g. ``setup_agent``) observe the same value.
"""
from app.gateway.services import build_run_config
config = build_run_config("thread-1", None, None, assistant_id="finalis")
assert config["configurable"]["agent_name"] == "finalis"
assert config["context"]["agent_name"] == "finalis"
def test_build_run_config_context_explicit_agent_name_not_overwritten():
"""An explicit ``context['agent_name']`` from the request must take
precedence over the value derived from ``assistant_id`` and be mirrored
to ``configurable`` so the two containers never diverge.
"""
from app.gateway.services import build_run_config
config = build_run_config(
"thread-1",
{"context": {"agent_name": "explicit-agent"}},
None,
assistant_id="other-agent",
)
assert config["context"]["agent_name"] == "explicit-agent"
assert config["configurable"]["agent_name"] == "explicit-agent"
assert config["run_name"] == "explicit-agent"
def test_build_run_config_dual_write_matches_merge_run_context_overrides_shape():
"""The shape produced by ``build_run_config`` for a custom agent must be
indistinguishable from what ``merge_run_context_overrides`` would produce
when ``agent_name`` is supplied via ``body.context`` — guarding against
the two code paths drifting apart again (issue #3549).
"""
from app.gateway.services import build_run_config, merge_run_context_overrides
via_assistant_id = build_run_config("thread-1", None, None, assistant_id="finalis")
via_context = build_run_config("thread-1", None, None)
merge_run_context_overrides(via_context, {"agent_name": "finalis"})
assert via_assistant_id["configurable"]["agent_name"] == via_context["configurable"]["agent_name"]
assert via_assistant_id["context"]["agent_name"] == via_context["context"]["agent_name"]
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
@@ -610,13 +666,15 @@ def test_build_run_config_rejects_non_mapping_context():
def test_build_run_config_null_context_custom_agent_injects_agent_name():
"""Custom assistant_id can still be injected when context=null starts context mode."""
"""Custom assistant_id must be injected into both containers even when the
request started in context-only mode with ``context=null`` .
"""
from app.gateway.services import build_run_config
config = build_run_config("thread-1", {"context": None}, None, assistant_id="finalis")
assert config["context"] == {"agent_name": "finalis"}
assert "configurable" not in config
assert config["context"]["agent_name"] == "finalis"
assert config["configurable"]["agent_name"] == "finalis"
def test_build_run_config_context_plus_configurable_warns(caplog):
+55
View File
@@ -148,3 +148,58 @@ class TestSetupAgentNoDataLoss:
default_dir = tmp_path / "users" / "default" / "agents" / "test-agent"
assert (expected_dir / "SOUL.md").read_text() == "# My Agent"
assert not default_dir.exists()
# --- Empty soul guard tests ---
class TestSetupAgentEmptySoulGuard:
"""The tool must refuse to persist an empty / whitespace-only SOUL.md and
must not touch the filesystem at all, so an existing SOUL.md (per-agent or
global default) cannot be silently overwritten with empty content.
"""
def test_empty_soul_returns_error_and_does_not_write(self, tmp_path: Path):
result = _call_setup_agent(tmp_path, soul="", description="desc")
messages = result.update["messages"]
assert len(messages) == 1
assert "soul content is empty" in messages[0].content
assert "created_agent_name" not in result.update
agent_dir = tmp_path / "users" / "test-user-autouse" / "agents" / "test-agent"
assert not agent_dir.exists()
def test_whitespace_only_soul_returns_error_and_does_not_write(self, tmp_path: Path):
result = _call_setup_agent(tmp_path, soul=" \n\t ", description="desc")
messages = result.update["messages"]
assert len(messages) == 1
assert "soul content is empty" in messages[0].content
agent_dir = tmp_path / "users" / "test-user-autouse" / "agents" / "test-agent"
assert not agent_dir.exists()
def test_empty_soul_does_not_overwrite_existing_global_soul(self, tmp_path: Path):
"""If agent_name resolution would have fallen back to base_dir, an
empty soul must not clobber a pre-existing global SOUL.md.
"""
global_soul = tmp_path / "SOUL.md"
global_soul.write_text("original global soul", encoding="utf-8")
with patch("deerflow.tools.builtins.setup_agent_tool.get_paths", return_value=_make_paths_mock(tmp_path)):
setup_agent.func(
soul="",
description="desc",
runtime=_DummyRuntime(context={"agent_name": None}, tool_call_id="tool-empty"),
)
assert global_soul.read_text(encoding="utf-8") == "original global soul"
def test_empty_soul_does_not_overwrite_existing_per_agent_soul(self, tmp_path: Path):
agent_dir = tmp_path / "users" / "test-user-autouse" / "agents" / "test-agent"
agent_dir.mkdir(parents=True)
existing_soul = agent_dir / "SOUL.md"
existing_soul.write_text("original per-agent soul", encoding="utf-8")
_call_setup_agent(tmp_path, soul=" ", description="desc")
assert existing_soul.read_text(encoding="utf-8") == "original per-agent soul"