fix(agents): sync agent_name across context/configurable and reject empty soul (#3549) (#3553)

* fix(agents): sync agent_name across context/configurable and reject empty soul (#3549)

  Two independent issues caused custom agent creation to silently fail:

  1. build_run_config only wrote agent_name into one container (configurable or context), so setup_agent — which reads ToolRuntime.context exclusively since LangGraph >=1.1.9 — saw agent_name=None and wrote SOUL.md to the global base_dir instead of users/{user_id}/agents/{name}/. Mirror the dual-write pattern already used by merge_run_context_overrides and naming.py so both containers always carry the same value.

  2. setup_agent persisted whatever soul string it received, including empty or whitespace-only content, and still reported success. The frontend then surfaced an unusable agent and the global default SOUL.md could be silently overwritten with empty content. Reject empty soul before any filesystem operation so the model can retry.

  Tests:
  - test_gateway_services.py: dual-write regressions for both configurable and context entry paths, explicit-agent-name precedence on both sides, and a shape-parity test against merge_run_context_overrides.
  - test_setup_agent_tool.py: empty/whitespace soul rejection, plus no-overwrite guarantees for existing global and per-agent SOUL.md.

* Update services.py
2026-06-14 03:15:58 +00:00 · 2026-06-14 10:40:16 +08:00
parent 47e9570d86
commit f43aa78107
4 changed files with 158 additions and 19 deletions
@@ -211,11 +211,14 @@ def build_run_config(

    When *assistant_id* refers to a custom agent (anything other than
    ``"lead_agent"`` / ``None``), the name is forwarded as ``agent_name`` in
-    whichever runtime options container is active: ``context`` for
-    LangGraph >= 0.6.0 requests, otherwise ``configurable``.
-    ``make_lead_agent`` reads this key to load the matching
-    ``agents/<name>/SOUL.md`` and per-agent config — without it the agent
-    silently runs as the default lead agent.
+    both ``configurable`` and ``context`` so it is visible to legacy
+    configurable readers and to LangGraph ``ToolRuntime.context`` consumers
+    (e.g. the ``setup_agent`` tool, which since LangGraph >=1.1.9 no longer
+    falls back from ``context`` to ``configurable``).  An explicit
+    ``agent_name`` in either container (``configurable`` is checked first)
+    overrides the value derived from ``assistant_id``.  ``make_lead_agent``
+    reads this key to load the matching ``agents/<name>/SOUL.md`` and
+    per-agent config — without it the agent silently runs as the default lead agent.

    This mirrors the channel manager's ``_resolve_run_params`` logic so that
    the LangGraph Platform-compatible HTTP API and the IM channel path behave
@@ -253,19 +256,23 @@ def build_run_config(
        config["configurable"] = {"thread_id": thread_id}

    # Inject custom agent name when the caller specified a non-default assistant.
-    # Honour an explicit agent_name in the active runtime options container.
+    # Honour an explicit agent_name in either runtime options container.
    if assistant_id and assistant_id != _DEFAULT_ASSISTANT_ID:
        normalized = assistant_id.strip().lower().replace("_", "-")
        if not normalized or not re.fullmatch(r"[a-z0-9-]+", normalized):
            raise ValueError(f"Invalid assistant_id {assistant_id!r}: must contain only letters, digits, and hyphens after normalization.")
-        if "configurable" in config:
-            target = config["configurable"]
-        elif "context" in config:
-            target = config["context"]
-        else:
-            target = config.setdefault("configurable", {})
-        if target is not None and "agent_name" not in target:
-            target["agent_name"] = normalized
+        configurable = config.setdefault("configurable", {})
+        runtime_context = config.setdefault("context", {})
+        explicit_agent_name: str | None = None
+        if isinstance(configurable, dict) and isinstance(configurable.get("agent_name"), str):
+            explicit_agent_name = configurable["agent_name"]
+        elif isinstance(runtime_context, dict) and isinstance(runtime_context.get("agent_name"), str):
+            explicit_agent_name = runtime_context["agent_name"]
+        effective_agent_name = explicit_agent_name or normalized
+        if isinstance(configurable, dict):
+            configurable["agent_name"] = effective_agent_name
+        if isinstance(runtime_context, dict):
+            runtime_context["agent_name"] = effective_agent_name
        config.setdefault("run_name", resolve_root_run_name(config, normalized))
    if metadata:
        config.setdefault("metadata", {}).update(metadata)
@@ -28,6 +28,25 @@ def setup_agent(
        skills: Optional list of skill names this agent should use. None means use all enabled skills, empty list means no skills.
    """

+    # Reject empty / whitespace-only soul before touching the filesystem.
+    # Without this guard the tool would happily persist an empty SOUL.md and
+    # still report success, which caused the frontend to enter the "agent
+    # created" state for an unusable agent (issue #3549). Failing loudly lets
+    # the model retry instead of silently producing a broken artifact and,
+    # together with the upstream agent_name fix, prevents the global default
+    # SOUL.md from being overwritten with empty content.
+    if not soul or not soul.strip():
+        return Command(
+            update={
+                "messages": [
+                    ToolMessage(
+                        content="Error: soul content is empty; refusing to create agent with an empty SOUL.md",
+                        tool_call_id=runtime.tool_call_id,
+                    )
+                ]
+            }
+        )
+
    agent_name: str | None = runtime.context.get("agent_name") if runtime.context else None
    agent_dir = None
    is_new_dir = False
@@ -252,11 +252,17 @@ def test_build_run_config_explicit_agent_name_not_overwritten():
        assistant_id="other-agent",
    )
    assert config["configurable"]["agent_name"] == "explicit-agent"
+    assert config["context"]["agent_name"] == "explicit-agent"
    assert config["run_name"] == "explicit-agent"


 def test_build_run_config_context_custom_agent_injects_agent_name():
-    """Custom assistant_id must be forwarded as context['agent_name'] in context mode."""
+    """Custom assistant_id must be forwarded as ``agent_name`` in both
+    ``context`` and ``configurable`` (issue #3549). Previously only the
+    active container was populated, so when the caller sent context-only the
+    setup_agent tool — which reads ``ToolRuntime.context`` — saw
+    ``agent_name=None`` and wrote SOUL.md to the global base_dir.
+    """
    from app.gateway.services import build_run_config

    config = build_run_config(
@@ -267,7 +273,7 @@ def test_build_run_config_context_custom_agent_injects_agent_name():
    )

    assert config["context"]["agent_name"] == "finalis"
-    assert "configurable" not in config
+    assert config["configurable"]["agent_name"] == "finalis"


 def test_resolve_agent_factory_returns_make_lead_agent():
@@ -281,6 +287,56 @@ def test_resolve_agent_factory_returns_make_lead_agent():
    assert resolve_agent_factory("custom-agent-123") is make_lead_agent


+def test_build_run_config_configurable_custom_agent_dual_writes_agent_name():
+    """Regression for issue #3549: even when the caller uses the legacy
+    ``configurable`` path, ``agent_name`` must also land in
+    ``config['context']`` so LangGraph >=1.1.9 ``ToolRuntime.context`` consumers
+    (e.g. ``setup_agent``) observe the same value.
+    """
+    from app.gateway.services import build_run_config
+
+    config = build_run_config("thread-1", None, None, assistant_id="finalis")
+
+    assert config["configurable"]["agent_name"] == "finalis"
+    assert config["context"]["agent_name"] == "finalis"
+
+
+def test_build_run_config_context_explicit_agent_name_not_overwritten():
+    """An explicit ``context['agent_name']`` from the request must take
+    precedence over the value derived from ``assistant_id`` and be mirrored
+    to ``configurable`` so the two containers never diverge.
+    """
+    from app.gateway.services import build_run_config
+
+    config = build_run_config(
+        "thread-1",
+        {"context": {"agent_name": "explicit-agent"}},
+        None,
+        assistant_id="other-agent",
+    )
+
+    assert config["context"]["agent_name"] == "explicit-agent"
+    assert config["configurable"]["agent_name"] == "explicit-agent"
+    assert config["run_name"] == "explicit-agent"
+
+
+def test_build_run_config_dual_write_matches_merge_run_context_overrides_shape():
+    """The shape produced by ``build_run_config`` for a custom agent must be
+    indistinguishable from what ``merge_run_context_overrides`` would produce
+    when ``agent_name`` is supplied via ``body.context`` — guarding against
+    the two code paths drifting apart again (issue #3549).
+    """
+    from app.gateway.services import build_run_config, merge_run_context_overrides
+
+    via_assistant_id = build_run_config("thread-1", None, None, assistant_id="finalis")
+
+    via_context = build_run_config("thread-1", None, None)
+    merge_run_context_overrides(via_context, {"agent_name": "finalis"})
+
+    assert via_assistant_id["configurable"]["agent_name"] == via_context["configurable"]["agent_name"]
+    assert via_assistant_id["context"]["agent_name"] == via_context["context"]["agent_name"]
+
+
 # ---------------------------------------------------------------------------

 # ---------------------------------------------------------------------------
@@ -610,13 +666,15 @@ def test_build_run_config_rejects_non_mapping_context():


 def test_build_run_config_null_context_custom_agent_injects_agent_name():
-    """Custom assistant_id can still be injected when context=null starts context mode."""
+    """Custom assistant_id must be injected into both containers even when the
+    request started in context-only mode with ``context=null``.
+    """
    from app.gateway.services import build_run_config

    config = build_run_config("thread-1", {"context": None}, None, assistant_id="finalis")

-    assert config["context"] == {"agent_name": "finalis"}
-    assert "configurable" not in config
+    assert config["context"]["agent_name"] == "finalis"
+    assert config["configurable"]["agent_name"] == "finalis"


 def test_build_run_config_context_plus_configurable_warns(caplog):
@@ -148,3 +148,58 @@ class TestSetupAgentNoDataLoss:
        default_dir = tmp_path / "users" / "default" / "agents" / "test-agent"
        assert (expected_dir / "SOUL.md").read_text() == "# My Agent"
        assert not default_dir.exists()
+
+
+# --- Empty soul guard tests ---
+
+
+class TestSetupAgentEmptySoulGuard:
+    """The tool must refuse to persist an empty / whitespace-only SOUL.md and
+    must not touch the filesystem at all, so an existing SOUL.md (per-agent or
+    global default) cannot be silently overwritten with empty content.
+    """
+
+    def test_empty_soul_returns_error_and_does_not_write(self, tmp_path: Path):
+        result = _call_setup_agent(tmp_path, soul="", description="desc")
+
+        messages = result.update["messages"]
+        assert len(messages) == 1
+        assert "soul content is empty" in messages[0].content
+        assert "created_agent_name" not in result.update
+        agent_dir = tmp_path / "users" / "test-user-autouse" / "agents" / "test-agent"
+        assert not agent_dir.exists()
+
+    def test_whitespace_only_soul_returns_error_and_does_not_write(self, tmp_path: Path):
+        result = _call_setup_agent(tmp_path, soul="   \n\t  ", description="desc")
+
+        messages = result.update["messages"]
+        assert len(messages) == 1
+        assert "soul content is empty" in messages[0].content
+        agent_dir = tmp_path / "users" / "test-user-autouse" / "agents" / "test-agent"
+        assert not agent_dir.exists()
+
+    def test_empty_soul_does_not_overwrite_existing_global_soul(self, tmp_path: Path):
+        """If agent_name resolution would have fallen back to base_dir, an
+        empty soul must not clobber a pre-existing global SOUL.md.
+        """
+        global_soul = tmp_path / "SOUL.md"
+        global_soul.write_text("original global soul", encoding="utf-8")
+
+        with patch("deerflow.tools.builtins.setup_agent_tool.get_paths", return_value=_make_paths_mock(tmp_path)):
+            setup_agent.func(
+                soul="",
+                description="desc",
+                runtime=_DummyRuntime(context={"agent_name": None}, tool_call_id="tool-empty"),
+            )
+
+        assert global_soul.read_text(encoding="utf-8") == "original global soul"
+
+    def test_empty_soul_does_not_overwrite_existing_per_agent_soul(self, tmp_path: Path):
+        agent_dir = tmp_path / "users" / "test-user-autouse" / "agents" / "test-agent"
+        agent_dir.mkdir(parents=True)
+        existing_soul = agent_dir / "SOUL.md"
+        existing_soul.write_text("original per-agent soul", encoding="utf-8")
+
+        _call_setup_agent(tmp_path, soul="   ", description="desc")
+
+        assert existing_soul.read_text(encoding="utf-8") == "original per-agent soul"