From 813d3c94efa7fdea6aafcb4f459304db91fcaed0 Mon Sep 17 00:00:00 2001
From: Willem Jiang <willem.jiang@gmail.com>
Date: Mon, 11 May 2026 09:59:06 +0800
Subject: [PATCH 1/8] fix(subagents): consolidate system_prompt and skills into
 single SystemMessage (#2701)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix(subagents): consolidate system_prompt and skills into single SystemMessage

  Some LLM APIs (vLLM, Xinference, Chinese LLM providers) reject multiple
  system messages with "System message must be at the beginning." The
  subagent executor was sending separate SystemMessages for the configured
  system_prompt and each loaded skill, which caused failures when calling
  task tool with sub-agents.

  Merge system_prompt and all skill content into one SystemMessage in the
  initial state, and pass system_prompt=None to create_agent() so the
  factory doesn't prepend a second one.

Fixes #2693

* fix(subagents): update SubagentConfig.system_prompt to str | None and add astream regression test

Agent-Logs-Url: https://github.com/bytedance/deer-flow/sessions/2ee03a26-e19b-4106-abc5-c76a2906383b

Co-authored-by: WillemJiang <219644+WillemJiang@users.noreply.github.com>

* fixed the lint error

* fix the lint error in the backend

* fix the unit test error of test_subagent_executor

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
---
 .../harness/deerflow/subagents/config.py      |   2 +-
 .../harness/deerflow/subagents/executor.py    |  19 +-
 backend/tests/test_subagent_executor.py       | 184 +++++++++++++++++-
 3 files changed, 200 insertions(+), 5 deletions(-)

diff --git a/backend/packages/harness/deerflow/subagents/config.py b/backend/packages/harness/deerflow/subagents/config.py
index b0b094e28..9081e2df9 100644
--- a/backend/packages/harness/deerflow/subagents/config.py
+++ b/backend/packages/harness/deerflow/subagents/config.py
@@ -26,7 +26,7 @@ class SubagentConfig:
 
     name: str
     description: str
-    system_prompt: str
+    system_prompt: str | None = None
     tools: list[str] | None = None
     disallowed_tools: list[str] | None = field(default_factory=lambda: ["task"])
     skills: list[str] | None = None
diff --git a/backend/packages/harness/deerflow/subagents/executor.py b/backend/packages/harness/deerflow/subagents/executor.py
index a2fec6432..d6d2e4fc5 100644
--- a/backend/packages/harness/deerflow/subagents/executor.py
+++ b/backend/packages/harness/deerflow/subagents/executor.py
@@ -286,11 +286,13 @@ class SubagentExecutor:
         # Reuse shared middleware composition with lead agent.
         middlewares = build_subagent_runtime_middlewares(app_config=app_config, model_name=self.model_name, lazy_init=True)
 
+        # system_prompt is included in initial state messages (see _build_initial_state)
+        # to avoid multiple SystemMessages which some LLM APIs don't support.
         return create_agent(
             model=model,
             tools=tools if tools is not None else self.tools,
             middleware=middlewares,
-            system_prompt=self.config.system_prompt,
+            system_prompt=None,
             state_schema=ThreadState,
         )
 
@@ -365,14 +367,25 @@ class SubagentExecutor:
         Returns:
             Initial state dictionary and tools filtered by loaded skill metadata.
         """
+
         # Load skills as conversation items (Codex pattern)
         skills = await self._load_skills()
         filtered_tools = self._apply_skill_allowed_tools(skills)
         skill_messages = await self._load_skill_messages(skills)
 
+        # Combine system_prompt and skills into a single SystemMessage.
+        # Some LLM APIs reject multiple SystemMessages with
+        # "System message must be at the beginning."
+        system_parts: list[str] = []
+        if self.config.system_prompt:
+            system_parts.append(self.config.system_prompt)
+        for skill_msg in skill_messages:
+            system_parts.append(skill_msg.content)
+
         messages: list[Any] = []
-        # Skill content injected as developer/system messages before the task
-        messages.extend(skill_messages)
+        if system_parts:
+            messages.append(SystemMessage(content="\n\n".join(system_parts)))
+
         # Then the actual task
         messages.append(HumanMessage(content=task))
 
diff --git a/backend/tests/test_subagent_executor.py b/backend/tests/test_subagent_executor.py
index b8da323f4..87c82ff96 100644
--- a/backend/tests/test_subagent_executor.py
+++ b/backend/tests/test_subagent_executor.py
@@ -291,7 +291,7 @@ class TestAgentConstruction:
         assert captured["agent"]["model"] is model
         assert captured["agent"]["middleware"] is middlewares
         assert captured["agent"]["tools"] == []
-        assert captured["agent"]["system_prompt"] == base_config.system_prompt
+        assert captured["agent"]["system_prompt"] is None  # system_prompt is merged into initial state messages
 
     @pytest.mark.anyio
     async def test_load_skill_messages_uses_explicit_app_config_for_skill_storage(
@@ -331,6 +331,124 @@ class TestAgentConstruction:
         assert len(messages) == 1
         assert "Use demo skill" in messages[0].content
 
+    @pytest.mark.anyio
+    async def test_build_initial_state_consolidates_system_prompt_and_skills(
+        self,
+        classes,
+        base_config,
+        monkeypatch: pytest.MonkeyPatch,
+        tmp_path,
+    ):
+        """_build_initial_state merges system_prompt and skills into one SystemMessage."""
+        SubagentExecutor = classes["SubagentExecutor"]
+
+        skill_dir = tmp_path / "my-skill"
+        skill_dir.mkdir()
+        skill_file = skill_dir / "SKILL.md"
+        skill_file.write_text("Skill instructions here", encoding="utf-8")
+
+        monkeypatch.setattr(
+            sys.modules["deerflow.skills.storage"],
+            "get_or_new_skill_storage",
+            lambda *, app_config=None: SimpleNamespace(load_skills=lambda *, enabled_only: [SimpleNamespace(name="my-skill", skill_file=skill_file, allowed_tools=None)]),
+        )
+
+        executor = SubagentExecutor(
+            config=base_config,
+            tools=[],
+            thread_id="test-thread",
+        )
+
+        state, _filtered_tools = await executor._build_initial_state("Do the task")
+
+        messages = state["messages"]
+        # Should have exactly 2 messages: one combined SystemMessage + one HumanMessage
+        assert len(messages) == 2
+
+        from langchain_core.messages import HumanMessage, SystemMessage
+
+        assert isinstance(messages[0], SystemMessage)
+        assert isinstance(messages[1], HumanMessage)
+        # SystemMessage should contain both the system_prompt and skill content
+        assert base_config.system_prompt in messages[0].content
+        assert "Skill instructions here" in messages[0].content
+        # HumanMessage should be the task
+        assert messages[1].content == "Do the task"
+
+    @pytest.mark.anyio
+    async def test_build_initial_state_no_skills_only_system_prompt(
+        self,
+        classes,
+        base_config,
+        monkeypatch: pytest.MonkeyPatch,
+    ):
+        """_build_initial_state works when there are no skills."""
+        SubagentExecutor = classes["SubagentExecutor"]
+
+        monkeypatch.setattr(
+            sys.modules["deerflow.skills.storage"],
+            "get_or_new_skill_storage",
+            lambda *, app_config=None: SimpleNamespace(load_skills=lambda *, enabled_only: []),
+        )
+
+        executor = SubagentExecutor(
+            config=base_config,
+            tools=[],
+            thread_id="test-thread",
+        )
+
+        state, _filtered_tools = await executor._build_initial_state("Do the task")
+
+        messages = state["messages"]
+        from langchain_core.messages import HumanMessage, SystemMessage
+
+        assert len(messages) == 2
+        assert isinstance(messages[0], SystemMessage)
+        assert base_config.system_prompt in messages[0].content
+        assert isinstance(messages[1], HumanMessage)
+
+    @pytest.mark.anyio
+    async def test_build_initial_state_no_system_prompt_with_skills(
+        self,
+        classes,
+        monkeypatch: pytest.MonkeyPatch,
+        tmp_path,
+    ):
+        """_build_initial_state works when there is no system_prompt but there are skills."""
+        SubagentConfig = classes["SubagentConfig"]
+
+        config = SubagentConfig(
+            name="test-agent",
+            description="Test agent",
+            system_prompt=None,
+            max_turns=10,
+            timeout_seconds=60,
+        )
+
+        skill_dir = tmp_path / "my-skill"
+        skill_dir.mkdir()
+        skill_file = skill_dir / "SKILL.md"
+        skill_file.write_text("Skill content", encoding="utf-8")
+
+        monkeypatch.setattr(
+            sys.modules["deerflow.skills.storage"],
+            "get_or_new_skill_storage",
+            lambda *, app_config=None: SimpleNamespace(load_skills=lambda *, enabled_only: [SimpleNamespace(name="my-skill", skill_file=skill_file, allowed_tools=None)]),
+        )
+
+        SubagentExecutor = classes["SubagentExecutor"]
+        executor = SubagentExecutor(config=config, tools=[], thread_id="test-thread")
+
+        state, _filtered_tools = await executor._build_initial_state("Do the task")
+
+        messages = state["messages"]
+        from langchain_core.messages import HumanMessage, SystemMessage
+
+        assert len(messages) == 2
+        assert isinstance(messages[0], SystemMessage)
+        assert "Skill content" in messages[0].content
+        assert isinstance(messages[1], HumanMessage)
+
 
 # -----------------------------------------------------------------------------
 # Async Execution Path Tests
@@ -514,6 +632,70 @@ class TestAsyncExecutionPath:
         assert result.status == SubagentStatus.COMPLETED
         assert "Task" in result.result
 
+    @pytest.mark.anyio
+    async def test_aexecute_passes_at_most_one_system_message_to_agent(
+        self,
+        classes,
+        base_config,
+        monkeypatch: pytest.MonkeyPatch,
+        tmp_path,
+    ):
+        """Regression: messages sent to agent.astream must contain at most one
+        SystemMessage and it must be the first message.
+
+        This catches any regression where system_prompt would be re-injected
+        via create_agent() (e.g. system_prompt not passed as None) and appear
+        as a second SystemMessage, which providers like vLLM and Xinference
+        reject with "System message must be at the beginning."
+        """
+        from langchain_core.messages import AIMessage, SystemMessage
+
+        SubagentExecutor = classes["SubagentExecutor"]
+        SubagentStatus = classes["SubagentStatus"]
+
+        # Set up a skill so both system_prompt AND skill content are present,
+        # maximising the chance of catching a double-SystemMessage regression.
+        skill_dir = tmp_path / "regression-skill"
+        skill_dir.mkdir()
+        (skill_dir / "SKILL.md").write_text("Skill instruction text", encoding="utf-8")
+
+        monkeypatch.setattr(
+            sys.modules["deerflow.skills.storage"],
+            "get_or_new_skill_storage",
+            lambda *, app_config=None: SimpleNamespace(load_skills=lambda *, enabled_only: [SimpleNamespace(name="regression-skill", skill_file=skill_dir / "SKILL.md", allowed_tools=None)]),
+        )
+
+        captured_states: list[dict] = []
+
+        async def capturing_astream(state, **kwargs):
+            captured_states.append(state)
+            yield {"messages": [AIMessage(content="Done", id="msg-1")]}
+
+        mock_agent = MagicMock()
+        mock_agent.astream = capturing_astream
+
+        executor = SubagentExecutor(
+            config=base_config,
+            tools=[],
+            thread_id="test-thread",
+        )
+
+        with patch.object(executor, "_create_agent", return_value=mock_agent):
+            result = await executor._aexecute("Do something")
+
+        assert result.status == SubagentStatus.COMPLETED
+        assert len(captured_states) == 1, "astream should be called exactly once"
+        initial_messages = captured_states[0]["messages"]
+
+        system_messages = [m for m in initial_messages if isinstance(m, SystemMessage)]
+        assert len(system_messages) <= 1, f"Expected at most 1 SystemMessage but got {len(system_messages)}: {system_messages}"
+        if system_messages:
+            assert initial_messages[0] is system_messages[0], "SystemMessage must be the first message in the conversation"
+            # The consolidated SystemMessage must carry both the system_prompt
+            # and all skill content — nothing should be split across two messages.
+            assert base_config.system_prompt in system_messages[0].content
+            assert "Skill instruction text" in system_messages[0].content
+
 
 class TestSkillAllowedTools:
     @pytest.mark.anyio

From c3bc6c7cd5f0208464301f2f4c772d956920b2dd Mon Sep 17 00:00:00 2001
From: AochenShen99 <142667174+ShenAC-SAC@users.noreply.github.com>
Date: Mon, 11 May 2026 17:38:37 +0800
Subject: [PATCH 2/8] fix(nginx): defer CORS to gateway allowlist (#2861)

* fix(nginx): defer cors to gateway allowlist

Remove proxy-level wildcard CORS handling so browser origins are controlled by the Gateway allowlist and stay aligned with CSRF origin checks.

* docs: document gateway cors allowlist

Clarify that same-origin nginx access needs no CORS headers while split-origin or port-forwarded browser clients must opt in with GATEWAY_CORS_ORIGINS.

* docs(gateway): record cors source of truth

Document that Gateway CORSMiddleware and CSRFMiddleware share GATEWAY_CORS_ORIGINS as the split-origin source of truth.

* fix(gateway): align cors origin normalization

* docs: clarify gateway langgraph routing

* docs(gateway): update runtime routing note
---
 .env.example                                  |  5 +-
 CONTRIBUTING.md                               | 32 +++++--------
 README.md                                     |  2 +
 backend/CLAUDE.md                             |  4 +-
 backend/README.md                             | 41 ++++++++--------
 backend/app/gateway/app.py                    | 48 +++++++++----------
 backend/app/gateway/config.py                 |  3 --
 backend/app/gateway/csrf_middleware.py        |  9 +++-
 backend/docs/API.md                           | 30 +++++-------
 backend/docs/ARCHITECTURE.md                  | 20 ++++----
 backend/tests/test_gateway_docs_toggle.py     | 42 ++++++++++++++++
 backend/tests/test_gateway_runtime_cleanup.py | 23 +++++++++
 docker/nginx/nginx.conf                       | 20 ++------
 docker/nginx/nginx.local.conf                 | 20 ++------
 14 files changed, 169 insertions(+), 130 deletions(-)

diff --git a/.env.example b/.env.example
index a859ec2a5..43290954b 100644
--- a/.env.example
+++ b/.env.example
@@ -9,8 +9,9 @@ JINA_API_KEY=your-jina-api-key
 
 # InfoQuest API Key
 INFOQUEST_API_KEY=your-infoquest-api-key
-# CORS Origins (comma-separated) - e.g., http://localhost:3000,http://localhost:3001
-# CORS_ORIGINS=http://localhost:3000
+# Browser CORS allowlist for split-origin or port-forwarded deployments (comma-separated exact origins).
+# Leave unset when using the unified nginx endpoint, e.g. http://localhost:2026.
+# GATEWAY_CORS_ORIGINS=http://localhost:3000,http://127.0.0.1:3000
 
 # Optional:
 # FIRECRAWL_API_KEY=your-firecrawl-api-key
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b7cb2840b..51b834b4f 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -46,12 +46,12 @@ Docker provides a consistent, isolated environment with all dependencies pre-con
    All services will start with hot-reload enabled:
    - Frontend changes are automatically reloaded
    - Backend changes trigger automatic restart
-   - LangGraph server supports hot-reload
+   - Gateway-hosted LangGraph-compatible runtime supports hot-reload
 
 4. **Access the application**:
    - Web Interface: http://localhost:2026
    - API Gateway: http://localhost:2026/api/*
-   - LangGraph: http://localhost:2026/api/langgraph/*
+   - LangGraph-compatible API: http://localhost:2026/api/langgraph/*
 
 #### Docker Commands
 
@@ -94,7 +94,7 @@ Use these as practical starting points for development and review environments:
 If `make docker-init`, `make docker-start`, or `make docker-stop` fails on Linux with an error like below, your current user likely does not have permission to access the Docker daemon socket:
 
 ```text
-unable to get image 'deer-flow-dev-langgraph': permission denied while trying to connect to the Docker daemon socket at unix:///var/run/docker.sock
+unable to get image 'deer-flow-gateway': permission denied while trying to connect to the Docker daemon socket at unix:///var/run/docker.sock
 ```
 
 Recommended fix: add your current user to the `docker` group so Docker commands work without `sudo`.
@@ -131,9 +131,8 @@ Host Machine
 Docker Compose (deer-flow-dev)
   ├→ nginx (port 2026) ← Reverse proxy
   ├→ web (port 3000) ← Frontend with hot-reload
-  ├→ api (port 8001) ← Gateway API with hot-reload
-   ├→ langgraph (port 2024) ← LangGraph server with hot-reload
-   └→ provisioner (optional, port 8002) ← Started only in provisioner/K8s sandbox mode
+  ├→ gateway (port 8001) ← Gateway API + LangGraph-compatible runtime with hot-reload
+  └→ provisioner (optional, port 8002) ← Started only in provisioner/K8s sandbox mode
 ```
 
 **Benefits of Docker Development**:
@@ -184,17 +183,13 @@ Required tools:
 
 If you need to start services individually:
 
-1. **Start backend services**:
+1. **Start backend service**:
    ```bash
-   # Terminal 1: Start LangGraph Server (port 2024)
-   cd backend
-   make dev
-
-   # Terminal 2: Start Gateway API (port 8001)
+   # Terminal 1: Start Gateway API and embedded LangGraph-compatible runtime (port 8001)
    cd backend
    make gateway
 
-   # Terminal 3: Start Frontend (port 3000)
+   # Terminal 2: Start Frontend (port 3000)
    cd frontend
    pnpm dev
    ```
@@ -212,10 +207,10 @@ If you need to start services individually:
 
 The nginx configuration provides:
 - Unified entry point on port 2026
-- Routes `/api/langgraph/*` to LangGraph Server (2024)
+- Gateway owns `/api/langgraph/*` and translates those public LangGraph-compatible paths to its native `/api/*` routers behind nginx
 - Routes other `/api/*` endpoints to Gateway API (8001)
 - Routes non-API requests to Frontend (3000)
-- Centralized CORS handling
+- Same-origin API routing; split-origin or port-forwarded browser clients should use the Gateway `GATEWAY_CORS_ORIGINS` allowlist
 - SSE/streaming support for real-time agent responses
 - Optimized timeouts for long-running operations
 
@@ -235,8 +230,8 @@ deer-flow/
 │       └── nginx.local.conf # Nginx config for local dev
 ├── backend/                 # Backend application
 │   ├── src/
-│   │   ├── gateway/        # Gateway API (port 8001)
-│   │   ├── agents/         # LangGraph agents (port 2024)
+│   │   ├── gateway/        # Gateway API and LangGraph-compatible runtime (port 8001)
+│   │   ├── agents/         # LangGraph agent definitions
 │   │   ├── mcp/            # Model Context Protocol integration
 │   │   ├── skills/         # Skills system
 │   │   └── sandbox/        # Sandbox execution
@@ -256,8 +251,7 @@ Browser
   ↓
 Nginx (port 2026) ← Unified entry point
   ├→ Frontend (port 3000) ← / (non-API requests)
-  ├→ Gateway API (port 8001) ← /api/models, /api/mcp, /api/skills, /api/threads/*/artifacts
-  └→ LangGraph Server (port 2024) ← /api/langgraph/* (agent interactions)
+  └→ Gateway API (port 8001) ← /api/* and /api/langgraph/* (LangGraph-compatible agent interactions)
 ```
 
 ## Development Workflow
diff --git a/README.md b/README.md
index 0fc8f173e..9ff1d501b 100644
--- a/README.md
+++ b/README.md
@@ -245,6 +245,8 @@ make down   # Stop and remove containers
 
 Access: http://localhost:2026
 
+The unified nginx endpoint is same-origin by default and does not emit browser CORS headers. If you run a split-origin or port-forwarded browser client, set `GATEWAY_CORS_ORIGINS` to comma-separated exact origins such as `http://localhost:3000`; the Gateway then applies the CORS allowlist and matching CSRF origin checks.
+
 See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed Docker development guide.
 
 #### Option 2: Local Development
diff --git a/backend/CLAUDE.md b/backend/CLAUDE.md
index 99922a61e..67ee9cc7e 100644
--- a/backend/CLAUDE.md
+++ b/backend/CLAUDE.md
@@ -207,6 +207,8 @@ Configuration priority:
 
 FastAPI application on port 8001 with health check at `GET /health`. Set `GATEWAY_ENABLE_DOCS=false` to disable `/docs`, `/redoc`, and `/openapi.json` in production (default: enabled).
 
+CORS is same-origin by default when requests enter through nginx on port 2026. Split-origin or port-forwarded browser clients must opt in with `GATEWAY_CORS_ORIGINS` (comma-separated exact origins); Gateway `CORSMiddleware` and `CSRFMiddleware` both read that variable so browser CORS and auth-origin checks stay aligned.
+
 **Routers**:
 
 | Router | Endpoints |
@@ -223,7 +225,7 @@ FastAPI application on port 8001 with health check at `GET /health`. Set `GATEWA
 | **Feedback** (`/api/threads/{id}/runs/{rid}/feedback`) | `PUT /` - upsert feedback; `DELETE /` - delete user feedback; `POST /` - create feedback; `GET /` - list feedback; `GET /stats` - aggregate stats; `DELETE /{fid}` - delete specific |
 | **Runs** (`/api/runs`) | `POST /stream` - stateless run + SSE; `POST /wait` - stateless run + block; `GET /{rid}/messages` - paginated messages by run_id `{data, has_more}` (cursor: `after_seq`/`before_seq`); `GET /{rid}/feedback` - list feedback by run_id |
 
-Proxied through nginx: `/api/langgraph/*` → LangGraph, all other `/api/*` → Gateway.
+Proxied through nginx: `/api/langgraph/*` → Gateway LangGraph-compatible runtime, all other `/api/*` → Gateway REST APIs.
 
 ### Sandbox System (`packages/harness/deerflow/sandbox/`)
 
diff --git a/backend/README.md b/backend/README.md
index 6295eba22..9b4d26fb1 100644
--- a/backend/README.md
+++ b/backend/README.md
@@ -14,28 +14,31 @@ DeerFlow is a LangGraph-based AI super agent with sandbox execution, persistent
                                 │                  │
               /api/langgraph/*  │                  │  /api/* (other)
                                 ▼                  ▼
-               ┌────────────────────┐  ┌────────────────────────┐
-               │ LangGraph Server   │  │   Gateway API (8001)   │
-               │    (Port 2024)     │  │   FastAPI REST         │
-               │                    │  │                        │
-               │ ┌────────────────┐ │  │ Models, MCP, Skills,   │
-               │ │  Lead Agent    │ │  │ Memory, Uploads,       │
-               │ │  ┌──────────┐  │ │  │ Artifacts              │
-               │ │  │Middleware│  │ │  └────────────────────────┘
-               │ │  │  Chain   │  │ │
-               │ │  └──────────┘  │ │
-               │ │  ┌──────────┐  │ │
-               │ │  │  Tools   │  │ │
-               │ │  └──────────┘  │ │
-               │ │  ┌──────────┐  │ │
-               │ │  │Subagents │  │ │
-               │ │  └──────────┘  │ │
-               │ └────────────────┘ │
-               └────────────────────┘
+               ┌──────────────────────────────────────────────┐
+               │             Gateway API (8001)               │
+               │  FastAPI REST + LangGraph-compatible runtime │
+               │                                              │
+               │ Models, MCP, Skills, Memory, Uploads,        │
+               │ Artifacts, Threads, Runs, Streaming          │
+               │                                              │
+               │ ┌────────────────┐                           │
+               │ │  Lead Agent    │                           │
+               │ │  ┌──────────┐  │                           │
+               │ │  │Middleware│  │                           │
+               │ │  │  Chain   │  │                           │
+               │ │  └──────────┘  │                           │
+               │ │  ┌──────────┐  │                           │
+               │ │  │  Tools   │  │                           │
+               │ │  └──────────┘  │                           │
+               │ │  ┌──────────┐  │                           │
+               │ │  │Subagents │  │                           │
+               │ │  └──────────┘  │                           │
+               │ └────────────────┘                           │
+               └──────────────────────────────────────────────┘
 ```
 
 **Request Routing** (via Nginx):
-- `/api/langgraph/*` → LangGraph Server - agent interactions, threads, streaming
+- `/api/langgraph/*` → Gateway API - LangGraph-compatible agent interactions, threads, runs, and streaming translated to native `/api/*` routers
 - `/api/*` (other) → Gateway API - models, MCP, skills, memory, artifacts, uploads, thread-local cleanup
 - `/` (non-API) → Frontend - Next.js web interface
 
diff --git a/backend/app/gateway/app.py b/backend/app/gateway/app.py
index 2a506df2b..8848f473e 100644
--- a/backend/app/gateway/app.py
+++ b/backend/app/gateway/app.py
@@ -1,6 +1,5 @@
 import asyncio
 import logging
-import os
 from collections.abc import AsyncGenerator
 from contextlib import asynccontextmanager
 
@@ -9,7 +8,7 @@ from fastapi.middleware.cors import CORSMiddleware
 
 from app.gateway.auth_middleware import AuthMiddleware
 from app.gateway.config import get_gateway_config
-from app.gateway.csrf_middleware import CSRFMiddleware
+from app.gateway.csrf_middleware import CSRFMiddleware, get_configured_cors_origins
 from app.gateway.deps import langgraph_runtime
 from app.gateway.routers import (
     agents,
@@ -219,7 +218,9 @@ def create_app() -> FastAPI:
         Configured FastAPI application instance.
     """
     config = get_gateway_config()
-    docs_kwargs = {"docs_url": "/docs", "redoc_url": "/redoc", "openapi_url": "/openapi.json"} if config.enable_docs else {"docs_url": None, "redoc_url": None, "openapi_url": None}
+    docs_url = "/docs" if config.enable_docs else None
+    redoc_url = "/redoc" if config.enable_docs else None
+    openapi_url = "/openapi.json" if config.enable_docs else None
 
     app = FastAPI(
         title="DeerFlow API Gateway",
@@ -239,12 +240,14 @@ API Gateway for DeerFlow - A LangGraph-based AI agent backend with sandbox execu
 
 ### Architecture
 
-LangGraph requests are handled by nginx reverse proxy.
-This gateway provides custom endpoints for models, MCP configuration, skills, and artifacts.
+LangGraph-compatible requests are routed through nginx to this gateway.
+This gateway provides runtime endpoints for agent runs plus custom endpoints for models, MCP configuration, skills, and artifacts.
         """,
         version="0.1.0",
         lifespan=lifespan,
-        **docs_kwargs,
+        docs_url=docs_url,
+        redoc_url=redoc_url,
+        openapi_url=openapi_url,
         openapi_tags=[
             {
                 "name": "models",
@@ -307,25 +310,18 @@ This gateway provides custom endpoints for models, MCP configuration, skills, an
     # CSRF: Double Submit Cookie pattern for state-changing requests
     app.add_middleware(CSRFMiddleware)
 
-    # CORS: when GATEWAY_CORS_ORIGINS is set (dev without nginx), add CORS middleware.
-    # In production, nginx handles CORS and no middleware is needed.
-    cors_origins_env = os.environ.get("GATEWAY_CORS_ORIGINS", "")
-    if cors_origins_env:
-        cors_origins = [o.strip() for o in cors_origins_env.split(",") if o.strip()]
-        # Validate: wildcard origin with credentials is a security misconfiguration
-        for origin in cors_origins:
-            if origin == "*":
-                logger.error("GATEWAY_CORS_ORIGINS contains wildcard '*' with allow_credentials=True. This is a security misconfiguration — browsers will reject the response. Use explicit scheme://host:port origins instead.")
-                cors_origins = [o for o in cors_origins if o != "*"]
-                break
-        if cors_origins:
-            app.add_middleware(
-                CORSMiddleware,
-                allow_origins=cors_origins,
-                allow_credentials=True,
-                allow_methods=["*"],
-                allow_headers=["*"],
-            )
+    # CORS: the unified nginx endpoint is same-origin by default. Split-origin
+    # browser clients must opt in with this explicit Gateway allowlist so CORS
+    # and CSRF origin checks share the same source of truth.
+    cors_origins = sorted(get_configured_cors_origins())
+    if cors_origins:
+        app.add_middleware(
+            CORSMiddleware,
+            allow_origins=cors_origins,
+            allow_credentials=True,
+            allow_methods=["*"],
+            allow_headers=["*"],
+        )
 
     # Include routers
     # Models API is mounted at /api/models
@@ -374,7 +370,7 @@ This gateway provides custom endpoints for models, MCP configuration, skills, an
     app.include_router(runs.router)
 
     @app.get("/health", tags=["health"])
-    async def health_check() -> dict:
+    async def health_check() -> dict[str, str]:
         """Health check endpoint.
 
         Returns:
diff --git a/backend/app/gateway/config.py b/backend/app/gateway/config.py
index 95221dad2..06a7d5b1a 100644
--- a/backend/app/gateway/config.py
+++ b/backend/app/gateway/config.py
@@ -8,7 +8,6 @@ class GatewayConfig(BaseModel):
 
     host: str = Field(default="0.0.0.0", description="Host to bind the gateway server")
     port: int = Field(default=8001, description="Port to bind the gateway server")
-    cors_origins: list[str] = Field(default_factory=lambda: ["http://localhost:3000"], description="Allowed CORS origins")
     enable_docs: bool = Field(default=True, description="Enable Swagger/ReDoc/OpenAPI endpoints")
 
 
@@ -19,11 +18,9 @@ def get_gateway_config() -> GatewayConfig:
     """Get gateway config, loading from environment if available."""
     global _gateway_config
     if _gateway_config is None:
-        cors_origins_str = os.getenv("CORS_ORIGINS", "http://localhost:3000")
         _gateway_config = GatewayConfig(
             host=os.getenv("GATEWAY_HOST", "0.0.0.0"),
             port=int(os.getenv("GATEWAY_PORT", "8001")),
-            cors_origins=cors_origins_str.split(","),
             enable_docs=os.getenv("GATEWAY_ENABLE_DOCS", "true").lower() == "true",
         )
     return _gateway_config
diff --git a/backend/app/gateway/csrf_middleware.py b/backend/app/gateway/csrf_middleware.py
index 08e95be4b..f34882032 100644
--- a/backend/app/gateway/csrf_middleware.py
+++ b/backend/app/gateway/csrf_middleware.py
@@ -6,7 +6,7 @@ State-changing operations require CSRF protection.
 
 import os
 import secrets
-from collections.abc import Callable
+from collections.abc import Awaitable, Callable
 from urllib.parse import urlsplit
 
 from fastapi import Request, Response
@@ -106,6 +106,11 @@ def _configured_cors_origins() -> set[str]:
     return origins
 
 
+def get_configured_cors_origins() -> set[str]:
+    """Return normalized explicit browser origins from GATEWAY_CORS_ORIGINS."""
+    return _configured_cors_origins()
+
+
 def _first_header_value(value: str | None) -> str | None:
     """Return the first value from a comma-separated proxy header."""
     if not value:
@@ -172,7 +177,7 @@ class CSRFMiddleware(BaseHTTPMiddleware):
     def __init__(self, app: ASGIApp) -> None:
         super().__init__(app)
 
-    async def dispatch(self, request: Request, call_next: Callable) -> Response:
+    async def dispatch(self, request: Request, call_next: Callable[[Request], Awaitable[Response]]) -> Response:
         _is_auth = is_auth_endpoint(request)
 
         if should_check_csrf(request) and _is_auth and not is_allowed_auth_origin(request):
diff --git a/backend/docs/API.md b/backend/docs/API.md
index dcefe6779..293c1ebd1 100644
--- a/backend/docs/API.md
+++ b/backend/docs/API.md
@@ -6,16 +6,16 @@ This document provides a complete reference for the DeerFlow backend APIs.
 
 DeerFlow backend exposes two sets of APIs:
 
-1. **LangGraph API** - Agent interactions, threads, and streaming (`/api/langgraph/*`)
+1. **LangGraph-compatible API** - Agent interactions, threads, and streaming (`/api/langgraph/*`)
 2. **Gateway API** - Models, MCP, skills, uploads, and artifacts (`/api/*`)
 
 All APIs are accessed through the Nginx reverse proxy at port 2026.
 
-## LangGraph API
+## LangGraph-compatible API
 
 Base URL: `/api/langgraph`
 
-The LangGraph API is provided by the LangGraph server and follows the LangGraph SDK conventions.
+The public LangGraph-compatible API follows LangGraph SDK conventions. In the unified nginx deployment, Gateway owns `/api/langgraph/*` and translates those paths to its native `/api/*` run, thread, and streaming routers.
 
 ### Threads
 
@@ -104,17 +104,11 @@ Content-Type: application/json
 **Recursion Limit:**
 
 `config.recursion_limit` caps the number of graph steps LangGraph will execute
-in a single run. The `/api/langgraph/*` endpoints go straight to the LangGraph
-server and therefore inherit LangGraph's native default of **25**, which is
-too low for plan-mode or subagent-heavy runs — the agent typically errors out
-with `GraphRecursionError` after the first round of subagent results comes
-back, before the lead agent can synthesize the final answer.
-
-DeerFlow's own Gateway and IM-channel paths mitigate this by defaulting to
-`100` in `build_run_config` (see `backend/app/gateway/services.py`), but
-clients calling the LangGraph API directly must set `recursion_limit`
-explicitly in the request body. `100` matches the Gateway default and is a
-safe starting point; increase it if you run deeply nested subagent graphs.
+in a single run. The unified Gateway path defaults to `100` in
+`build_run_config` (see `backend/app/gateway/services.py`), a safer starting
+point than LangGraph's native default of **25** for plan-mode or
+subagent-heavy runs. Clients can still set `recursion_limit` explicitly in
+the request body; increase it if you run deeply nested subagent graphs.
 
 **Configurable Options:**
 - `model_name` (string): Override the default model
@@ -649,7 +643,7 @@ curl -X POST http://localhost:2026/api/langgraph/threads/abc123/runs \
   }'
 ```
 
-> The `/api/langgraph/*` endpoints bypass DeerFlow's Gateway and inherit
-> LangGraph's native `recursion_limit` default of 25, which is too low for
-> plan-mode or subagent runs. Set `config.recursion_limit` explicitly — see
-> the [Create Run](#create-run) section for details.
+> The unified Gateway path defaults `config.recursion_limit` to 100 for
+> plan-mode and subagent-heavy runs. Clients may still set
+> `config.recursion_limit` explicitly — see the [Create Run](#create-run)
+> section for details.
diff --git a/backend/docs/ARCHITECTURE.md b/backend/docs/ARCHITECTURE.md
index cc0993f7f..e6fdbe217 100644
--- a/backend/docs/ARCHITECTURE.md
+++ b/backend/docs/ARCHITECTURE.md
@@ -14,8 +14,8 @@ This document provides a comprehensive overview of the DeerFlow backend architec
 │                          Nginx (Port 2026)                               │
 │                    Unified Reverse Proxy Entry Point                      │
 │  ┌────────────────────────────────────────────────────────────────────┐  │
-│  │  /api/langgraph/*  →  LangGraph Server (2024)                      │  │
-│  │  /api/*            →  Gateway API (8001)                           │  │
+│  │  /api/langgraph/*  →  Gateway LangGraph-compatible runtime (8001)  │  │
+│  │  /api/*            →  Gateway REST APIs (8001)                     │  │
 │  │  /*                →  Frontend (3000)                               │  │
 │  └────────────────────────────────────────────────────────────────────┘  │
 └─────────────────────────────────┬────────────────────────────────────────┘
@@ -24,8 +24,8 @@ This document provides a comprehensive overview of the DeerFlow backend architec
           │                       │                       │
           ▼                       ▼                       ▼
 ┌─────────────────────┐ ┌─────────────────────┐ ┌─────────────────────┐
-│   LangGraph Server  │ │    Gateway API      │ │     Frontend        │
-│     (Port 2024)     │ │    (Port 8001)      │ │    (Port 3000)      │
+│ Embedded Runtime    │ │    Gateway API      │ │     Frontend        │
+│  (inside Gateway)   │ │    (Port 8001)      │ │    (Port 3000)      │
 │                     │ │                     │ │                     │
 │  - Agent Runtime    │ │  - Models API       │ │  - Next.js App      │
 │  - Thread Mgmt      │ │  - MCP Config       │ │  - React UI         │
@@ -52,9 +52,9 @@ This document provides a comprehensive overview of the DeerFlow backend architec
 
 ## Component Details
 
-### LangGraph Server
+### Embedded LangGraph Runtime
 
-The LangGraph server is the core agent runtime, built on LangGraph for robust multi-agent workflow orchestration.
+The LangGraph-compatible runtime runs inside the Gateway process and is built on LangGraph for robust multi-agent workflow orchestration.
 
 **Entry Point**: `packages/harness/deerflow/agents/lead_agent/agent.py:make_lead_agent`
 
@@ -78,7 +78,7 @@ The LangGraph server is the core agent runtime, built on LangGraph for robust mu
 
 ### Gateway API
 
-FastAPI application providing REST endpoints for non-agent operations.
+FastAPI application providing REST endpoints plus the public LangGraph-compatible `/api/langgraph/*` runtime routes.
 
 **Entry Point**: `app/gateway/app.py`
 
@@ -353,10 +353,10 @@ SKILL.md Format:
    POST /api/langgraph/threads/{thread_id}/runs
    {"input": {"messages": [{"role": "user", "content": "Hello"}]}}
 
-2. Nginx → LangGraph Server (2024)
-   Proxied to LangGraph server
+2. Nginx → Gateway API (8001)
+   Routes `/api/langgraph/*` to the Gateway's LangGraph-compatible runtime
 
-3. LangGraph Server
+3. Embedded LangGraph runtime
    a. Load/create thread state
    b. Execute middleware chain:
       - ThreadDataMiddleware: Set up paths
diff --git a/backend/tests/test_gateway_docs_toggle.py b/backend/tests/test_gateway_docs_toggle.py
index 54392ee2e..372f93e18 100644
--- a/backend/tests/test_gateway_docs_toggle.py
+++ b/backend/tests/test_gateway_docs_toggle.py
@@ -122,3 +122,45 @@ def test_health_still_works_when_docs_disabled():
         resp = client.get("/health")
         assert resp.status_code == 200
         assert resp.json()["status"] == "healthy"
+
+
+# ---------------------------------------------------------------------------
+# Runtime CORS behavior
+# ---------------------------------------------------------------------------
+
+
+def _make_gateway_client(cors_origins: str) -> TestClient:
+    with patch.dict(os.environ, {"GATEWAY_CORS_ORIGINS": cors_origins}):
+        _reset_gateway_config()
+        from app.gateway.app import create_app
+
+        return TestClient(create_app())
+
+
+def test_gateway_cors_allows_configured_origin():
+    """GATEWAY_CORS_ORIGINS should control actual browser CORS responses."""
+    client = _make_gateway_client("https://app.example")
+
+    response = client.get("/health", headers={"Origin": "https://app.example"})
+
+    assert response.status_code == 200
+    assert response.headers["access-control-allow-origin"] == "https://app.example"
+    assert response.headers["access-control-allow-credentials"] == "true"
+
+
+def test_gateway_cors_rejects_unconfigured_origin():
+    client = _make_gateway_client("https://app.example")
+
+    response = client.get("/health", headers={"Origin": "https://evil.example"})
+
+    assert response.status_code == 200
+    assert "access-control-allow-origin" not in response.headers
+
+
+def test_gateway_cors_normalizes_configured_default_port():
+    client = _make_gateway_client("https://app.example:443")
+
+    response = client.get("/health", headers={"Origin": "https://app.example"})
+
+    assert response.status_code == 200
+    assert response.headers["access-control-allow-origin"] == "https://app.example"
diff --git a/backend/tests/test_gateway_runtime_cleanup.py b/backend/tests/test_gateway_runtime_cleanup.py
index 3bf7c1a5b..895e04885 100644
--- a/backend/tests/test_gateway_runtime_cleanup.py
+++ b/backend/tests/test_gateway_runtime_cleanup.py
@@ -53,6 +53,29 @@ def test_nginx_routes_official_langgraph_prefix_to_gateway_api():
         assert "proxy_pass http://gateway" in content or "proxy_pass http://$gateway_upstream" in content
 
 
+def test_nginx_defers_cors_to_gateway_allowlist():
+    for path in ("docker/nginx/nginx.local.conf", "docker/nginx/nginx.conf"):
+        content = _read(path)
+
+        assert "Access-Control-Allow-Origin" not in content
+        assert "Access-Control-Allow-Methods" not in content
+        assert "Access-Control-Allow-Headers" not in content
+        assert "Access-Control-Allow-Credentials" not in content
+        assert "proxy_hide_header 'Access-Control-Allow-" not in content
+        assert "if ($request_method = 'OPTIONS')" not in content
+
+
+def test_gateway_cors_configuration_uses_gateway_allowlist():
+    gateway_config = _read("backend/app/gateway/config.py")
+    gateway_app = _read("backend/app/gateway/app.py")
+    csrf_middleware = _read("backend/app/gateway/csrf_middleware.py")
+
+    assert not re.search(r"(?<!GATEWAY_)[\"']CORS_ORIGINS[\"']", gateway_config)
+    assert "cors_origins" not in gateway_config
+    assert "get_configured_cors_origins" in gateway_app
+    assert "GATEWAY_CORS_ORIGINS" in csrf_middleware
+
+
 def test_frontend_rewrites_langgraph_prefix_to_gateway():
     next_config = _read("frontend/next.config.js")
     api_client = _read("frontend/src/core/api/api-client.ts")
diff --git a/docker/nginx/nginx.conf b/docker/nginx/nginx.conf
index a012a1e3b..45be0ab97 100644
--- a/docker/nginx/nginx.conf
+++ b/docker/nginx/nginx.conf
@@ -28,21 +28,11 @@ http {
         set $gateway_upstream gateway:8001;
         set $frontend_upstream frontend:3000;
 
-        # Hide CORS headers from upstream to prevent duplicates
-        proxy_hide_header 'Access-Control-Allow-Origin';
-        proxy_hide_header 'Access-Control-Allow-Methods';
-        proxy_hide_header 'Access-Control-Allow-Headers';
-        proxy_hide_header 'Access-Control-Allow-Credentials';
-
-        # CORS headers for all responses (nginx handles CORS centrally)
-        add_header 'Access-Control-Allow-Origin' '*' always;
-        add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, PATCH, OPTIONS' always;
-        add_header 'Access-Control-Allow-Headers' '*' always;
-
-        # Handle OPTIONS requests (CORS preflight)
-        if ($request_method = 'OPTIONS') {
-            return 204;
-        }
+        # Keep the unified nginx endpoint same-origin by default. When split
+        # frontend/backend or port-forwarded deployments need browser CORS,
+        # configure the Gateway allowlist with GATEWAY_CORS_ORIGINS so CORS and
+        # CSRF origin checks stay aligned instead of approving every origin at
+        # the proxy layer.
 
         # LangGraph-compatible API routes served by Gateway.
         # Rewrites /api/langgraph/* to /api/* before proxying to Gateway.
diff --git a/docker/nginx/nginx.local.conf b/docker/nginx/nginx.local.conf
index eac7f8a04..68ca1f1ac 100644
--- a/docker/nginx/nginx.local.conf
+++ b/docker/nginx/nginx.local.conf
@@ -28,21 +28,11 @@ http {
         listen [::]:2026;
         server_name _;
 
-        # Hide CORS headers from upstream to prevent duplicates
-        proxy_hide_header 'Access-Control-Allow-Origin';
-        proxy_hide_header 'Access-Control-Allow-Methods';
-        proxy_hide_header 'Access-Control-Allow-Headers';
-        proxy_hide_header 'Access-Control-Allow-Credentials';
-
-        # CORS headers for all responses (nginx handles CORS centrally)
-        add_header 'Access-Control-Allow-Origin' '*' always;
-        add_header 'Access-Control-Allow-Methods' 'GET, POST, PUT, DELETE, PATCH, OPTIONS' always;
-        add_header 'Access-Control-Allow-Headers' '*' always;
-
-        # Handle OPTIONS requests (CORS preflight)
-        if ($request_method = 'OPTIONS') {
-            return 204;
-        }
+        # Keep the unified nginx endpoint same-origin by default. When split
+        # frontend/backend or port-forwarded deployments need browser CORS,
+        # configure the Gateway allowlist with GATEWAY_CORS_ORIGINS so CORS and
+        # CSRF origin checks stay aligned instead of approving every origin at
+        # the proxy layer.
 
         # LangGraph-compatible API routes served by Gateway.
         # Rewrites /api/langgraph/* to /api/* before proxying to Gateway.

From 2eb11f97abeeea49d6150ef1f6581fa337e0fe3c Mon Sep 17 00:00:00 2001
From: Nan Gao <thunderggnn@gmail.com>
Date: Mon, 11 May 2026 13:54:00 +0200
Subject: [PATCH 3/8] fix(runtime): persist run message summaries (#2850)

* fix(runtime): persist run message summaries (#2849)

* fix(runtime): dedupe run message summaries
---
 .../harness/deerflow/runtime/journal.py       | 56 ++++++++++-
 backend/tests/test_run_journal.py             | 93 +++++++++++++++++++
 2 files changed, 148 insertions(+), 1 deletion(-)

diff --git a/backend/packages/harness/deerflow/runtime/journal.py b/backend/packages/harness/deerflow/runtime/journal.py
index 41e48efed..8a9382e23 100644
--- a/backend/packages/harness/deerflow/runtime/journal.py
+++ b/backend/packages/harness/deerflow/runtime/journal.py
@@ -20,12 +20,13 @@ from __future__ import annotations
 import asyncio
 import logging
 import time
+from collections.abc import Mapping
 from datetime import UTC, datetime
 from typing import TYPE_CHECKING, Any, cast
 from uuid import UUID
 
 from langchain_core.callbacks import BaseCallbackHandler
-from langchain_core.messages import AnyMessage, BaseMessage, HumanMessage, ToolMessage
+from langchain_core.messages import AIMessage, AnyMessage, BaseMessage, HumanMessage, ToolMessage
 from langgraph.types import Command
 
 if TYPE_CHECKING:
@@ -71,6 +72,7 @@ class RunJournal(BaseCallbackHandler):
         # Dedup: LangChain may fire on_llm_end multiple times for the same run_id
         self._counted_llm_run_ids: set[str] = set()
         self._counted_external_source_ids: set[str] = set()
+        self._counted_message_llm_run_ids: set[str] = set()
 
         # Convenience fields
         self._last_ai_msg: str | None = None
@@ -86,6 +88,50 @@ class RunJournal(BaseCallbackHandler):
 
     # -- Lifecycle callbacks --
 
+    @staticmethod
+    def _message_text(message: BaseMessage) -> str:
+        """Extract displayable text from a message's mixed content shape."""
+        content = getattr(message, "content", None)
+        if isinstance(content, str):
+            return content
+        if isinstance(content, list):
+            parts: list[str] = []
+            for block in content:
+                if isinstance(block, str):
+                    parts.append(block)
+                elif isinstance(block, Mapping):
+                    text = block.get("text")
+                    if isinstance(text, str):
+                        parts.append(text)
+                    else:
+                        nested = block.get("content")
+                        if isinstance(nested, str):
+                            parts.append(nested)
+            return "".join(parts)
+        if isinstance(content, Mapping):
+            for key in ("text", "content"):
+                value = content.get(key)
+                if isinstance(value, str):
+                    return value
+
+        text = getattr(message, "text", None)
+        if isinstance(text, str):
+            return text
+        return ""
+
+    def _record_message_summary(self, message: BaseMessage, *, caller: str | None = None) -> None:
+        """Update run-level convenience fields for persisted run rows."""
+        self._msg_count += 1
+
+        # ``last_ai_message`` should represent the lead agent's user-facing
+        # answer. Middleware/subagent model calls and empty tool-call-only
+        # AI messages must not overwrite the last useful assistant text.
+        is_ai_message = isinstance(message, AIMessage) or getattr(message, "type", None) == "ai"
+        if is_ai_message and (caller is None or caller == "lead_agent"):
+            text = self._message_text(message).strip()
+            if text:
+                self._last_ai_msg = text[:2000]
+
     def on_chain_start(
         self,
         serialized: dict[str, Any],
@@ -164,6 +210,7 @@ class RunJournal(BaseCallbackHandler):
                             content=m.model_dump(),
                             metadata={"caller": caller},
                         )
+                        self._record_message_summary(m, caller=caller)
                         break
                 if self._first_human_msg:
                     break
@@ -222,6 +269,8 @@ class RunJournal(BaseCallbackHandler):
                     "llm_call_index": call_index,
                 },
             )
+            if rid not in self._counted_message_llm_run_ids:
+                self._record_message_summary(message, caller=caller)
 
             # Token accumulation (dedup by langchain run_id to avoid double-counting
             # when the callback fires more than once for the same response)
@@ -245,6 +294,9 @@ class RunJournal(BaseCallbackHandler):
                     else:
                         self._lead_agent_tokens += total_tk
 
+        if messages:
+            self._counted_message_llm_run_ids.add(str(run_id))
+
     def on_llm_error(self, error: BaseException, *, run_id: UUID, **kwargs: Any) -> None:
         self._llm_start_times.pop(str(run_id), None)
         self._put(event_type="llm.error", category="trace", content=str(error))
@@ -260,12 +312,14 @@ class RunJournal(BaseCallbackHandler):
             if isinstance(output, ToolMessage):
                 msg = cast(ToolMessage, output)
                 self._put(event_type="llm.tool.result", category="message", content=msg.model_dump())
+                self._record_message_summary(msg)
             elif isinstance(output, Command):
                 cmd = cast(Command, output)
                 messages = cmd.update.get("messages", [])
                 for message in messages:
                     if isinstance(message, BaseMessage):
                         self._put(event_type="llm.tool.result", category="message", content=message.model_dump())
+                        self._record_message_summary(message)
                     else:
                         logger.warning(f"on_tool_end {run_id}: command update message is not BaseMessage: {type(message)}")
             else:
diff --git a/backend/tests/test_run_journal.py b/backend/tests/test_run_journal.py
index 27c05619c..8615caa49 100644
--- a/backend/tests/test_run_journal.py
+++ b/backend/tests/test_run_journal.py
@@ -339,6 +339,99 @@ class TestConvenienceFields:
         data = j.get_completion_data()
         assert data["first_human_message"] == "What is AI?"
 
+    @pytest.mark.anyio
+    async def test_completion_data_counts_human_ai_and_tool_messages(self, journal_setup):
+        from langchain_core.messages import HumanMessage, ToolMessage
+
+        j, _ = journal_setup
+        j.on_chat_model_start({}, [[HumanMessage(content="Question")]], run_id=uuid4(), tags=["lead_agent"])
+        j.on_llm_end(_make_llm_response("Answer"), run_id=uuid4(), parent_run_id=None, tags=["lead_agent"])
+        j.on_tool_end(ToolMessage(content="Tool result", tool_call_id="call_1", name="search"), run_id=uuid4())
+
+        data = j.get_completion_data()
+
+        assert data["message_count"] == 3
+        assert data["first_human_message"] == "Question"
+        assert data["last_ai_message"] == "Answer"
+
+    @pytest.mark.anyio
+    async def test_tool_call_only_ai_does_not_clear_last_ai_message(self, journal_setup):
+        j, _ = journal_setup
+        j.on_llm_end(_make_llm_response("Useful answer"), run_id=uuid4(), parent_run_id=None, tags=["lead_agent"])
+        j.on_llm_end(
+            _make_llm_response("", tool_calls=[{"id": "call_1", "name": "search", "args": {}}]),
+            run_id=uuid4(),
+            parent_run_id=None,
+            tags=["lead_agent"],
+        )
+
+        data = j.get_completion_data()
+
+        assert data["message_count"] == 2
+        assert data["last_ai_message"] == "Useful answer"
+
+    @pytest.mark.anyio
+    async def test_last_ai_message_extracts_mixed_content_without_extra_newlines(self, journal_setup):
+        j, _ = journal_setup
+        j.on_llm_end(
+            _make_llm_response(
+                [
+                    {"type": "text", "text": "First "},
+                    {"type": "text", "content": "second"},
+                    " third",
+                    {"type": "image", "url": "ignored"},
+                ]
+            ),
+            run_id=uuid4(),
+            parent_run_id=None,
+            tags=["lead_agent"],
+        )
+
+        data = j.get_completion_data()
+
+        assert data["message_count"] == 1
+        assert data["last_ai_message"] == "First second third"
+
+    @pytest.mark.anyio
+    async def test_last_ai_message_extracts_mapping_content(self, journal_setup):
+        j, _ = journal_setup
+        j.on_llm_end(_make_llm_response({"content": "Nested answer"}), run_id=uuid4(), parent_run_id=None, tags=["lead_agent"])
+
+        data = j.get_completion_data()
+
+        assert data["message_count"] == 1
+        assert data["last_ai_message"] == "Nested answer"
+
+    @pytest.mark.anyio
+    async def test_duplicate_llm_run_id_does_not_double_count_message_summary(self, journal_setup):
+        j, _ = journal_setup
+        run_id = uuid4()
+
+        j.on_llm_end(_make_llm_response("Answer", usage=None), run_id=run_id, parent_run_id=None, tags=["lead_agent"])
+        j.on_llm_end(
+            _make_llm_response("Answer", usage={"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}),
+            run_id=run_id,
+            parent_run_id=None,
+            tags=["lead_agent"],
+        )
+
+        data = j.get_completion_data()
+
+        assert data["message_count"] == 1
+        assert data["last_ai_message"] == "Answer"
+        assert data["total_tokens"] == 15
+
+    @pytest.mark.anyio
+    async def test_subagent_ai_does_not_overwrite_lead_last_ai_message(self, journal_setup):
+        j, _ = journal_setup
+        j.on_llm_end(_make_llm_response("Lead answer"), run_id=uuid4(), parent_run_id=None, tags=["lead_agent"])
+        j.on_llm_end(_make_llm_response("Subagent detail"), run_id=uuid4(), parent_run_id=None, tags=["subagent:research"])
+
+        data = j.get_completion_data()
+
+        assert data["message_count"] == 2
+        assert data["last_ai_message"] == "Lead answer"
+
     @pytest.mark.anyio
     async def test_get_completion_data(self, journal_setup):
         j, _ = journal_setup

From de253e4a0a9e4bcfa5fb3ce20e280fc8737ec5fc Mon Sep 17 00:00:00 2001
From: Yi Tang <6054101+yitang@users.noreply.github.com>
Date: Mon, 11 May 2026 14:45:18 +0100
Subject: [PATCH 4/8] feat(run): propagate `model_name` from the gateway
 request through the runtime and persistence stack to the SQLite database.
 (#2775)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(run): propagate model_name from gateway request context to persistence layer

Pass model_name through the full run creation pipeline — from
RunCreateRequest.context in the gateway, through RunManager, to the
RunStore interface and SQL persistence. This enables client-specified
model selection to be recorded per-run in the database.

* feat(run): add model allowlist validation and effective model name capture

- Validate model_name against allowlist in gateway services.py using
  get_app_config().get_model_config()
- Truncate model_name to 128 chars to match DB column constraint
- In worker.py, capture effective model name from agent.metadata after
  agent creation and persist if resolved differently than requested

* feat(run): add defense-in-depth model_name normalization and round-trip persistence tests

- Add _normalize_model_name() to RunRepository for whitespace stripping
  and 128-char truncation before DB writes.
- Add round-trip unit tests for model_name creation and default None
  in test_run_manager.py.

* fix(run): coerce non-string model_name values before strip/truncate in _normalize_model_name

* fix(gateway): add runtime type guard for model_name coercion in gateway services

Add isinstance check and str() coercion before calling .strip() to prevent
AttributeError when non-string types (int, None, etc.) flow through the
gateway. Paired with SQL integration test for end-to-end model_name
persistence across gateway → langgraph → persistence layer.

* fix(run): drop Alembic migration for model_name (no-op) and expose public update method on RunManager

- Drop a1b2c3d4e5f6 migration: model_name already exists in RunRow schema
  and is auto-created via Base.metadata.create_all() at startup
- Add update_model_name() public method to RunManager to replace the private
  _persist_to_store call in worker.py, preserving internal locking/persistence
---
 backend/app/gateway/services.py               | 19 +++++++
 .../harness/deerflow/persistence/run/sql.py   | 14 +++++
 .../harness/deerflow/runtime/runs/manager.py  | 16 ++++++
 .../deerflow/runtime/runs/store/base.py       |  1 +
 .../deerflow/runtime/runs/store/memory.py     |  2 +
 .../harness/deerflow/runtime/runs/worker.py   | 11 ++++
 backend/tests/test_run_manager.py             | 51 +++++++++++++++++++
 backend/tests/test_run_repository.py          | 29 +++++++++++
 8 files changed, 143 insertions(+)

diff --git a/backend/app/gateway/services.py b/backend/app/gateway/services.py
index 0cbea4faf..96521b86f 100644
--- a/backend/app/gateway/services.py
+++ b/backend/app/gateway/services.py
@@ -19,6 +19,7 @@ from langchain_core.messages import HumanMessage
 
 from app.gateway.deps import get_run_context, get_run_manager, get_stream_bridge
 from app.gateway.utils import sanitize_log_param
+from deerflow.config.app_config import get_app_config
 from deerflow.runtime import (
     END_SENTINEL,
     HEARTBEAT_SENTINEL,
@@ -267,6 +268,23 @@ async def start_run(
 
     disconnect = DisconnectMode.cancel if body.on_disconnect == "cancel" else DisconnectMode.continue_
 
+    body_context = getattr(body, "context", None) or {}
+    model_name = body_context.get("model_name")
+
+    # Coerce non-string model_name values to str so allowlist lookup and storage see a string.
+    if model_name is not None and not isinstance(model_name, str):
+        model_name = str(model_name)
+
+    # Validate model against the allowlist when a model_name is provided.
+    if model_name:
+        app_config = get_app_config()
+        resolved = app_config.get_model_config(model_name)
+        if resolved is None:
+            raise HTTPException(
+                status_code=400,
+                detail=f"Model {model_name!r} is not in the configured model allowlist",
+            )
+
     try:
         record = await run_mgr.create_or_reject(
             thread_id,
@@ -275,6 +293,7 @@ async def start_run(
             metadata=body.metadata or {},
             kwargs={"input": body.input, "config": body.config},
             multitask_strategy=body.multitask_strategy,
+            model_name=model_name,
         )
     except ConflictError as exc:
         raise HTTPException(status_code=409, detail=str(exc)) from exc
diff --git a/backend/packages/harness/deerflow/persistence/run/sql.py b/backend/packages/harness/deerflow/persistence/run/sql.py
index fcd1a3411..430fbe4f6 100644
--- a/backend/packages/harness/deerflow/persistence/run/sql.py
+++ b/backend/packages/harness/deerflow/persistence/run/sql.py
@@ -23,6 +23,18 @@ class RunRepository(RunStore):
     def __init__(self, session_factory: async_sessionmaker[AsyncSession]) -> None:
         self._sf = session_factory
 
+    @staticmethod
+    def _normalize_model_name(model_name: str | None) -> str | None:
+        """Normalize model_name for storage: coerce to str, strip whitespace, truncate to 128 chars."""
+        if model_name is None:
+            return None
+        if not isinstance(model_name, str):
+            model_name = str(model_name)
+        normalized = model_name.strip()
+        if len(normalized) > 128:
+            normalized = normalized[:128]
+        return normalized
+
     @staticmethod
     def _safe_json(obj: Any) -> Any:
         """Ensure obj is JSON-serializable. Falls back to model_dump() or str()."""
@@ -70,6 +82,7 @@ class RunRepository(RunStore):
         thread_id,
         assistant_id=None,
         user_id: str | None | _AutoSentinel = AUTO,
+        model_name: str | None = None,
         status="pending",
         multitask_strategy="reject",
         metadata=None,
@@ -85,6 +98,7 @@ class RunRepository(RunStore):
             thread_id=thread_id,
             assistant_id=assistant_id,
             user_id=resolved_user_id,
+            model_name=self._normalize_model_name(model_name),
             status=status,
             multitask_strategy=multitask_strategy,
             metadata_json=self._safe_json(metadata) or {},
diff --git a/backend/packages/harness/deerflow/runtime/runs/manager.py b/backend/packages/harness/deerflow/runtime/runs/manager.py
index 533342c87..50dc594ab 100644
--- a/backend/packages/harness/deerflow/runtime/runs/manager.py
+++ b/backend/packages/harness/deerflow/runtime/runs/manager.py
@@ -36,6 +36,7 @@ class RunRecord:
     abort_event: asyncio.Event = field(default_factory=asyncio.Event, repr=False)
     abort_action: str = "interrupt"
     error: str | None = None
+    model_name: str | None = None
 
 
 class RunManager:
@@ -65,6 +66,7 @@ class RunManager:
                 metadata=record.metadata or {},
                 kwargs=record.kwargs or {},
                 created_at=record.created_at,
+                model_name=record.model_name,
             )
         except Exception:
             logger.warning("Failed to persist run %s to store", record.run_id, exc_info=True)
@@ -137,6 +139,18 @@ class RunManager:
                 logger.warning("Failed to persist status update for run %s", run_id, exc_info=True)
         logger.info("Run %s -> %s", run_id, status.value)
 
+    async def update_model_name(self, run_id: str, model_name: str | None) -> None:
+        """Update a run's model name and persist it; unknown run IDs are logged and ignored."""
+        async with self._lock:
+            record = self._runs.get(run_id)
+            if record is None:
+                logger.warning("update_model_name called for unknown run %s", run_id)
+                return
+            record.model_name = model_name
+            record.updated_at = _now_iso()
+        await self._persist_to_store(record)
+        logger.info("Run %s model_name=%s", run_id, model_name)
+
     async def cancel(self, run_id: str, *, action: str = "interrupt") -> bool:
         """Request cancellation of a run.
 
@@ -171,6 +185,7 @@ class RunManager:
         metadata: dict | None = None,
         kwargs: dict | None = None,
         multitask_strategy: str = "reject",
+        model_name: str | None = None,
     ) -> RunRecord:
         """Atomically check for inflight runs and create a new one.
 
@@ -221,6 +236,7 @@ class RunManager:
                 kwargs=kwargs or {},
                 created_at=now,
                 updated_at=now,
+                model_name=model_name,
             )
             self._runs[run_id] = record
 
diff --git a/backend/packages/harness/deerflow/runtime/runs/store/base.py b/backend/packages/harness/deerflow/runtime/runs/store/base.py
index 518a1903c..d3c10eba6 100644
--- a/backend/packages/harness/deerflow/runtime/runs/store/base.py
+++ b/backend/packages/harness/deerflow/runtime/runs/store/base.py
@@ -23,6 +23,7 @@ class RunStore(abc.ABC):
         thread_id: str,
         assistant_id: str | None = None,
         user_id: str | None = None,
+        model_name: str | None = None,
         status: str = "pending",
         multitask_strategy: str = "reject",
         metadata: dict[str, Any] | None = None,
diff --git a/backend/packages/harness/deerflow/runtime/runs/store/memory.py b/backend/packages/harness/deerflow/runtime/runs/store/memory.py
index 5a14af3df..e41147e3e 100644
--- a/backend/packages/harness/deerflow/runtime/runs/store/memory.py
+++ b/backend/packages/harness/deerflow/runtime/runs/store/memory.py
@@ -22,6 +22,7 @@ class MemoryRunStore(RunStore):
         thread_id,
         assistant_id=None,
         user_id=None,
+        model_name=None,
         status="pending",
         multitask_strategy="reject",
         metadata=None,
@@ -35,6 +36,7 @@ class MemoryRunStore(RunStore):
             "thread_id": thread_id,
             "assistant_id": assistant_id,
             "user_id": user_id,
+            "model_name": model_name,
             "status": status,
             "multitask_strategy": multitask_strategy,
             "metadata": metadata or {},
diff --git a/backend/packages/harness/deerflow/runtime/runs/worker.py b/backend/packages/harness/deerflow/runtime/runs/worker.py
index 2aecb9a1b..f78d425a2 100644
--- a/backend/packages/harness/deerflow/runtime/runs/worker.py
+++ b/backend/packages/harness/deerflow/runtime/runs/worker.py
@@ -230,6 +230,17 @@ async def run_agent(
         else:
             agent = agent_factory(config=runnable_config)
 
+        # Capture the effective (resolved) model name from the agent's metadata.
+        # _resolve_model_name in agent.py may return the default model if the
+        # requested name is not in the allowlist — this update ensures the
+        # persisted model_name reflects the actual model used.
+        if record.model_name is not None:
+            resolved = getattr(agent, "metadata", {}) or {}
+            if isinstance(resolved, dict):
+                effective = resolved.get("model_name")
+                if effective and effective != record.model_name:
+                    await run_manager.update_model_name(record.run_id, effective)
+
         # 4. Attach checkpointer and store
         if checkpointer is not None:
             agent.checkpointer = checkpointer
diff --git a/backend/tests/test_run_manager.py b/backend/tests/test_run_manager.py
index 58ecf1f26..98cd58264 100644
--- a/backend/tests/test_run_manager.py
+++ b/backend/tests/test_run_manager.py
@@ -5,6 +5,7 @@ import re
 import pytest
 
 from deerflow.runtime import RunManager, RunStatus
+from deerflow.runtime.runs.store.memory import MemoryRunStore
 
 ISO_RE = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}")
 
@@ -141,3 +142,53 @@ async def test_create_defaults(manager: RunManager):
     assert record.kwargs == {}
     assert record.multitask_strategy == "reject"
     assert record.assistant_id is None
+
+
+@pytest.mark.anyio
+async def test_model_name_create_or_reject():
+    """create_or_reject should accept and persist model_name."""
+    from deerflow.runtime.runs.schemas import DisconnectMode
+
+    store = MemoryRunStore()  # kept in a local so the persisted row can be inspected directly
+    mgr = RunManager(store=store)
+
+    record = await mgr.create_or_reject(
+        "thread-1",
+        assistant_id="lead_agent",
+        on_disconnect=DisconnectMode.cancel,
+        metadata={"key": "val"},
+        kwargs={"input": {}},
+        multitask_strategy="reject",
+        model_name="anthropic.claude-sonnet-4-20250514-v1:0",
+    )
+    assert record.model_name == "anthropic.claude-sonnet-4-20250514-v1:0"
+    assert record.status == RunStatus.pending  # a freshly created run is expected to be pending
+
+    # Verify model_name was persisted to store
+    stored = await store.get(record.run_id)
+    assert stored is not None
+    assert stored["model_name"] == "anthropic.claude-sonnet-4-20250514-v1:0"
+
+    # Verify retrieval returns the model_name via in-memory record
+    fetched = mgr.get(record.run_id)
+    assert fetched is not None
+    assert fetched.model_name == "anthropic.claude-sonnet-4-20250514-v1:0"
+
+
+@pytest.mark.anyio
+async def test_model_name_default_is_none():
+    """create_or_reject without model_name should default to None."""
+    from deerflow.runtime.runs.schemas import DisconnectMode
+
+    store = MemoryRunStore()
+    mgr = RunManager(store=store)
+
+    record = await mgr.create_or_reject(
+        "thread-1",
+        on_disconnect=DisconnectMode.cancel,  # model_name deliberately omitted so the parameter default is exercised
+    )
+    assert record.model_name is None
+
+    stored = await store.get(record.run_id)
+    assert stored is not None  # fail with a clear assertion instead of a TypeError if nothing was persisted
+    assert stored["model_name"] is None
diff --git a/backend/tests/test_run_repository.py b/backend/tests/test_run_repository.py
index bff49206d..6fd534829 100644
--- a/backend/tests/test_run_repository.py
+++ b/backend/tests/test_run_repository.py
@@ -249,3 +249,32 @@ class TestRunRepository:
         rows = await repo.list_by_thread("t1", user_id=None)
         assert len(rows) == 2
         await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_model_name_persistence(self, tmp_path):
+        """RunRepository should persist, normalize, and truncate model_name correctly via SQL."""
+        from deerflow.persistence.engine import get_session_factory, init_engine
+
+        url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}"  # file-backed SQLite database under pytest's tmp_path
+        await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
+        repo = RunRepository(get_session_factory())
+
+        await repo.put("run-1", thread_id="thread-1", model_name="gpt-4o")
+        row = await repo.get("run-1")
+        assert row is not None
+        assert row["model_name"] == "gpt-4o"
+
+        long_name = "a" * 200  # well past the 128 characters the assertion below expects to survive
+        await repo.put("run-2", thread_id="thread-1", model_name=long_name)
+        row2 = await repo.get("run-2")
+        assert row2["model_name"] == "a" * 128  # over-long names are expected to be cut to 128 characters
+
+        await repo.put("run-3", thread_id="thread-1", model_name=123)  # non-string value on purpose
+        row3 = await repo.get("run-3")
+        assert row3["model_name"] == "123"  # expected to come back in its string form
+
+        await repo.put("run-4", thread_id="thread-1", model_name=None)
+        row4 = await repo.get("run-4")
+        assert row4["model_name"] is None  # None is expected to round-trip unchanged
+
+        await _cleanup()  # NOTE(review): not reached if an assertion above fails

From bedbf2291e182a53c7be6bece9485d44300d1925 Mon Sep 17 00:00:00 2001
From: AochenShen99 <142667174+ShenAC-SAC@users.noreply.github.com>
Date: Mon, 11 May 2026 22:14:13 +0800
Subject: [PATCH 5/8] fix(harness): wrap async-only config tools for sync
 client execution (#2878)

* fix(harness): wrap async-only config tools for sync clients

* refactor(tools): share async tool sync wrapper
---
 .../packages/harness/deerflow/mcp/tools.py    | 45 +------------------
 .../deerflow/tools/skill_manage_tool.py       |  4 +-
 .../packages/harness/deerflow/tools/sync.py   | 36 +++++++++++++++
 .../packages/harness/deerflow/tools/tools.py  | 10 ++++-
 backend/tests/test_mcp_sync_wrapper.py        | 16 +++----
 backend/tests/test_tool_deduplication.py      | 42 ++++++++++++++++-
 6 files changed, 98 insertions(+), 55 deletions(-)
 create mode 100644 backend/packages/harness/deerflow/tools/sync.py

diff --git a/backend/packages/harness/deerflow/mcp/tools.py b/backend/packages/harness/deerflow/mcp/tools.py
index bcd50c645..d27641692 100644
--- a/backend/packages/harness/deerflow/mcp/tools.py
+++ b/backend/packages/harness/deerflow/mcp/tools.py
@@ -1,11 +1,6 @@
 """Load MCP tools using langchain-mcp-adapters."""
 
-import asyncio
-import atexit
-import concurrent.futures
 import logging
-from collections.abc import Callable
-from typing import Any
 
 from langchain_core.tools import BaseTool
 
@@ -13,46 +8,10 @@ from deerflow.config.extensions_config import ExtensionsConfig
 from deerflow.mcp.client import build_servers_config
 from deerflow.mcp.oauth import build_oauth_tool_interceptor, get_initial_oauth_headers
 from deerflow.reflection import resolve_variable
+from deerflow.tools.sync import make_sync_tool_wrapper
 
 logger = logging.getLogger(__name__)
 
-# Global thread pool for sync tool invocation in async environments
-_SYNC_TOOL_EXECUTOR = concurrent.futures.ThreadPoolExecutor(max_workers=10, thread_name_prefix="mcp-sync-tool")
-
-# Register shutdown hook for the global executor
-atexit.register(lambda: _SYNC_TOOL_EXECUTOR.shutdown(wait=False))
-
-
-def _make_sync_tool_wrapper(coro: Callable[..., Any], tool_name: str) -> Callable[..., Any]:
-    """Build a synchronous wrapper for an asynchronous tool coroutine.
-
-    Args:
-        coro: The tool's asynchronous coroutine.
-        tool_name: Name of the tool (for logging).
-
-    Returns:
-        A synchronous function that correctly handles nested event loops.
-    """
-
-    def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
-        try:
-            loop = asyncio.get_running_loop()
-        except RuntimeError:
-            loop = None
-
-        try:
-            if loop is not None and loop.is_running():
-                # Use global executor to avoid nested loop issues and improve performance
-                future = _SYNC_TOOL_EXECUTOR.submit(asyncio.run, coro(*args, **kwargs))
-                return future.result()
-            else:
-                return asyncio.run(coro(*args, **kwargs))
-        except Exception as e:
-            logger.error(f"Error invoking MCP tool '{tool_name}' via sync wrapper: {e}", exc_info=True)
-            raise
-
-    return sync_wrapper
-
 
 async def get_mcp_tools() -> list[BaseTool]:
     """Get all tools from enabled MCP servers.
@@ -126,7 +85,7 @@ async def get_mcp_tools() -> list[BaseTool]:
         # Patch tools to support sync invocation, as deerflow client streams synchronously
         for tool in tools:
             if getattr(tool, "func", None) is None and getattr(tool, "coroutine", None) is not None:
-                tool.func = _make_sync_tool_wrapper(tool.coroutine, tool.name)
+                tool.func = make_sync_tool_wrapper(tool.coroutine, tool.name)
 
         return tools
 
diff --git a/backend/packages/harness/deerflow/tools/skill_manage_tool.py b/backend/packages/harness/deerflow/tools/skill_manage_tool.py
index 46865242c..2a39732bc 100644
--- a/backend/packages/harness/deerflow/tools/skill_manage_tool.py
+++ b/backend/packages/harness/deerflow/tools/skill_manage_tool.py
@@ -10,11 +10,11 @@ from weakref import WeakValueDictionary
 from langchain.tools import tool
 
 from deerflow.agents.lead_agent.prompt import refresh_skills_system_prompt_cache_async
-from deerflow.mcp.tools import _make_sync_tool_wrapper
 from deerflow.skills.security_scanner import scan_skill_content
 from deerflow.skills.storage import get_or_new_skill_storage
 from deerflow.skills.storage.skill_storage import SkillStorage
 from deerflow.skills.types import SKILL_MD_FILE
+from deerflow.tools.sync import make_sync_tool_wrapper
 from deerflow.tools.types import Runtime
 
 logger = logging.getLogger(__name__)
@@ -235,4 +235,4 @@ async def skill_manage_tool(
     )
 
 
-skill_manage_tool.func = _make_sync_tool_wrapper(_skill_manage_impl, "skill_manage")
+skill_manage_tool.func = make_sync_tool_wrapper(_skill_manage_impl, "skill_manage")
diff --git a/backend/packages/harness/deerflow/tools/sync.py b/backend/packages/harness/deerflow/tools/sync.py
new file mode 100644
index 000000000..c2b80781a
--- /dev/null
+++ b/backend/packages/harness/deerflow/tools/sync.py
@@ -0,0 +1,36 @@
+"""Utilities for invoking async tools from synchronous agent paths."""
+
+import asyncio
+import atexit
+import concurrent.futures
+import logging
+from collections.abc import Callable
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+# Shared thread pool for sync tool invocation in async environments.
+_SYNC_TOOL_EXECUTOR = concurrent.futures.ThreadPoolExecutor(max_workers=10, thread_name_prefix="tool-sync")
+
+atexit.register(lambda: _SYNC_TOOL_EXECUTOR.shutdown(wait=False))
+
+
+def make_sync_tool_wrapper(coro: Callable[..., Any], tool_name: str) -> Callable[..., Any]:
+    """Build a synchronous wrapper for an asynchronous tool coroutine."""
+
+    def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
+        try:
+            loop = asyncio.get_running_loop()  # raises RuntimeError when this thread has no running loop
+        except RuntimeError:
+            loop = None
+
+        try:
+            if loop is not None and loop.is_running():
+                future = _SYNC_TOOL_EXECUTOR.submit(asyncio.run, coro(*args, **kwargs))  # fresh event loop on a pool thread
+                return future.result()  # blocks the caller until the pool thread finishes
+            return asyncio.run(coro(*args, **kwargs))  # no loop running here, so drive the coroutine directly
+        except Exception as e:
+            logger.error("Error invoking tool %r via sync wrapper: %s", tool_name, e, exc_info=True)
+            raise  # logged above, then propagated unchanged
+
+    return sync_wrapper
diff --git a/backend/packages/harness/deerflow/tools/tools.py b/backend/packages/harness/deerflow/tools/tools.py
index 14d93e65f..01bfce43f 100644
--- a/backend/packages/harness/deerflow/tools/tools.py
+++ b/backend/packages/harness/deerflow/tools/tools.py
@@ -8,6 +8,7 @@ from deerflow.reflection import resolve_variable
 from deerflow.sandbox.security import is_host_bash_allowed
 from deerflow.tools.builtins import ask_clarification_tool, present_file_tool, task_tool, view_image_tool
 from deerflow.tools.builtins.tool_search import reset_deferred_registry
+from deerflow.tools.sync import make_sync_tool_wrapper
 
 logger = logging.getLogger(__name__)
 
@@ -33,6 +34,13 @@ def _is_host_bash_tool(tool: object) -> bool:
     return False
 
 
+def _ensure_sync_invocable_tool(tool: BaseTool) -> BaseTool:
+    """Attach a sync wrapper to async-only tools used by sync agent callers."""
+    if getattr(tool, "func", None) is None and getattr(tool, "coroutine", None) is not None:  # async-only: coroutine set, no sync func
+        tool.func = make_sync_tool_wrapper(tool.coroutine, tool.name)
+    return tool  # the same object that was passed in, wrapped in place when needed
+
+
 def get_available_tools(
     groups: list[str] | None = None,
     include_mcp: bool = True,
@@ -77,7 +85,7 @@ def get_available_tools(
                 cfg.use,
             )
 
-    loaded_tools = [t for _, t in loaded_tools_raw]
+    loaded_tools = [_ensure_sync_invocable_tool(t) for _, t in loaded_tools_raw]
 
     # Conditionally add tools based on config
     builtin_tools = BUILTIN_TOOLS.copy()
diff --git a/backend/tests/test_mcp_sync_wrapper.py b/backend/tests/test_mcp_sync_wrapper.py
index 376d1a790..285200781 100644
--- a/backend/tests/test_mcp_sync_wrapper.py
+++ b/backend/tests/test_mcp_sync_wrapper.py
@@ -5,7 +5,8 @@ import pytest
 from langchain_core.tools import StructuredTool
 from pydantic import BaseModel, Field
 
-from deerflow.mcp.tools import _make_sync_tool_wrapper, get_mcp_tools
+from deerflow.mcp.tools import get_mcp_tools
+from deerflow.tools.sync import make_sync_tool_wrapper
 
 
 class MockArgs(BaseModel):
@@ -51,14 +52,13 @@ def test_mcp_tool_sync_wrapper_generation():
 
 
 def test_mcp_tool_sync_wrapper_in_running_loop():
-    """Test the actual helper function from production code (Fix for Comment 1 & 3)."""
+    """Test the shared sync wrapper from production code."""
 
     async def mock_coro(x: int):
         await asyncio.sleep(0.01)
         return f"async_result: {x}"
 
-    # Test the real helper function exported from deerflow.mcp.tools
-    sync_func = _make_sync_tool_wrapper(mock_coro, "test_tool")
+    sync_func = make_sync_tool_wrapper(mock_coro, "test_tool")
 
     async def run_in_loop():
         # This call should succeed due to ThreadPoolExecutor in the real helper
@@ -70,16 +70,16 @@ def test_mcp_tool_sync_wrapper_in_running_loop():
 
 
 def test_mcp_tool_sync_wrapper_exception_logging():
-    """Test the actual helper's error logging (Fix for Comment 3)."""
+    """Test the shared sync wrapper's error logging."""
 
     async def error_coro():
         raise ValueError("Tool failure")
 
-    sync_func = _make_sync_tool_wrapper(error_coro, "error_tool")
+    sync_func = make_sync_tool_wrapper(error_coro, "error_tool")
 
-    with patch("deerflow.mcp.tools.logger.error") as mock_log_error:
+    with patch("deerflow.tools.sync.logger.error") as mock_log_error:
         with pytest.raises(ValueError, match="Tool failure"):
             sync_func()
         mock_log_error.assert_called_once()
         # Verify the tool name is in the log message
-        assert "error_tool" in mock_log_error.call_args[0][0]
+        assert mock_log_error.call_args[0][1] == "error_tool"
diff --git a/backend/tests/test_tool_deduplication.py b/backend/tests/test_tool_deduplication.py
index 35ec0bea6..ed9efffaf 100644
--- a/backend/tests/test_tool_deduplication.py
+++ b/backend/tests/test_tool_deduplication.py
@@ -10,7 +10,8 @@ from __future__ import annotations
 
 from unittest.mock import MagicMock, patch
 
-from langchain_core.tools import BaseTool, tool
+from langchain_core.tools import BaseTool, StructuredTool, tool
+from pydantic import BaseModel, Field
 
 from deerflow.tools.tools import get_available_tools
 
@@ -19,6 +20,10 @@ from deerflow.tools.tools import get_available_tools
 # ---------------------------------------------------------------------------
 
 
+class AsyncToolArgs(BaseModel):  # argument schema used by the async-only StructuredTool test in this module
+    x: int = Field(..., description="test input")
+
+
 @tool
 def _tool_alpha(x: str) -> str:
     """Alpha tool."""
@@ -52,10 +57,45 @@ def _make_minimal_config(tools):
     config.tools = tools
     config.models = []
     config.tool_search.enabled = False
+    config.skill_evolution.enabled = False
     config.sandbox = MagicMock()
+    config.acp_agents = {}
     return config
 
 
+@patch("deerflow.tools.tools.get_app_config")
+@patch("deerflow.tools.tools.is_host_bash_allowed", return_value=True)
+@patch("deerflow.tools.tools.reset_deferred_registry")
+def test_config_loaded_async_only_tool_gets_sync_wrapper(mock_reset, mock_bash, mock_cfg):  # mocks arrive bottom-up: nearest decorator first
+    """Config-loaded async-only tools can still be invoked by sync clients."""
+
+    async def async_tool_impl(x: int) -> str:
+        return f"result: {x}"
+
+    async_tool = StructuredTool(
+        name="async_tool",
+        description="Async-only test tool.",
+        args_schema=AsyncToolArgs,
+        func=None,  # no sync implementation: only the coroutine is provided
+        coroutine=async_tool_impl,
+    )
+    tool_cfg = MagicMock()
+    tool_cfg.name = "async_tool"
+    tool_cfg.group = "test"
+    tool_cfg.use = "tests.fake:async_tool"
+    mock_cfg.return_value = _make_minimal_config([tool_cfg])
+
+    with (
+        patch("deerflow.tools.tools.resolve_variable", return_value=async_tool),  # every resolved tool is the async-only tool above
+        patch("deerflow.tools.tools.BUILTIN_TOOLS", []),  # builtin tool list emptied for this call
+    ):
+        result = get_available_tools(include_mcp=False, app_config=mock_cfg.return_value)
+
+    assert async_tool in result
+    assert async_tool.func is not None  # a sync callable was attached while loading
+    assert async_tool.invoke({"x": 42}) == "result: 42"  # synchronous invoke returns the coroutine's result
+
+
 @patch("deerflow.tools.tools.get_app_config")
 @patch("deerflow.tools.tools.is_host_bash_allowed", return_value=True)
 @patch("deerflow.tools.tools.reset_deferred_registry")

From 1f978393ec6558b3c91f30475f28b805ad0bb803 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 12 May 2026 10:35:34 +0800
Subject: [PATCH 6/8] chore(deps): bump urllib3 from 2.6.3 to 2.7.0 in /backend
 (#2898)

Bumps [urllib3](https://github.com/urllib3/urllib3) from 2.6.3 to 2.7.0.
- [Release notes](https://github.com/urllib3/urllib3/releases)
- [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst)
- [Commits](https://github.com/urllib3/urllib3/compare/2.6.3...2.7.0)

---
updated-dependencies:
- dependency-name: urllib3
  dependency-version: 2.7.0
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 backend/uv.lock | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/backend/uv.lock b/backend/uv.lock
index 64cab46d9..e144fb07e 100644
--- a/backend/uv.lock
+++ b/backend/uv.lock
@@ -4224,11 +4224,11 @@ wheels = [
 
 [[package]]
 name = "urllib3"
-version = "2.6.3"
+version = "2.7.0"
 source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/53/0c/06f8b233b8fd13b9e5ee11424ef85419ba0d8ba0b3138bf360be2ff56953/urllib3-2.7.0.tar.gz", hash = "sha256:231e0ec3b63ceb14667c67be60f2f2c40a518cb38b03af60abc813da26505f4c", size = 433602, upload-time = "2026-05-07T16:13:18.596Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/3e/5db95bcf282c52709639744ca2a8b149baccf648e39c8cc87553df9eae0c/urllib3-2.7.0-py3-none-any.whl", hash = "sha256:9fb4c81ebbb1ce9531cce37674bbc6f1360472bc18ca9a553ede278ef7276897", size = 131087, upload-time = "2026-05-07T16:13:17.151Z" },
 ]
 
 [[package]]

From 0009655454cda708654ebe41c6ede5cb9e3fc760 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Tue, 12 May 2026 10:45:40 +0800
Subject: [PATCH 7/8] chore(deps): bump next from 16.1.7 to 16.2.6 in /frontend
 (#2899)

Bumps [next](https://github.com/vercel/next.js) from 16.1.7 to 16.2.6.
- [Release notes](https://github.com/vercel/next.js/releases)
- [Changelog](https://github.com/vercel/next.js/blob/canary/release.js)
- [Commits](https://github.com/vercel/next.js/compare/v16.1.7...v16.2.6)

---
updated-dependencies:
- dependency-name: next
  dependency-version: 16.2.6
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 frontend/package.json   |   2 +-
 frontend/pnpm-lock.yaml | 154 ++++++++++++++++++++++------------------
 2 files changed, 85 insertions(+), 71 deletions(-)

diff --git a/frontend/package.json b/frontend/package.json
index 2ce4e2f6d..0a46ee452 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -68,7 +68,7 @@
     "lucide-react": "^0.562.0",
     "motion": "^12.26.2",
     "nanoid": "^5.1.6",
-    "next": "^16.1.7",
+    "next": "^16.2.6",
     "next-themes": "^0.4.6",
     "nextra": "^4.6.1",
     "nextra-theme-docs": "^4.6.1",
diff --git a/frontend/pnpm-lock.yaml b/frontend/pnpm-lock.yaml
index d27c6687c..8c80061c9 100644
--- a/frontend/pnpm-lock.yaml
+++ b/frontend/pnpm-lock.yaml
@@ -156,17 +156,17 @@ importers:
         specifier: ^5.1.6
         version: 5.1.6
       next:
-        specifier: ^16.1.7
-        version: 16.1.7(@opentelemetry/api@1.9.0)(@playwright/test@1.59.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
+        specifier: ^16.2.6
+        version: 16.2.6(@opentelemetry/api@1.9.0)(@playwright/test@1.59.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
       next-themes:
         specifier: ^0.4.6
         version: 0.4.6(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
       nextra:
         specifier: ^4.6.1
-        version: 4.6.1(next@16.1.7(@opentelemetry/api@1.9.0)(@playwright/test@1.59.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(typescript@5.9.3)
+        version: 4.6.1(next@16.2.6(@opentelemetry/api@1.9.0)(@playwright/test@1.59.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(typescript@5.9.3)
       nextra-theme-docs:
         specifier: ^4.6.1
-        version: 4.6.1(@types/react@19.2.13)(next@16.1.7(@opentelemetry/api@1.9.0)(@playwright/test@1.59.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4))(nextra@4.6.1(next@16.1.7(@opentelemetry/api@1.9.0)(@playwright/test@1.59.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(typescript@5.9.3))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(use-sync-external-store@1.6.0(react@19.2.4))
+        version: 4.6.1(@types/react@19.2.13)(next@16.2.6(@opentelemetry/api@1.9.0)(@playwright/test@1.59.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4))(nextra@4.6.1(next@16.2.6(@opentelemetry/api@1.9.0)(@playwright/test@1.59.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(typescript@5.9.3))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(use-sync-external-store@1.6.0(react@19.2.4))
       nuxt-og-image:
         specifier: ^5.1.13
         version: 5.1.13(@unhead/vue@2.1.4(vue@3.5.28(typescript@5.9.3)))(unstorage@1.17.4)(vite@7.3.1(@types/node@20.19.33)(jiti@2.6.1)(lightningcss@1.30.2)(yaml@2.8.3))(vue@3.5.28(typescript@5.9.3))
@@ -437,8 +437,8 @@ packages:
   '@emnapi/core@1.8.1':
     resolution: {integrity: sha512-AvT9QFpxK0Zd8J0jopedNm+w/2fIzvtPKPjqyw9jwvBaReTTqPBk9Hixaz7KbjimP+QNz605/XnjFcDAL2pqBg==}
 
-  '@emnapi/runtime@1.9.0':
-    resolution: {integrity: sha512-QN75eB0IH2ywSpRpNddCRfQIhmJYBCJ1x5Lb3IscKAL8bMnVAKnRg8dCoXbHzVLLH7P38N2Z3mtulB7W0J0FKw==}
+  '@emnapi/runtime@1.10.0':
+    resolution: {integrity: sha512-ewvYlk86xUoGI0zQRNq/mC+16R1QeDlKQy21Ki3oSYXNgLb45GV1P6A0M+/s6nyCuNDqe5VpaY84BzXGwVbwFA==}
 
   '@emnapi/wasi-threads@1.1.0':
     resolution: {integrity: sha512-WI0DdZ8xFSbgMjR1sFsKABJ/C5OnRrjT06JXbZKexJGrDuPTzZdDYfFlsgcCXCyf+suG5QU2e/y1Wo2V/OapLQ==}
@@ -1018,56 +1018,56 @@ packages:
   '@napi-rs/wasm-runtime@0.2.12':
     resolution: {integrity: sha512-ZVWUcfwY4E/yPitQJl481FjFo3K22D6qF0DuFH6Y/nbnE11GY5uguDxZMGXPQ8WQ0128MXQD7TnfHyK4oWoIJQ==}
 
-  '@next/env@16.1.7':
-    resolution: {integrity: sha512-rJJbIdJB/RQr2F1nylZr/PJzamvNNhfr3brdKP6s/GW850jbtR70QlSfFselvIBbcPUOlQwBakexjFzqLzF6pg==}
+  '@next/env@16.2.6':
+    resolution: {integrity: sha512-gd8HoHN4ufj73WmR3JmVolrpJR47ILK6LouP5xElPglaVxir6e1a7VzvTvDWkOoPXT9rkkTzyCxBu4yeZfZwcw==}
 
   '@next/eslint-plugin-next@15.5.12':
     resolution: {integrity: sha512-+ZRSDFTv4aC96aMb5E41rMjysx8ApkryevnvEYZvPZO52KvkqP5rNExLUXJFr9P4s0f3oqNQR6vopCZsPWKDcQ==}
 
-  '@next/swc-darwin-arm64@16.1.7':
-    resolution: {integrity: sha512-b2wWIE8sABdyafc4IM8r5Y/dS6kD80JRtOGrUiKTsACFQfWWgUQ2NwoUX1yjFMXVsAwcQeNpnucF2ZrujsBBPg==}
+  '@next/swc-darwin-arm64@16.2.6':
+    resolution: {integrity: sha512-ZJGkkcNfYgrrMkqOdZ7zoLa1TOy0qpcMfk/z4Mh/FKUz40gVO+HNQWqmLxf67Z5WB64DRp0dhEbyHfel+6sJUg==}
     engines: {node: '>= 10'}
     cpu: [arm64]
     os: [darwin]
 
-  '@next/swc-darwin-x64@16.1.7':
-    resolution: {integrity: sha512-zcnVaaZulS1WL0Ss38R5Q6D2gz7MtBu8GZLPfK+73D/hp4GFMrC2sudLky1QibfV7h6RJBJs/gOFvYP0X7UVlQ==}
+  '@next/swc-darwin-x64@16.2.6':
+    resolution: {integrity: sha512-v/YLBHIY132Ced3puBJ7YJKw1lqsCrgcNo2aRJlCEyQrrCeRJlvGlnmxhPxNQI3KE3N1DN5r9TPNPvka3nq5RQ==}
     engines: {node: '>= 10'}
     cpu: [x64]
     os: [darwin]
 
-  '@next/swc-linux-arm64-gnu@16.1.7':
-    resolution: {integrity: sha512-2ant89Lux/Q3VyC8vNVg7uBaFVP9SwoK2jJOOR0L8TQnX8CAYnh4uctAScy2Hwj2dgjVHqHLORQZJ2wH6VxhSQ==}
+  '@next/swc-linux-arm64-gnu@16.2.6':
+    resolution: {integrity: sha512-RPOvqlYBbcQjkz9VQQDZ2T2bARIjXZV1KFlt+V2Mr6SW/e4I9fcKsaA0hdyf2FHoTlsV2xnBd5Y912rP/1Ce6w==}
     engines: {node: '>= 10'}
     cpu: [arm64]
     os: [linux]
 
-  '@next/swc-linux-arm64-musl@16.1.7':
-    resolution: {integrity: sha512-uufcze7LYv0FQg9GnNeZ3/whYfo+1Q3HnQpm16o6Uyi0OVzLlk2ZWoY7j07KADZFY8qwDbsmFnMQP3p3+Ftprw==}
+  '@next/swc-linux-arm64-musl@16.2.6':
+    resolution: {integrity: sha512-URUTu1+dMkxJsPFgm+OeEvq9wf5sujw0EvgYy80TDGHTSLTnIHeqb0Eu8A3sC95IRgjejQL+kC4mw+4yPxiAXA==}
     engines: {node: '>= 10'}
     cpu: [arm64]
     os: [linux]
 
-  '@next/swc-linux-x64-gnu@16.1.7':
-    resolution: {integrity: sha512-KWVf2gxYvHtvuT+c4MBOGxuse5TD7DsMFYSxVxRBnOzok/xryNeQSjXgxSv9QpIVlaGzEn/pIuI6Koosx8CGWA==}
+  '@next/swc-linux-x64-gnu@16.2.6':
+    resolution: {integrity: sha512-DOj182mPV8G3UkrayLoREM5YEYI+Dk5wv7Ox9xl1fFibAELEsFD0lDPfHIeILlutMMfdyhlzYPELG3peuKaurw==}
     engines: {node: '>= 10'}
     cpu: [x64]
     os: [linux]
 
-  '@next/swc-linux-x64-musl@16.1.7':
-    resolution: {integrity: sha512-HguhaGwsGr1YAGs68uRKc4aGWxLET+NevJskOcCAwXbwj0fYX0RgZW2gsOCzr9S11CSQPIkxmoSbuVaBp4Z3dA==}
+  '@next/swc-linux-x64-musl@16.2.6':
+    resolution: {integrity: sha512-HKQ5SP/V/ub73UvF7n/zeJlxk2kLmtL7Wzrg4WfmkjmNos5onJ2tKu7yZOPdL18A6Svfn3max29ym+ry7NkK4g==}
     engines: {node: '>= 10'}
     cpu: [x64]
     os: [linux]
 
-  '@next/swc-win32-arm64-msvc@16.1.7':
-    resolution: {integrity: sha512-S0n3KrDJokKTeFyM/vGGGR8+pCmXYrjNTk2ZozOL1C/JFdfUIL9O1ATaJOl5r2POe56iRChbsszrjMAdWSv7kQ==}
+  '@next/swc-win32-arm64-msvc@16.2.6':
+    resolution: {integrity: sha512-LZXpTlPyS5v7HhSmnvsLGP3iIYgYOBnc8r8ArlT55sGHV89bR2HlDdBjWQ+PY6SJMmk8TuVGFuxalnP3k/0Dwg==}
     engines: {node: '>= 10'}
     cpu: [arm64]
     os: [win32]
 
-  '@next/swc-win32-x64-msvc@16.1.7':
-    resolution: {integrity: sha512-mwgtg8CNZGYm06LeEd+bNnOUfwOyNem/rOiP14Lsz+AnUY92Zq/LXwtebtUiaeVkhbroRCQ0c8GlR4UT1U+0yg==}
+  '@next/swc-win32-x64-msvc@16.2.6':
+    resolution: {integrity: sha512-F0+4i0h9J6C4eE3EAPWsoCk7UW/dbzOjyzxY0qnDUOYFu6FFmdZ6l97/XdV3/Nz3VYyO7UWjyEJUXkGqcoXfMA==}
     engines: {node: '>= 10'}
     cpu: [x64]
     os: [win32]
@@ -1912,6 +1912,9 @@ packages:
   '@swc/helpers@0.5.15':
     resolution: {integrity: sha512-JQ5TuMi45Owi4/BIMAJBoSQoOJu12oOk/gADqlcUL9JEdHB8vyjUSsxqeNXnmXHjYKMi2WcYtezGEEhqUI/E2g==}
 
+  '@swc/helpers@0.5.21':
+    resolution: {integrity: sha512-jI/VAmtdjB/RnI8GTnokyX7Ug8c+g+ffD6QRLa6XQewtnGyukKkKSk3wLTM3b5cjt1jNh9x0jfVlagdN2gDKQg==}
+
   '@t3-oss/env-core@0.12.0':
     resolution: {integrity: sha512-lOPj8d9nJJTt81mMuN9GMk8x5veOt7q9m11OSnCBJhwp1QrL/qR+M8Y467ULBSm9SunosryWNbmQQbgoiMgcdw==}
     peerDependencies:
@@ -2652,8 +2655,8 @@ packages:
   base64-js@1.5.1:
     resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==}
 
-  baseline-browser-mapping@2.10.8:
-    resolution: {integrity: sha512-PCLz/LXGBsNTErbtB6i5u4eLpHeMfi93aUv5duMmj6caNu6IphS4q6UevDnL36sZQv9lrP11dbPKGMaXPwMKfQ==}
+  baseline-browser-mapping@2.10.29:
+    resolution: {integrity: sha512-Asa2krT+XTPZINCS+2QcyS8WTkObE77RwkydwF7h6DmnKqbvlalz93m/dnphUyCa6SWSP51VgtEUf2FN+gelFQ==}
     engines: {node: '>=6.0.0'}
     hasBin: true
 
@@ -2710,8 +2713,8 @@ packages:
   camelize@1.0.1:
     resolution: {integrity: sha512-dU+Tx2fsypxTgtLoE36npi3UqcjSSMNYfkqgmoEhtZrraP5VWq0K7FkWVTYa8eMPtnU/G2txVsfdCJTn9uzpuQ==}
 
-  caniuse-lite@1.0.30001780:
-    resolution: {integrity: sha512-llngX0E7nQci5BPJDqoZSbuZ5Bcs9F5db7EtgfwBerX9XGtkkiO4NwfDDIRzHTTwcYC8vC7bmeUEPGrKlR/TkQ==}
+  caniuse-lite@1.0.30001792:
+    resolution: {integrity: sha512-hVLMUZFgR4JJ6ACt1uEESvQN1/dBVqPAKY0hgrV70eN3391K6juAfTjKZLKvOMsx8PxA7gsY1/tLMMTcfFLLpw==}
 
   canvas-confetti@1.9.4:
     resolution: {integrity: sha512-yxQbJkAVrFXWNbTUjPqjF7G+g6pDotOUHGbkZq2NELZUMDpiJ85rIEazVb8GTaAptNW2miJAXbs1BtioA251Pw==}
@@ -4389,8 +4392,8 @@ packages:
       react: ^16.8 || ^17 || ^18 || ^19 || ^19.0.0-rc
       react-dom: ^16.8 || ^17 || ^18 || ^19 || ^19.0.0-rc
 
-  next@16.1.7:
-    resolution: {integrity: sha512-WM0L7WrSvKwoLegLYr6V+mz+RIofqQgVAfHhMp9a88ms0cFX8iX9ew+snpWlSBwpkURJOUdvCEt3uLl3NNzvWg==}
+  next@16.2.6:
+    resolution: {integrity: sha512-qOVgKJg1+At15NpeUP+eJgCHvTCgXsogweq87Ri/Ix7PkqQHg4sdaXmSFqKlgaIXE4kW0g25LE68W87UANlHtw==}
     engines: {node: '>=20.9.0'}
     hasBin: true
     peerDependencies:
@@ -5013,6 +5016,11 @@ packages:
     engines: {node: '>=10'}
     hasBin: true
 
+  semver@7.8.0:
+    resolution: {integrity: sha512-AcM7dV/5ul4EekoQ29Agm5vri8JNqRyj39o0qpX6vDF2GZrtutZl5RwgD1XnZjiTAfncsJhMI48QQH3sN87YNA==}
+    engines: {node: '>=10'}
+    hasBin: true
+
   server-only@0.0.1:
     resolution: {integrity: sha512-qepMx2JxAa5jjfzxG79yPPq+8BuFToHd1hm7kI+Z4zAq1ftQiP7HcxMhDDItrbtwVeLg/cY2JnKnrcFkmiswNA==}
 
@@ -6066,7 +6074,7 @@ snapshots:
       tslib: 2.8.1
     optional: true
 
-  '@emnapi/runtime@1.9.0':
+  '@emnapi/runtime@1.10.0':
     dependencies:
       tslib: 2.8.1
     optional: true
@@ -6343,7 +6351,7 @@ snapshots:
 
   '@img/sharp-wasm32@0.34.5':
     dependencies:
-      '@emnapi/runtime': 1.9.0
+      '@emnapi/runtime': 1.10.0
     optional: true
 
   '@img/sharp-win32-arm64@0.34.5':
@@ -6598,38 +6606,38 @@ snapshots:
   '@napi-rs/wasm-runtime@0.2.12':
     dependencies:
       '@emnapi/core': 1.8.1
-      '@emnapi/runtime': 1.9.0
+      '@emnapi/runtime': 1.10.0
       '@tybys/wasm-util': 0.10.1
     optional: true
 
-  '@next/env@16.1.7': {}
+  '@next/env@16.2.6': {}
 
   '@next/eslint-plugin-next@15.5.12':
     dependencies:
       fast-glob: 3.3.1
 
-  '@next/swc-darwin-arm64@16.1.7':
+  '@next/swc-darwin-arm64@16.2.6':
     optional: true
 
-  '@next/swc-darwin-x64@16.1.7':
+  '@next/swc-darwin-x64@16.2.6':
     optional: true
 
-  '@next/swc-linux-arm64-gnu@16.1.7':
+  '@next/swc-linux-arm64-gnu@16.2.6':
     optional: true
 
-  '@next/swc-linux-arm64-musl@16.1.7':
+  '@next/swc-linux-arm64-musl@16.2.6':
     optional: true
 
-  '@next/swc-linux-x64-gnu@16.1.7':
+  '@next/swc-linux-x64-gnu@16.2.6':
     optional: true
 
-  '@next/swc-linux-x64-musl@16.1.7':
+  '@next/swc-linux-x64-musl@16.2.6':
     optional: true
 
-  '@next/swc-win32-arm64-msvc@16.1.7':
+  '@next/swc-win32-arm64-msvc@16.2.6':
     optional: true
 
-  '@next/swc-win32-x64-msvc@16.1.7':
+  '@next/swc-win32-x64-msvc@16.2.6':
     optional: true
 
   '@nodelib/fs.scandir@2.1.5':
@@ -7192,7 +7200,7 @@ snapshots:
       '@react-aria/interactions': 3.27.1(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
       '@react-aria/utils': 3.33.1(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
       '@react-types/shared': 3.33.1(react@19.2.4)
-      '@swc/helpers': 0.5.15
+      '@swc/helpers': 0.5.21
       clsx: 2.1.1
       react: 19.2.4
       react-dom: 19.2.4(react@19.2.4)
@@ -7203,13 +7211,13 @@ snapshots:
       '@react-aria/utils': 3.33.1(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
       '@react-stately/flags': 3.1.2
       '@react-types/shared': 3.33.1(react@19.2.4)
-      '@swc/helpers': 0.5.15
+      '@swc/helpers': 0.5.21
       react: 19.2.4
       react-dom: 19.2.4(react@19.2.4)
 
   '@react-aria/ssr@3.9.10(react@19.2.4)':
     dependencies:
-      '@swc/helpers': 0.5.15
+      '@swc/helpers': 0.5.21
       react: 19.2.4
 
   '@react-aria/utils@3.33.1(react-dom@19.2.4(react@19.2.4))(react@19.2.4)':
@@ -7218,18 +7226,18 @@ snapshots:
       '@react-stately/flags': 3.1.2
       '@react-stately/utils': 3.11.0(react@19.2.4)
       '@react-types/shared': 3.33.1(react@19.2.4)
-      '@swc/helpers': 0.5.15
+      '@swc/helpers': 0.5.21
       clsx: 2.1.1
       react: 19.2.4
       react-dom: 19.2.4(react@19.2.4)
 
   '@react-stately/flags@3.1.2':
     dependencies:
-      '@swc/helpers': 0.5.15
+      '@swc/helpers': 0.5.21
 
   '@react-stately/utils@3.11.0(react@19.2.4)':
     dependencies:
-      '@swc/helpers': 0.5.15
+      '@swc/helpers': 0.5.21
       react: 19.2.4
 
   '@react-types/shared@3.33.1(react@19.2.4)':
@@ -7437,6 +7445,10 @@ snapshots:
     dependencies:
       tslib: 2.8.1
 
+  '@swc/helpers@0.5.21':
+    dependencies:
+      tslib: 2.8.1
+
   '@t3-oss/env-core@0.12.0(typescript@5.9.3)(zod@3.25.76)':
     optionalDependencies:
       typescript: 5.9.3
@@ -8249,7 +8261,7 @@ snapshots:
 
   base64-js@1.5.1: {}
 
-  baseline-browser-mapping@2.10.8: {}
+  baseline-browser-mapping@2.10.29: {}
 
   best-effort-json-parser@1.2.1: {}
 
@@ -8313,7 +8325,7 @@ snapshots:
 
   camelize@1.0.1: {}
 
-  caniuse-lite@1.0.30001780: {}
+  caniuse-lite@1.0.30001792: {}
 
   canvas-confetti@1.9.4: {}
 
@@ -9643,7 +9655,7 @@ snapshots:
 
   is-bun-module@2.0.0:
     dependencies:
-      semver: 7.7.4
+      semver: 7.8.0
 
   is-callable@1.2.7: {}
 
@@ -10531,25 +10543,25 @@ snapshots:
       react: 19.2.4
       react-dom: 19.2.4(react@19.2.4)
 
-  next@16.1.7(@opentelemetry/api@1.9.0)(@playwright/test@1.59.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4):
+  next@16.2.6(@opentelemetry/api@1.9.0)(@playwright/test@1.59.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4):
     dependencies:
-      '@next/env': 16.1.7
+      '@next/env': 16.2.6
       '@swc/helpers': 0.5.15
-      baseline-browser-mapping: 2.10.8
-      caniuse-lite: 1.0.30001780
+      baseline-browser-mapping: 2.10.29
+      caniuse-lite: 1.0.30001792
       postcss: 8.4.31
       react: 19.2.4
       react-dom: 19.2.4(react@19.2.4)
       styled-jsx: 5.1.6(react@19.2.4)
     optionalDependencies:
-      '@next/swc-darwin-arm64': 16.1.7
-      '@next/swc-darwin-x64': 16.1.7
-      '@next/swc-linux-arm64-gnu': 16.1.7
-      '@next/swc-linux-arm64-musl': 16.1.7
-      '@next/swc-linux-x64-gnu': 16.1.7
-      '@next/swc-linux-x64-musl': 16.1.7
-      '@next/swc-win32-arm64-msvc': 16.1.7
-      '@next/swc-win32-x64-msvc': 16.1.7
+      '@next/swc-darwin-arm64': 16.2.6
+      '@next/swc-darwin-x64': 16.2.6
+      '@next/swc-linux-arm64-gnu': 16.2.6
+      '@next/swc-linux-arm64-musl': 16.2.6
+      '@next/swc-linux-x64-gnu': 16.2.6
+      '@next/swc-linux-x64-musl': 16.2.6
+      '@next/swc-win32-arm64-msvc': 16.2.6
+      '@next/swc-win32-x64-msvc': 16.2.6
       '@opentelemetry/api': 1.9.0
       '@playwright/test': 1.59.1
       sharp: 0.34.5
@@ -10557,13 +10569,13 @@ snapshots:
       - '@babel/core'
       - babel-plugin-macros
 
-  nextra-theme-docs@4.6.1(@types/react@19.2.13)(next@16.1.7(@opentelemetry/api@1.9.0)(@playwright/test@1.59.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4))(nextra@4.6.1(next@16.1.7(@opentelemetry/api@1.9.0)(@playwright/test@1.59.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(typescript@5.9.3))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(use-sync-external-store@1.6.0(react@19.2.4)):
+  nextra-theme-docs@4.6.1(@types/react@19.2.13)(next@16.2.6(@opentelemetry/api@1.9.0)(@playwright/test@1.59.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4))(nextra@4.6.1(next@16.2.6(@opentelemetry/api@1.9.0)(@playwright/test@1.59.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(typescript@5.9.3))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(use-sync-external-store@1.6.0(react@19.2.4)):
     dependencies:
       '@headlessui/react': 2.2.9(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
       clsx: 2.1.1
-      next: 16.1.7(@opentelemetry/api@1.9.0)(@playwright/test@1.59.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
+      next: 16.2.6(@opentelemetry/api@1.9.0)(@playwright/test@1.59.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
       next-themes: 0.4.6(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
-      nextra: 4.6.1(next@16.1.7(@opentelemetry/api@1.9.0)(@playwright/test@1.59.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(typescript@5.9.3)
+      nextra: 4.6.1(next@16.2.6(@opentelemetry/api@1.9.0)(@playwright/test@1.59.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(typescript@5.9.3)
       react: 19.2.4
       react-compiler-runtime: 19.1.0-rc.3(react@19.2.4)
       react-dom: 19.2.4(react@19.2.4)
@@ -10575,7 +10587,7 @@ snapshots:
       - immer
       - use-sync-external-store
 
-  nextra@4.6.1(next@16.1.7(@opentelemetry/api@1.9.0)(@playwright/test@1.59.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(typescript@5.9.3):
+  nextra@4.6.1(next@16.2.6(@opentelemetry/api@1.9.0)(@playwright/test@1.59.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4))(react-dom@19.2.4(react@19.2.4))(react@19.2.4)(typescript@5.9.3):
     dependencies:
       '@formatjs/intl-localematcher': 0.6.2
       '@headlessui/react': 2.2.9(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
@@ -10596,7 +10608,7 @@ snapshots:
       mdast-util-gfm: 3.1.0
       mdast-util-to-hast: 13.2.1
       negotiator: 1.0.0
-      next: 16.1.7(@opentelemetry/api@1.9.0)(@playwright/test@1.59.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
+      next: 16.2.6(@opentelemetry/api@1.9.0)(@playwright/test@1.59.1)(react-dom@19.2.4(react@19.2.4))(react@19.2.4)
       react: 19.2.4
       react-compiler-runtime: 19.1.0-rc.3(react@19.2.4)
       react-dom: 19.2.4(react@19.2.4)
@@ -10925,7 +10937,7 @@ snapshots:
 
   postcss@8.4.31:
     dependencies:
-      nanoid: 3.3.11
+      nanoid: 3.3.12
       picocolors: 1.1.1
       source-map-js: 1.2.1
 
@@ -11365,6 +11377,8 @@ snapshots:
 
   semver@7.7.4: {}
 
+  semver@7.8.0: {}
+
   server-only@0.0.1: {}
 
   set-function-length@1.2.2:
@@ -11393,7 +11407,7 @@ snapshots:
     dependencies:
       '@img/colour': 1.1.0
       detect-libc: 2.1.2
-      semver: 7.7.4
+      semver: 7.8.0
     optionalDependencies:
       '@img/sharp-darwin-arm64': 0.34.5
       '@img/sharp-darwin-x64': 0.34.5

From 20d2d2b3731edf9d5d72a191471c1fd856453350 Mon Sep 17 00:00:00 2001
From: Nan Gao <thunderggnn@gmail.com>
Date: Tue, 12 May 2026 04:55:13 +0200
Subject: [PATCH 8/8] fix(middleware): Handle invalid tool calls in dangling
 pairing middleware (#2890) (#2891)

---
 .../dangling_tool_call_middleware.py          | 83 +++++++++++++------
 .../test_dangling_tool_call_middleware.py     | 50 +++++++++++
 2 files changed, 107 insertions(+), 26 deletions(-)

diff --git a/backend/packages/harness/deerflow/agents/middlewares/dangling_tool_call_middleware.py b/backend/packages/harness/deerflow/agents/middlewares/dangling_tool_call_middleware.py
index 7bf600b9f..5bb54f3e5 100644
--- a/backend/packages/harness/deerflow/agents/middlewares/dangling_tool_call_middleware.py
+++ b/backend/packages/harness/deerflow/agents/middlewares/dangling_tool_call_middleware.py
@@ -36,42 +36,73 @@ class DanglingToolCallMiddleware(AgentMiddleware[AgentState]):
 
     @staticmethod
     def _message_tool_calls(msg) -> list[dict]:
-        """Return normalized tool calls from structured fields or raw provider payloads."""
+        """Return normalized tool calls from structured fields or raw provider payloads.
+
+        LangChain stores malformed provider function calls in ``invalid_tool_calls``.
+        They do not execute, but provider adapters may still serialize enough of
+        the call id/name back into the next request that strict OpenAI-compatible
+        validators expect a matching ToolMessage. Treat them as dangling calls so
+        the next model request stays well-formed and the model sees a recoverable
+        tool error instead of another provider 400.
+        """
+        normalized: list[dict] = []
+
         tool_calls = getattr(msg, "tool_calls", None) or []
-        if tool_calls:
-            return list(tool_calls)
+        normalized.extend(list(tool_calls))
 
         raw_tool_calls = (getattr(msg, "additional_kwargs", None) or {}).get("tool_calls") or []
-        normalized: list[dict] = []
-        for raw_tc in raw_tool_calls:
-            if not isinstance(raw_tc, dict):
+        if not tool_calls:
+            for raw_tc in raw_tool_calls:
+                if not isinstance(raw_tc, dict):
+                    continue
+
+                function = raw_tc.get("function")
+                name = raw_tc.get("name")
+                if not name and isinstance(function, dict):
+                    name = function.get("name")
+
+                args = raw_tc.get("args", {})
+                if not args and isinstance(function, dict):
+                    raw_args = function.get("arguments")
+                    if isinstance(raw_args, str):
+                        try:
+                            parsed_args = json.loads(raw_args)
+                        except (TypeError, ValueError, json.JSONDecodeError):
+                            parsed_args = {}
+                        args = parsed_args if isinstance(parsed_args, dict) else {}
+
+                normalized.append(
+                    {
+                        "id": raw_tc.get("id"),
+                        "name": name or "unknown",
+                        "args": args if isinstance(args, dict) else {},
+                    }
+                )
+
+        for invalid_tc in getattr(msg, "invalid_tool_calls", None) or []:
+            if not isinstance(invalid_tc, dict):
                 continue
-
-            function = raw_tc.get("function")
-            name = raw_tc.get("name")
-            if not name and isinstance(function, dict):
-                name = function.get("name")
-
-            args = raw_tc.get("args", {})
-            if not args and isinstance(function, dict):
-                raw_args = function.get("arguments")
-                if isinstance(raw_args, str):
-                    try:
-                        parsed_args = json.loads(raw_args)
-                    except (TypeError, ValueError, json.JSONDecodeError):
-                        parsed_args = {}
-                    args = parsed_args if isinstance(parsed_args, dict) else {}
-
             normalized.append(
                 {
-                    "id": raw_tc.get("id"),
-                    "name": name or "unknown",
-                    "args": args if isinstance(args, dict) else {},
+                    "id": invalid_tc.get("id"),
+                    "name": invalid_tc.get("name") or "unknown",
+                    "args": {},
+                    "invalid": True,
+                    "error": invalid_tc.get("error"),
                 }
             )
 
         return normalized
 
+    @staticmethod
+    def _synthetic_tool_message_content(tool_call: dict) -> str:
+        if tool_call.get("invalid"):
+            error = tool_call.get("error")
+            if isinstance(error, str) and error:
+                return f"[Tool call could not be executed because its arguments were invalid: {error}]"
+            return "[Tool call could not be executed because its arguments were invalid.]"
+        return "[Tool call was interrupted and did not return a result.]"
+
     def _build_patched_messages(self, messages: list) -> list | None:
         """Return a new message list with patches inserted at the correct positions.
 
@@ -114,7 +145,7 @@ class DanglingToolCallMiddleware(AgentMiddleware[AgentState]):
                 if tc_id and tc_id not in existing_tool_msg_ids and tc_id not in patched_ids:
                     patched.append(
                         ToolMessage(
-                            content="[Tool call was interrupted and did not return a result.]",
+                            content=self._synthetic_tool_message_content(tc),
                             tool_call_id=tc_id,
                             name=tc.get("name", "unknown"),
                             status="error",
diff --git a/backend/tests/test_dangling_tool_call_middleware.py b/backend/tests/test_dangling_tool_call_middleware.py
index 90c162eac..b1d5c476a 100644
--- a/backend/tests/test_dangling_tool_call_middleware.py
+++ b/backend/tests/test_dangling_tool_call_middleware.py
@@ -14,6 +14,10 @@ def _ai_with_tool_calls(tool_calls):
     return AIMessage(content="", tool_calls=tool_calls)
 
 
+def _ai_with_invalid_tool_calls(invalid_tool_calls):
+    return AIMessage(content="", tool_calls=[], invalid_tool_calls=invalid_tool_calls)
+
+
 def _tool_msg(tool_call_id, name="test_tool"):
     return ToolMessage(content="result", tool_call_id=tool_call_id, name=name)
 
@@ -22,6 +26,16 @@ def _tc(name="bash", tc_id="call_1"):
     return {"name": name, "id": tc_id, "args": {}}
 
 
+def _invalid_tc(name="write_file", tc_id="write_file:36", error="Failed to parse tool arguments: malformed JSON"):
+    return {
+        "type": "invalid_tool_call",
+        "name": name,
+        "id": tc_id,
+        "args": '{"description":"write report","path":"/mnt/user-data/outputs/report.md","content":"bad {"json"}"}',
+        "error": error,
+    }
+
+
 class TestBuildPatchedMessagesNoPatch:
     def test_empty_messages(self):
         mw = DanglingToolCallMiddleware()
@@ -144,6 +158,42 @@ class TestBuildPatchedMessagesPatching:
         assert patched[1].name == "bash"
         assert patched[1].status == "error"
 
+    def test_invalid_tool_call_is_patched(self):
+        mw = DanglingToolCallMiddleware()
+        msgs = [_ai_with_invalid_tool_calls([_invalid_tc()])]
+        patched = mw._build_patched_messages(msgs)
+        assert patched is not None
+        assert len(patched) == 2
+        assert isinstance(patched[1], ToolMessage)
+        assert patched[1].tool_call_id == "write_file:36"
+        assert patched[1].name == "write_file"
+        assert patched[1].status == "error"
+        assert "arguments were invalid" in patched[1].content
+        assert "Failed to parse tool arguments" in patched[1].content
+
+    def test_valid_and_invalid_tool_calls_are_both_patched(self):
+        mw = DanglingToolCallMiddleware()
+        msgs = [
+            AIMessage(
+                content="",
+                tool_calls=[_tc("bash", "call_1")],
+                invalid_tool_calls=[_invalid_tc()],
+            )
+        ]
+        patched = mw._build_patched_messages(msgs)
+        assert patched is not None
+        tool_msgs = [m for m in patched if isinstance(m, ToolMessage)]
+        assert len(tool_msgs) == 2
+        assert {tm.tool_call_id for tm in tool_msgs} == {"call_1", "write_file:36"}
+
+    def test_invalid_tool_call_already_responded_is_not_patched(self):
+        mw = DanglingToolCallMiddleware()
+        msgs = [
+            _ai_with_invalid_tool_calls([_invalid_tc()]),
+            _tool_msg("write_file:36", "write_file"),
+        ]
+        assert mw._build_patched_messages(msgs) is None
+
 
 class TestWrapModelCall:
     def test_no_patch_passthrough(self):