fix: thread app config through client and sync providers

2026-05-02 12:07:26 +08:00
229 changed files with 2077 additions and 15136 deletions
@@ -1,6 +1,3 @@
-# Serper API Key (Google Search) - https://serper.dev
-SERPER_API_KEY=your-serper-api-key
-
 # TAVILY API Key
 TAVILY_API_KEY=your-tavily-api-key

@@ -9,9 +6,8 @@ JINA_API_KEY=your-jina-api-key

 # InfoQuest API Key
 INFOQUEST_API_KEY=your-infoquest-api-key
-# Browser CORS allowlist for split-origin or port-forwarded deployments (comma-separated exact origins).
-# Leave unset when using the unified nginx endpoint, e.g. http://localhost:2026.
-# GATEWAY_CORS_ORIGINS=http://localhost:3000,http://127.0.0.1:3000
+# Gateway CORS origins (comma-separated exact origins) - e.g., http://localhost:3000,http://localhost:3001
+# GATEWAY_CORS_ORIGINS=http://localhost:3000

 # Optional:
 # FIRECRAWL_API_KEY=your-firecrawl-api-key
@@ -49,14 +45,3 @@ INFOQUEST_API_KEY=your-infoquest-api-key

 # Set to "false" to disable Swagger UI, ReDoc, and OpenAPI schema in production
 # GATEWAY_ENABLE_DOCS=false
-
-# ── Frontend SSR → Gateway wiring ─────────────────────────────────────────────
-# The Next.js server uses these to reach the Gateway during SSR (auth checks,
-# /api/* rewrites). They default to localhost values that match `make dev` and
-# `make start`, so most local users do not need to set them.
-#
-# Override only when the Gateway is not on localhost:8001 (e.g. when the
-# frontend and gateway run on different hosts, in containers with a service
-# alias, or behind a different port). docker-compose already sets these.
-# DEER_FLOW_INTERNAL_GATEWAY_BASE_URL=http://localhost:8001
-# DEER_FLOW_TRUSTED_ORIGINS=http://localhost:3000,http://localhost:2026
@@ -1,101 +0,0 @@
-name: Publish Containers
-
-on:
-  push:
-    tags:
-      - "v*"
-
-jobs:
-
-  backend-container:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      packages: write
-      attestations: write
-      id-token: write
-    env:
-      REGISTRY: ghcr.io
-      IMAGE_NAME: ${{ github.repository }}-backend
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v6
-      - name: Log in to the Container registry
-        uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 #v3.4.0
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-      - name: Extract metadata (tags, labels) for Docker
-        id: meta
-        uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 #v5.7.0
-        with:
-          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
-          tags: |
-            type=ref,event=tag
-            type=ref,event=branch
-            type=sha
-            type=raw,value=latest,enable={{is_default_branch}}
-      - name: Build and push Docker image
-        id: push
-        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 #v6.18.0
-        with:
-          context: .
-          file: backend/Dockerfile
-          push: true
-          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
-
-      - name: Generate artifact attestation
-        uses: actions/attest-build-provenance@v2
-        with:
-          subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}}
-          subject-digest: ${{ steps.push.outputs.digest }}
-          push-to-registry: true
-
-  frontend-container:
-    runs-on: ubuntu-latest
-    permissions:
-      contents: read
-      packages: write
-      attestations: write
-      id-token: write
-    env:
-      REGISTRY: ghcr.io
-      IMAGE_NAME: ${{ github.repository }}-frontend
-
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v6
-      - name: Log in to the Container registry
-        uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 #v3.4.0
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-      - name: Extract metadata (tags, labels) for Docker
-        id: meta
-        uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 #v5.7.0
-        with:
-          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
-          tags: |
-            type=ref,event=tag
-            type=ref,event=branch
-            type=sha
-            type=raw,value=latest,enable={{is_default_branch}}
-      - name: Build and push Docker image
-        id: push
-        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 #v6.18.0
-        with:
-          context: .
-          file: frontend/Dockerfile
-          push: true
-          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
-
-      - name: Generate artifact attestation
-        uses: actions/attest-build-provenance@v2
-        with:
-          subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}}
-          subject-digest: ${{ steps.push.outputs.digest }}
-          push-to-registry: true
@@ -46,12 +46,12 @@ Docker provides a consistent, isolated environment with all dependencies pre-con
   All services will start with hot-reload enabled:
   - Frontend changes are automatically reloaded
   - Backend changes trigger automatic restart
-   - Gateway-hosted LangGraph-compatible runtime supports hot-reload
+   - LangGraph server supports hot-reload

 4. **Access the application**:
   - Web Interface: http://localhost:2026
   - API Gateway: http://localhost:2026/api/*
-   - LangGraph-compatible API: http://localhost:2026/api/langgraph/*
+   - LangGraph: http://localhost:2026/api/langgraph/*

 #### Docker Commands

@@ -94,7 +94,7 @@ Use these as practical starting points for development and review environments:
 If `make docker-init`, `make docker-start`, or `make docker-stop` fails on Linux with an error like below, your current user likely does not have permission to access the Docker daemon socket:

 ```text
-unable to get image 'deer-flow-gateway': permission denied while trying to connect to the Docker daemon socket at unix:///var/run/docker.sock
+unable to get image 'deer-flow-dev-langgraph': permission denied while trying to connect to the Docker daemon socket at unix:///var/run/docker.sock
 ```

 Recommended fix: add your current user to the `docker` group so Docker commands work without `sudo`.
@@ -131,8 +131,9 @@ Host Machine
 Docker Compose (deer-flow-dev)
  ├→ nginx (port 2026) ← Reverse proxy
  ├→ web (port 3000) ← Frontend with hot-reload
-  ├→ gateway (port 8001) ← Gateway API + LangGraph-compatible runtime with hot-reload
-  └→ provisioner (optional, port 8002) ← Started only in provisioner/K8s sandbox mode
+  ├→ api (port 8001) ← Gateway API with hot-reload
+  ├→ langgraph (port 2024) ← LangGraph server with hot-reload
+  └→ provisioner (optional, port 8002) ← Started only in provisioner/K8s sandbox mode
 ```

 **Benefits of Docker Development**:
@@ -183,13 +184,17 @@ Required tools:

 If you need to start services individually:

-1. **Start backend service**:
+1. **Start backend and frontend services** (one terminal each):
   ```bash
-   # Terminal 1: Start Gateway API + embedded agent runtime (port 8001)
+   # Terminal 1: Start LangGraph Server (port 2024)
   cd backend
   make dev

-   # Terminal 2: Start Frontend (port 3000)
+   # Terminal 2: Start Gateway API (port 8001)
+   cd backend
+   make gateway
+
+   # Terminal 3: Start Frontend (port 3000)
   cd frontend
   pnpm dev
   ```
@@ -207,10 +212,10 @@ If you need to start services individually:

 The nginx configuration provides:
 - Unified entry point on port 2026
- Rewrites `/api/langgraph/*` to Gateway's LangGraph-compatible API (8001)
+- Routes `/api/langgraph/*` to LangGraph Server (2024)
 - Routes other `/api/*` endpoints to Gateway API (8001)
 - Routes non-API requests to Frontend (3000)
- Same-origin API routing; split-origin or port-forwarded browser clients should use the Gateway `GATEWAY_CORS_ORIGINS` allowlist
+- Centralized CORS handling
 - SSE/streaming support for real-time agent responses
 - Optimized timeouts for long-running operations

@@ -230,8 +235,8 @@ deer-flow/
 │       └── nginx.local.conf # Nginx config for local dev
 ├── backend/                 # Backend application
 │   ├── src/
-│   │   ├── gateway/        # Gateway API and LangGraph-compatible runtime (port 8001)
-│   │   ├── agents/         # LangGraph agent runtime used by Gateway
+│   │   ├── gateway/        # Gateway API (port 8001)
+│   │   ├── agents/         # LangGraph agents (port 2024)
 │   │   ├── mcp/            # Model Context Protocol integration
 │   │   ├── skills/         # Skills system
 │   │   └── sandbox/        # Sandbox execution
@@ -251,7 +256,8 @@ Browser
  ↓
 Nginx (port 2026) ← Unified entry point
  ├→ Frontend (port 3000) ← / (non-API requests)
-  └→ Gateway API (port 8001) ← /api/* and /api/langgraph/* (LangGraph-compatible agent interactions)
+  ├→ Gateway API (port 8001) ← /api/models, /api/mcp, /api/skills, /api/threads/*/artifacts
+  └→ LangGraph Server (port 2024) ← /api/langgraph/* (agent interactions)
 ```

 ## Development Workflow
@@ -245,8 +245,6 @@ make down   # Stop and remove containers

 Access: http://localhost:2026

-The unified nginx endpoint is same-origin by default and does not emit browser CORS headers. If you run a split-origin or port-forwarded browser client, set `GATEWAY_CORS_ORIGINS` to comma-separated exact origins such as `http://localhost:3000`; the Gateway then applies the CORS allowlist and matching CSRF origin checks.
-
 See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed Docker development guide.

 #### Option 2: Local Development
@@ -228,7 +228,7 @@ make down   # Stop and remove containers
 ```

 > [!NOTE]
-> Le runtime d'agent s'exécute actuellement dans la Gateway. nginx réécrit `/api/langgraph/*` vers l'API compatible LangGraph servie par la Gateway.
+> Le serveur d'agents LangGraph fonctionne actuellement via `langgraph dev` (le serveur CLI open source).

 Accès : http://localhost:2026

@@ -296,8 +296,8 @@ DeerFlow peut recevoir des tâches depuis des applications de messagerie. Les ca

 ```yaml
 channels:
-  # LangGraph-compatible Gateway API base URL (default: http://localhost:8001/api)
-  langgraph_url: http://localhost:8001/api
+  # LangGraph Server URL (default: http://localhost:2024)
+  langgraph_url: http://localhost:2024
  # Gateway API URL (default: http://localhost:8001)
  gateway_url: http://localhost:8001

@@ -181,7 +181,7 @@ make down   # コンテナを停止して削除
 ```

 > [!NOTE]
-> Agentランタイムは現在Gateway内で実行されます。`/api/langgraph/*`はnginxによってGatewayのLangGraph-compatible APIへ書き換えられます。
+> LangGraphエージェントサーバーは現在`langgraph dev`（オープンソースCLIサーバー）経由で実行されます。

 アクセス: http://localhost:2026

@@ -249,8 +249,8 @@ DeerFlowはメッセージングアプリからのタスク受信をサポート

 ```yaml
 channels:
-  # LangGraph-compatible Gateway API base URL（デフォルト: http://localhost:8001/api）
-  langgraph_url: http://localhost:8001/api
+  # LangGraphサーバーURL（デフォルト: http://localhost:2024）
+  langgraph_url: http://localhost:2024
  # Gateway API URL（デフォルト: http://localhost:8001）
  gateway_url: http://localhost:8001

@@ -184,7 +184,7 @@ make down   # 停止并移除容器
 ```

 > [!NOTE]
-> 当前 Agent 运行时嵌入在 Gateway 中运行，`/api/langgraph/*` 会由 nginx 重写到 Gateway 的 LangGraph-compatible API。
+> 当前 LangGraph agent server 通过开源 CLI 服务 `langgraph dev` 运行。

 访问地址：http://localhost:2026

@@ -254,8 +254,8 @@ DeerFlow 支持从即时通讯应用接收任务。只要配置完成，对应

 ```yaml
 channels:
-  # LangGraph-compatible Gateway API base URL（默认：http://localhost:8001/api）
-  langgraph_url: http://localhost:8001/api
+  # LangGraph Server URL（默认：http://localhost:2024）
+  langgraph_url: http://localhost:2024
  # Gateway API URL（默认：http://localhost:8001）
  gateway_url: http://localhost:8001

@@ -207,8 +207,6 @@ Configuration priority:

 FastAPI application on port 8001 with health check at `GET /health`. Set `GATEWAY_ENABLE_DOCS=false` to disable `/docs`, `/redoc`, and `/openapi.json` in production (default: enabled).

-CORS is same-origin by default when requests enter through nginx on port 2026. Split-origin or port-forwarded browser clients must opt in with `GATEWAY_CORS_ORIGINS` (comma-separated exact origins); Gateway `CORSMiddleware` and `CSRFMiddleware` both read that variable so browser CORS and auth-origin checks stay aligned.
-
 **Routers**:

 | Router | Endpoints |
@@ -225,7 +223,7 @@ CORS is same-origin by default when requests enter through nginx on port 2026. S
 | **Feedback** (`/api/threads/{id}/runs/{rid}/feedback`) | `PUT /` - upsert feedback; `DELETE /` - delete user feedback; `POST /` - create feedback; `GET /` - list feedback; `GET /stats` - aggregate stats; `DELETE /{fid}` - delete specific |
 | **Runs** (`/api/runs`) | `POST /stream` - stateless run + SSE; `POST /wait` - stateless run + block; `GET /{rid}/messages` - paginated messages by run_id `{data, has_more}` (cursor: `after_seq`/`before_seq`); `GET /{rid}/feedback` - list feedback by run_id |

-Proxied through nginx: `/api/langgraph/*` → Gateway LangGraph-compatible runtime, all other `/api/*` → Gateway REST APIs.
+Proxied through nginx: `/api/langgraph/*` → LangGraph, all other `/api/*` → Gateway.

 ### Sandbox System (`packages/harness/deerflow/sandbox/`)

@@ -245,7 +243,7 @@ Proxied through nginx: `/api/langgraph/*` → Gateway LangGraph-compatible runti
 - `bash` - Execute commands with path translation and error handling
 - `ls` - Directory listing (tree format, max 2 levels)
 - `read_file` - Read file contents with optional line range
- `write_file` - Write/append to files, creates directories; overwrites by default and exposes the `append` argument in the model-facing schema for end-of-file writes
+- `write_file` - Write/append to files, creates directories
 - `str_replace` - Substring replacement (single or all occurrences); same-path serialization is scoped to `(sandbox.id, path)` so isolated sandboxes do not contend on identical virtual paths inside one process

 ### Subagent System (`packages/harness/deerflow/subagents/`)
@@ -265,10 +263,8 @@ Proxied through nginx: `/api/langgraph/*` → Gateway LangGraph-compatible runti
   - `present_files` - Make output files visible to user (only `/mnt/user-data/outputs`)
   - `ask_clarification` - Request clarification (intercepted by ClarificationMiddleware → interrupts)
   - `view_image` - Read image as base64 (added only if model supports vision)
-   - `setup_agent` - Bootstrap-only: persist a brand-new custom agent's `SOUL.md` and `config.yaml`. Bound only when `is_bootstrap=True`.
-   - `update_agent` - Custom-agent-only: persist self-updates to the current agent's `SOUL.md` / `config.yaml` from inside a normal chat (partial update + atomic write). Bound when `agent_name` is set and `is_bootstrap=False`.
 4. **Subagent tool** (if enabled):
-   - `task` - Delegate to subagent (description, prompt, subagent_type)
+   - `task` - Delegate to subagent (description, prompt, subagent_type, max_turns)

 **Community tools** (`packages/harness/deerflow/community/`):
 - `tavily/` - Web search (5 results default) and web fetch (4KB limit)
@@ -358,11 +354,10 @@ Bridges external messaging platforms (Feishu, Slack, Telegram, DingTalk) to the
 **Per-User Isolation**:
 - Memory is stored per-user at `{base_dir}/users/{user_id}/memory.json`
 - Per-agent per-user memory at `{base_dir}/users/{user_id}/agents/{agent_name}/memory.json`
- Custom agent definitions (`SOUL.md` + `config.yaml`) are also per-user at `{base_dir}/users/{user_id}/agents/{agent_name}/`. The legacy shared layout `{base_dir}/agents/{agent_name}/` remains read-only fallback for unmigrated installations
 - `user_id` is resolved via `get_effective_user_id()` from `deerflow.runtime.user_context`
 - In no-auth mode, `user_id` defaults to `"default"` (constant `DEFAULT_USER_ID`)
 - Absolute `storage_path` in config opts out of per-user isolation
- **Migration**: Run `PYTHONPATH=. python scripts/migrate_user_isolation.py` to move legacy `memory.json`, `threads/`, and `agents/` into per-user layout. Supports `--dry-run` (preview changes) and `--user-id USER_ID` (assign unowned legacy data to a user, defaults to `default`).
+- **Migration**: Run `PYTHONPATH=. python scripts/migrate_user_isolation.py` to move legacy `memory.json` and `threads/` into per-user layout; supports `--dry-run`

 **Data Structure** (stored in `{base_dir}/users/{user_id}/memory.json`):
 - **User Context**: `workContext`, `personalContext`, `topOfMind` (1-3 sentence summaries)
@@ -522,7 +517,6 @@ Multi-file upload with automatic document conversion:
 - Rejects directory inputs before copying so uploads stay all-or-nothing
 - Reuses one conversion worker per request when called from an active event loop
 - Files stored in thread-isolated directories
- Duplicate filenames in a single upload request are auto-renamed with `_N` suffixes so later files do not truncate earlier files
 - Agent receives uploaded file list via `UploadsMiddleware`

 See [docs/FILE_UPLOAD.md](docs/FILE_UPLOAD.md) for details.
@@ -56,8 +56,11 @@ export OPENAI_API_KEY="your-api-key"
 ### Run the Development Server

 ```bash
-# Gateway API + embedded agent runtime
+# Terminal 1: LangGraph server
 make dev
+
+# Terminal 2: Gateway API
+make gateway
 ```

 ## Project Structure
@@ -50,12 +50,6 @@ COPY backend ./backend
 RUN --mount=type=cache,target=/root/.cache/uv \
    sh -c "cd backend && UV_INDEX_URL=${UV_INDEX_URL:-https://pypi.org/simple} uv sync ${UV_EXTRAS:+--extra $UV_EXTRAS}"

-# UTF-8 locale prevents UnicodeEncodeError on Chinese/emoji content in minimal
-# containers where locale configuration may be missing and the default encoding is not UTF-8.
-ENV LANG=C.UTF-8
-ENV LC_ALL=C.UTF-8
-ENV PYTHONIOENCODING=utf-8
-
 # ── Stage 2: Dev ──────────────────────────────────────────────────────────────
 # Retains compiler toolchain from builder so startup-time `uv sync` can build
 # source distributions in development containers.
@@ -72,10 +66,6 @@ CMD ["sh", "-c", "cd backend && PYTHONPATH=. uv run uvicorn app.gateway.app:app
 # Clean image without build-essential — reduces size (~200 MB) and attack surface.
 FROM python:3.12-slim-bookworm

-ENV LANG=C.UTF-8
-ENV LC_ALL=C.UTF-8
-ENV PYTHONIOENCODING=utf-8
-
 # Copy Node.js runtime from builder (provides npx for MCP servers)
 COPY --from=builder /usr/bin/node /usr/bin/node
 COPY --from=builder /usr/lib/node_modules /usr/lib/node_modules
@@ -11,26 +11,31 @@ DeerFlow is a LangGraph-based AI super agent with sandbox execution, persistent
                        │          Nginx (Port 2026)           │
                        │      Unified reverse proxy           │
                        └───────┬──────────────────┬───────────┘
-                                │
-            /api/langgraph/*    │    /api/* (other)
-            rewritten to /api/* │
-                                ▼
-               ┌────────────────────────────────────────┐
-               │        Gateway API (8001)              │
-               │        FastAPI REST + agent runtime    │
-               │                                        │
-               │ Models, MCP, Skills, Memory, Uploads,  │
-               │ Artifacts, Threads, Runs, Streaming    │
-               │                                        │
-               │ ┌────────────────────────────────────┐ │
-               │ │ Lead Agent                         │ │
-               │ │ Middleware Chain, Tools, Subagents │ │
-               │ └────────────────────────────────────┘ │
-               └────────────────────────────────────────┘
+                                │                  │
+              /api/langgraph/*  │                  │  /api/* (other)
+                                ▼                  ▼
+               ┌────────────────────┐  ┌────────────────────────┐
+               │ LangGraph Server   │  │   Gateway API (8001)   │
+               │    (Port 2024)     │  │   FastAPI REST         │
+               │                    │  │                        │
+               │ ┌────────────────┐ │  │ Models, MCP, Skills,   │
+               │ │  Lead Agent    │ │  │ Memory, Uploads,       │
+               │ │  ┌──────────┐  │ │  │ Artifacts              │
+               │ │  │Middleware│  │ │  └────────────────────────┘
+               │ │  │  Chain   │  │ │
+               │ │  └──────────┘  │ │
+               │ │  ┌──────────┐  │ │
+               │ │  │  Tools   │  │ │
+               │ │  └──────────┘  │ │
+               │ │  ┌──────────┐  │ │
+               │ │  │Subagents │  │ │
+               │ │  └──────────┘  │ │
+               │ └────────────────┘ │
+               └────────────────────┘
 ```

 **Request Routing** (via Nginx):
- `/api/langgraph/*` → Gateway LangGraph-compatible API - agent interactions, threads, streaming
+- `/api/langgraph/*` → LangGraph Server - agent interactions, threads, streaming
 - `/api/*` (other) → Gateway API - models, MCP, skills, memory, artifacts, uploads, thread-local cleanup
 - `/` (non-API) → Frontend - Next.js web interface

@@ -74,7 +79,7 @@ Per-thread isolated execution with virtual path translation:
 - **Skills path**: `/mnt/skills` → `deer-flow/skills/` directory
 - **Skills loading**: Recursively discovers nested `SKILL.md` files under `skills/{public,custom}` and preserves nested container paths
 - **File-write safety**: `str_replace` serializes read-modify-write per `(sandbox.id, path)` so isolated sandboxes keep concurrency even when virtual paths match
- **Tools**: `bash`, `ls`, `read_file`, `write_file`, `str_replace` (`write_file` overwrites by default and exposes `append` for end-of-file writes; `bash` is disabled by default when using `LocalSandboxProvider`; use `AioSandboxProvider` for isolated shell access)
+- **Tools**: `bash`, `ls`, `read_file`, `write_file`, `str_replace` (`bash` is disabled by default when using `LocalSandboxProvider`; use `AioSandboxProvider` for isolated shell access)

 ### Subagent System

@@ -119,7 +124,7 @@ FastAPI application providing REST endpoints for frontend integration:
 | `POST /api/memory/reload` | Force memory reload |
 | `GET /api/memory/config` | Memory configuration |
 | `GET /api/memory/status` | Combined config + data |
-| `POST /api/threads/{id}/uploads` | Upload files (auto-converts PDF/PPT/Excel/Word to Markdown, rejects directory paths, auto-renames duplicate filenames in one request) |
+| `POST /api/threads/{id}/uploads` | Upload files (auto-converts PDF/PPT/Excel/Word to Markdown, rejects directory paths) |
 | `GET /api/threads/{id}/uploads/list` | List uploaded files |
 | `DELETE /api/threads/{id}` | Delete DeerFlow-managed local thread data after LangGraph thread deletion; unexpected failures are logged server-side and return a generic 500 detail |
 | `GET /api/threads/{id}/artifacts/{path}` | Serve generated artifacts |
@@ -188,7 +193,7 @@ export OPENAI_API_KEY="your-api-key-here"
 **Full Application** (from project root):

 ```bash
-make dev  # Starts Gateway + Frontend + Nginx
+make dev  # Starts LangGraph + Gateway + Frontend + Nginx
 ```

 Access at: http://localhost:2026
@@ -196,11 +201,14 @@ Access at: http://localhost:2026
 **Backend Only** (from backend directory):

 ```bash
-# Gateway API + embedded agent runtime
+# Terminal 1: LangGraph server
 make dev
+
+# Terminal 2: Gateway API
+make gateway
 ```

-Direct access: Gateway at http://localhost:8001
+Direct access: LangGraph at http://localhost:2024, Gateway at http://localhost:8001

 ---

@@ -236,16 +244,12 @@ backend/
 │   └── utils/                  # Utilities
 ├── docs/                       # Documentation
 ├── tests/                      # Test suite
-├── langgraph.json              # LangGraph graph registry for tooling/Studio compatibility
+├── langgraph.json              # LangGraph server configuration
 ├── pyproject.toml              # Python dependencies
 ├── Makefile                    # Development commands
 └── Dockerfile                  # Container build
 ```

-`langgraph.json` is not the default service entrypoint.  The scripts and Docker
-deployments run the Gateway embedded runtime; the file is kept for LangGraph
-tooling, Studio, or direct LangGraph Server compatibility.
-
 ---

 ## Configuration
@@ -358,8 +362,8 @@ If a provider is explicitly enabled but required credentials are missing, or the

 ```bash
 make install    # Install dependencies
-make dev        # Run Gateway API + embedded agent runtime (port 8001)
-make gateway    # Run Gateway API without reload (port 8001)
+make dev        # Run LangGraph server (port 2024)
+make gateway    # Run Gateway API (port 8001)
 make lint       # Run linter (ruff)
 make format     # Format code (ruff)
 ```
@@ -146,13 +146,6 @@ def _normalize_custom_agent_name(raw_value: str) -> str:
    return normalized


-def _strip_loop_warning_text(text: str) -> str:
-    """Remove middleware-authored loop warning lines from display text."""
-    if "[LOOP DETECTED]" not in text:
-        return text
-    return "\n".join(line for line in text.splitlines() if "[LOOP DETECTED]" not in line).strip()
-
-
 def _extract_response_text(result: dict | list) -> str:
    """Extract the last AI message text from a LangGraph runs.wait result.

@@ -162,7 +155,7 @@ def _extract_response_text(result: dict | list) -> str:
    Handles special cases:
    - Regular AI text responses
    - Clarification interrupts (``ask_clarification`` tool messages)
-    - Strips loop-detection warnings attached to tool-call AI messages
+    - AI messages with tool_calls but no text content
    """
    if isinstance(result, list):
        messages = result
@@ -192,12 +185,7 @@ def _extract_response_text(result: dict | list) -> str:
        # Regular AI message with text content
        if msg_type == "ai":
            content = msg.get("content", "")
-            has_tool_calls = bool(msg.get("tool_calls"))
            if isinstance(content, str) and content:
-                if has_tool_calls:
-                    content = _strip_loop_warning_text(content)
-                    if not content:
-                        continue
                return content
            # content can be a list of content blocks
            if isinstance(content, list):
@@ -208,8 +196,6 @@ def _extract_response_text(result: dict | list) -> str:
                    elif isinstance(block, str):
                        parts.append(block)
                text = "".join(parts)
-                if has_tool_calls:
-                    text = _strip_loop_warning_text(text)
                if text:
                    return text
    return ""
@@ -434,13 +420,7 @@ async def _ingest_inbound_files(thread_id: str, msg: InboundMessage) -> list[dic
    if not msg.files:
        return []

-    from deerflow.uploads.manager import (
-        UnsafeUploadPathError,
-        claim_unique_filename,
-        ensure_uploads_dir,
-        normalize_filename,
-        write_upload_file_no_symlink,
-    )
+    from deerflow.uploads.manager import claim_unique_filename, ensure_uploads_dir, normalize_filename

    uploads_dir = ensure_uploads_dir(thread_id)
    seen_names = {entry.name for entry in uploads_dir.iterdir() if entry.is_file()}
@@ -491,10 +471,7 @@ async def _ingest_inbound_files(thread_id: str, msg: InboundMessage) -> list[dic

            dest = uploads_dir / safe_name
            try:
-                dest = write_upload_file_no_symlink(uploads_dir, safe_name, data)
-            except UnsafeUploadPathError:
-                logger.warning("[Manager] skipping inbound file with unsafe destination: %s", safe_name)
-                continue
+                dest.write_bytes(data)
            except Exception:
                logger.exception("[Manager] failed to write inbound file: %s", dest)
                continue
@@ -603,17 +580,6 @@ class ChannelManager:
            user_layer.get("config"),
        )

-        configurable = run_config.get("configurable")
-        if isinstance(configurable, Mapping):
-            configurable = dict(configurable)
-        else:
-            configurable = {}
-        run_config["configurable"] = configurable
-        # Pin channel-triggered runs to the root graph namespace so follow-up
-        # turns continue from the same conversation checkpoint.
-        configurable["checkpoint_ns"] = ""
-        configurable["thread_id"] = thread_id
-
        run_context = _merge_dicts(
            DEFAULT_RUN_CONTEXT,
            self._default_session.get("context"),
@@ -997,11 +963,7 @@ class ChannelManager:

        try:
            async with httpx.AsyncClient() as http:
-                resp = await http.get(
-                    f"{self._gateway_url}{path}",
-                    timeout=10,
-                    headers=create_internal_auth_headers(),
-                )
+                resp = await http.get(f"{self._gateway_url}{path}", timeout=10)
                resp.raise_for_status()
                data = resp.json()
        except Exception:
@@ -1,5 +1,6 @@
 import asyncio
 import logging
+import os
 from collections.abc import AsyncGenerator
 from contextlib import asynccontextmanager

@@ -8,7 +9,7 @@ from fastapi.middleware.cors import CORSMiddleware

 from app.gateway.auth_middleware import AuthMiddleware
 from app.gateway.config import get_gateway_config
-from app.gateway.csrf_middleware import CSRFMiddleware, get_configured_cors_origins
+from app.gateway.csrf_middleware import CSRFMiddleware
 from app.gateway.deps import langgraph_runtime
 from app.gateway.routers import (
    agents,
@@ -62,7 +63,7 @@ async def _ensure_admin_user(app: FastAPI) -> None:

    Subsequent boots (admin already exists):
      - Runs the one-time "no-auth → with-auth" orphan thread migration for
-        existing LangGraph thread metadata that has no user_id.
+        existing LangGraph thread metadata that has no owner_id.

    No SQL persistence migration is needed: the four user_id columns
    (threads_meta, runs, run_events, feedback) only come into existence
@@ -177,7 +178,7 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
    async with langgraph_runtime(app):
        logger.info("LangGraph runtime initialised")

-        # Check admin bootstrap state and migrate orphan threads after admin exists.
+        # Ensure admin user exists (auto-create on first boot)
        # Must run AFTER langgraph_runtime so app.state.store is available for thread migration
        await _ensure_admin_user(app)

@@ -218,9 +219,7 @@ def create_app() -> FastAPI:
        Configured FastAPI application instance.
    """
    config = get_gateway_config()
-    docs_url = "/docs" if config.enable_docs else None
-    redoc_url = "/redoc" if config.enable_docs else None
-    openapi_url = "/openapi.json" if config.enable_docs else None
+    docs_kwargs = {"docs_url": "/docs", "redoc_url": "/redoc", "openapi_url": "/openapi.json"} if config.enable_docs else {"docs_url": None, "redoc_url": None, "openapi_url": None}

    app = FastAPI(
        title="DeerFlow API Gateway",
@@ -240,14 +239,12 @@ API Gateway for DeerFlow - A LangGraph-based AI agent backend with sandbox execu

 ### Architecture

-LangGraph-compatible requests are routed through nginx to this gateway.
-This gateway provides runtime endpoints for agent runs plus custom endpoints for models, MCP configuration, skills, and artifacts.
+LangGraph requests are handled by nginx reverse proxy.
+This gateway provides custom endpoints for models, MCP configuration, skills, and artifacts.
        """,
        version="0.1.0",
        lifespan=lifespan,
-        docs_url=docs_url,
-        redoc_url=redoc_url,
-        openapi_url=openapi_url,
+        **docs_kwargs,
        openapi_tags=[
            {
                "name": "models",
@@ -310,18 +307,25 @@ This gateway provides runtime endpoints for agent runs plus custom endpoints for
    # CSRF: Double Submit Cookie pattern for state-changing requests
    app.add_middleware(CSRFMiddleware)

-    # CORS: the unified nginx endpoint is same-origin by default. Split-origin
-    # browser clients must opt in with this explicit Gateway allowlist so CORS
-    # and CSRF origin checks share the same source of truth.
-    cors_origins = sorted(get_configured_cors_origins())
-    if cors_origins:
-        app.add_middleware(
-            CORSMiddleware,
-            allow_origins=cors_origins,
-            allow_credentials=True,
-            allow_methods=["*"],
-            allow_headers=["*"],
-        )
+    # CORS: when GATEWAY_CORS_ORIGINS is set (dev without nginx), add CORS middleware.
+    # In production, nginx handles CORS and no middleware is needed.
+    cors_origins_env = os.environ.get("GATEWAY_CORS_ORIGINS", "")
+    if cors_origins_env:
+        cors_origins = [o.strip() for o in cors_origins_env.split(",") if o.strip()]
+        # Validate: wildcard origin with credentials is a security misconfiguration
+        for origin in cors_origins:
+            if origin == "*":
+                logger.error("GATEWAY_CORS_ORIGINS contains wildcard '*' with allow_credentials=True. This is a security misconfiguration — browsers will reject the response. Use explicit scheme://host:port origins instead.")
+                cors_origins = [o for o in cors_origins if o != "*"]
+                break
+        if cors_origins:
+            app.add_middleware(
+                CORSMiddleware,
+                allow_origins=cors_origins,
+                allow_credentials=True,
+                allow_methods=["*"],
+                allow_headers=["*"],
+            )

    # Include routers
    # Models API is mounted at /api/models
@@ -370,7 +374,7 @@ This gateway provides runtime endpoints for agent runs plus custom endpoints for
    app.include_router(runs.router)

    @app.get("/health", tags=["health"])
-    async def health_check() -> dict[str, str]:
+    async def health_check() -> dict:
        """Health check endpoint.

        Returns:
@@ -28,7 +28,7 @@ class User(BaseModel):
    oauth_id: str | None = Field(None, description="User ID from OAuth provider")

    # Auth lifecycle
-    needs_setup: bool = Field(default=False, description="True when a reset account must complete setup")
+    needs_setup: bool = Field(default=False, description="True for auto-created admin until setup completes")
    token_version: int = Field(default=0, description="Incremented on password change to invalidate old JWTs")


@@ -8,6 +8,7 @@ class GatewayConfig(BaseModel):

    host: str = Field(default="0.0.0.0", description="Host to bind the gateway server")
    port: int = Field(default=8001, description="Port to bind the gateway server")
+    cors_origins: list[str] = Field(default_factory=lambda: ["http://localhost:3000"], description="Allowed CORS origins")
    enable_docs: bool = Field(default=True, description="Enable Swagger/ReDoc/OpenAPI endpoints")


@@ -18,9 +19,11 @@ def get_gateway_config() -> GatewayConfig:
    """Get gateway config, loading from environment if available."""
    global _gateway_config
    if _gateway_config is None:
+        cors_origins_str = os.getenv("CORS_ORIGINS", "http://localhost:3000")
        _gateway_config = GatewayConfig(
            host=os.getenv("GATEWAY_HOST", "0.0.0.0"),
            port=int(os.getenv("GATEWAY_PORT", "8001")),
+            cors_origins=[origin.strip() for origin in cors_origins_str.split(",") if origin.strip()],  # tolerate "a, b" spacing and drop empty entries
            enable_docs=os.getenv("GATEWAY_ENABLE_DOCS", "true").lower() == "true",
        )
    return _gateway_config
@@ -4,10 +4,8 @@ Per RFC-001:
 State-changing operations require CSRF protection.
 """

-import os
 import secrets
-from collections.abc import Awaitable, Callable
-from urllib.parse import urlsplit
+from collections.abc import Callable

 from fastapi import Request, Response
 from starlette.middleware.base import BaseHTTPMiddleware
@@ -21,7 +19,7 @@ CSRF_TOKEN_LENGTH = 64  # bytes

 def is_secure_request(request: Request) -> bool:
    """Detect whether the original client request was made over HTTPS."""
-    return _request_scheme(request) == "https"
+    return request.headers.get("x-forwarded-proto", request.url.scheme).split(",", 1)[0].strip().lower() == "https"  # first hop only: chained proxies may send "https, http"; scheme compare is case-insensitive


 def generate_csrf_token() -> str:
@@ -63,129 +61,15 @@ def is_auth_endpoint(request: Request) -> bool:
    return request.url.path.rstrip("/") in _AUTH_EXEMPT_PATHS


-def _host_with_optional_port(hostname: str, port: int | None, scheme: str) -> str:
-    """Return normalized host[:port], omitting default ports."""
-    host = hostname.lower()
-    if ":" in host and not host.startswith("["):
-        host = f"[{host}]"
-
-    if port is None or (scheme == "http" and port == 80) or (scheme == "https" and port == 443):
-        return host
-    return f"{host}:{port}"
-
-
-def _normalize_origin(origin: str) -> str | None:
-    """Return a normalized scheme://host[:port] origin, or None for invalid input."""
-    try:
-        parsed = urlsplit(origin.strip())
-        port = parsed.port
-    except ValueError:
-        return None
-
-    scheme = parsed.scheme.lower()
-    if scheme not in {"http", "https"} or not parsed.hostname:
-        return None
-
-    # Browser Origin is only scheme/host/port. Reject URL-shaped or credentialed values.
-    if parsed.username or parsed.password or parsed.path or parsed.query or parsed.fragment:
-        return None
-
-    return f"{scheme}://{_host_with_optional_port(parsed.hostname, port, scheme)}"
-
-
-def _configured_cors_origins() -> set[str]:
-    """Return explicit configured browser origins that may call auth routes."""
-    origins = set()
-    for raw_origin in os.environ.get("GATEWAY_CORS_ORIGINS", "").split(","):
-        origin = raw_origin.strip()
-        if not origin or origin == "*":
-            continue
-        normalized = _normalize_origin(origin)
-        if normalized:
-            origins.add(normalized)
-    return origins
-
-
-def get_configured_cors_origins() -> set[str]:
-    """Return normalized explicit browser origins from GATEWAY_CORS_ORIGINS."""
-    return _configured_cors_origins()
-
-
-def _first_header_value(value: str | None) -> str | None:
-    """Return the first value from a comma-separated proxy header."""
-    if not value:
-        return None
-    first = value.split(",", 1)[0].strip()
-    return first or None
-
-
-def _forwarded_param(request: Request, name: str) -> str | None:
-    """Extract a parameter from the first RFC 7239 Forwarded header entry."""
-    forwarded = _first_header_value(request.headers.get("forwarded"))
-    if not forwarded:
-        return None
-
-    for part in forwarded.split(";"):
-        key, sep, value = part.strip().partition("=")
-        if sep and key.lower() == name:
-            return value.strip().strip('"') or None
-    return None
-
-
-def _request_scheme(request: Request) -> str:
-    """Resolve the original request scheme from trusted proxy headers."""
-    scheme = _forwarded_param(request, "proto") or _first_header_value(request.headers.get("x-forwarded-proto")) or request.url.scheme
-    return scheme.lower()
-
-
-def _request_origin(request: Request) -> str | None:
-    """Build the origin for the URL the browser is targeting."""
-    scheme = _request_scheme(request)
-    host = _forwarded_param(request, "host") or _first_header_value(request.headers.get("x-forwarded-host")) or request.headers.get("host") or request.url.netloc
-
-    forwarded_port = _first_header_value(request.headers.get("x-forwarded-port"))
-    if forwarded_port and ":" not in host.rsplit("]", 1)[-1]:
-        host = f"{host}:{forwarded_port}"
-
-    return _normalize_origin(f"{scheme}://{host}")
-
-
-def is_allowed_auth_origin(request: Request) -> bool:
-    """Allow auth POSTs only from the same origin or explicit configured origins.
-
-    Login/register/initialize are exempt from the double-submit token because
-    first-time browser clients do not have a CSRF token yet. They still create
-    a session cookie, so browser requests with a hostile Origin header must be
-    rejected to prevent login CSRF / session fixation. Requests without Origin
-    are allowed for non-browser clients such as curl and mobile integrations.
-    """
-    origin = request.headers.get("origin")
-    if not origin:
-        return True
-
-    normalized_origin = _normalize_origin(origin)
-    if normalized_origin is None:
-        return False
-
-    request_origin = _request_origin(request)
-    return normalized_origin in _configured_cors_origins() or (request_origin is not None and normalized_origin == request_origin)
-
-
 class CSRFMiddleware(BaseHTTPMiddleware):
    """Middleware that implements CSRF protection using Double Submit Cookie pattern."""

    def __init__(self, app: ASGIApp) -> None:
        super().__init__(app)

-    async def dispatch(self, request: Request, call_next: Callable[[Request], Awaitable[Response]]) -> Response:
+    async def dispatch(self, request: Request, call_next: Callable) -> Response:
        _is_auth = is_auth_endpoint(request)

-        if should_check_csrf(request) and _is_auth and not is_allowed_auth_origin(request):
-            return JSONResponse(
-                status_code=403,
-                content={"detail": "Cross-site auth request denied."},
-            )
-
        if should_check_csrf(request) and not _is_auth:
            cookie_token = request.cookies.get(CSRF_COOKIE_NAME)
            header_token = request.headers.get(CSRF_HEADER_NAME)
@@ -1,12 +1,8 @@
-"""LangGraph compatibility auth handler — shares JWT logic with Gateway.
+"""LangGraph Server auth handler — shares JWT logic with Gateway.

-The default DeerFlow runtime is embedded in the FastAPI Gateway; scripts and
-Docker deployments do not load this module.  It is retained for LangGraph
-tooling, Studio, or direct LangGraph Server compatibility through
-``langgraph.json``'s ``auth.path``.
-
-When that compatibility path is used, this module reuses the same JWT and CSRF
-rules as Gateway so both modes validate sessions consistently.
+Loaded by LangGraph Server via langgraph.json ``auth.path``.
+Reuses the same ``decode_token`` / ``get_auth_config`` as Gateway,
+so both modes validate tokens with the same secret and rules.

 Two layers:
  1. @auth.authenticate — validates JWT cookie, extracts user_id,
@@ -11,7 +11,6 @@ from pydantic import BaseModel, Field
 from deerflow.config.agents_api_config import get_agents_api_config
 from deerflow.config.agents_config import AgentConfig, list_custom_agents, load_agent_config, load_agent_soul
 from deerflow.config.paths import get_paths
-from deerflow.runtime.user_context import get_effective_user_id

 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/api", tags=["agents"])
@@ -87,11 +86,11 @@ def _require_agents_api_enabled() -> None:
        )


-def _agent_config_to_response(agent_cfg: AgentConfig, include_soul: bool = False, *, user_id: str | None = None) -> AgentResponse:
+def _agent_config_to_response(agent_cfg: AgentConfig, include_soul: bool = False) -> AgentResponse:
    """Convert AgentConfig to AgentResponse."""
    soul: str | None = None
    if include_soul:
-        soul = load_agent_soul(agent_cfg.name, user_id=user_id) or ""
+        soul = load_agent_soul(agent_cfg.name) or ""

    return AgentResponse(
        name=agent_cfg.name,
@@ -117,10 +116,9 @@ async def list_agents() -> AgentsListResponse:
    """
    _require_agents_api_enabled()

-    user_id = get_effective_user_id()
    try:
-        agents = list_custom_agents(user_id=user_id)
-        return AgentsListResponse(agents=[_agent_config_to_response(a, include_soul=True, user_id=user_id) for a in agents])
+        agents = list_custom_agents()
+        return AgentsListResponse(agents=[_agent_config_to_response(a, include_soul=True) for a in agents])
    except Exception as e:
        logger.error(f"Failed to list agents: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to list agents: {str(e)}")
@@ -146,12 +144,7 @@ async def check_agent_name(name: str) -> dict:
    _require_agents_api_enabled()
    _validate_agent_name(name)
    normalized = _normalize_agent_name(name)
-    user_id = get_effective_user_id()
-    paths = get_paths()
-    # Treat the name as taken if either the per-user path or the legacy shared
-    # path holds an agent — picking a name that collides with an unmigrated
-    # legacy agent would shadow the legacy entry once migration runs.
-    available = not paths.user_agent_dir(user_id, normalized).exists() and not paths.agent_dir(normalized).exists()
+    available = not get_paths().agent_dir(normalized).exists()
    return {"available": available, "name": normalized}


@@ -176,11 +169,10 @@ async def get_agent(name: str) -> AgentResponse:
    _require_agents_api_enabled()
    _validate_agent_name(name)
    name = _normalize_agent_name(name)
-    user_id = get_effective_user_id()

    try:
-        agent_cfg = load_agent_config(name, user_id=user_id)
-        return _agent_config_to_response(agent_cfg, include_soul=True, user_id=user_id)
+        agent_cfg = load_agent_config(name)
+        return _agent_config_to_response(agent_cfg, include_soul=True)
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail=f"Agent '{name}' not found")
    except Exception as e:
@@ -210,13 +202,10 @@ async def create_agent_endpoint(request: AgentCreateRequest) -> AgentResponse:
    _require_agents_api_enabled()
    _validate_agent_name(request.name)
    normalized_name = _normalize_agent_name(request.name)
-    user_id = get_effective_user_id()
-    paths = get_paths()

-    agent_dir = paths.user_agent_dir(user_id, normalized_name)
-    legacy_dir = paths.agent_dir(normalized_name)
+    agent_dir = get_paths().agent_dir(normalized_name)

-    if agent_dir.exists() or legacy_dir.exists():
+    if agent_dir.exists():
        raise HTTPException(status_code=409, detail=f"Agent '{normalized_name}' already exists")

    try:
@@ -243,8 +232,8 @@ async def create_agent_endpoint(request: AgentCreateRequest) -> AgentResponse:

        logger.info(f"Created agent '{normalized_name}' at {agent_dir}")

-        agent_cfg = load_agent_config(normalized_name, user_id=user_id)
-        return _agent_config_to_response(agent_cfg, include_soul=True, user_id=user_id)
+        agent_cfg = load_agent_config(normalized_name)
+        return _agent_config_to_response(agent_cfg, include_soul=True)

    except HTTPException:
        raise
@@ -278,20 +267,13 @@ async def update_agent(name: str, request: AgentUpdateRequest) -> AgentResponse:
    _require_agents_api_enabled()
    _validate_agent_name(name)
    name = _normalize_agent_name(name)
-    user_id = get_effective_user_id()

    try:
-        agent_cfg = load_agent_config(name, user_id=user_id)
+        agent_cfg = load_agent_config(name)
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail=f"Agent '{name}' not found")

-    paths = get_paths()
-    agent_dir = paths.user_agent_dir(user_id, name)
-    if not agent_dir.exists() and paths.agent_dir(name).exists():
-        raise HTTPException(
-            status_code=409,
-            detail=(f"Agent '{name}' only exists in the legacy shared layout and is not scoped to a user. Run scripts/migrate_user_isolation.py to move legacy agents into the per-user layout before updating."),
-        )
+    agent_dir = get_paths().agent_dir(name)

    try:
        # Update config if any config fields changed
@@ -332,8 +314,8 @@ async def update_agent(name: str, request: AgentUpdateRequest) -> AgentResponse:

        logger.info(f"Updated agent '{name}'")

-        refreshed_cfg = load_agent_config(name, user_id=user_id)
-        return _agent_config_to_response(refreshed_cfg, include_soul=True, user_id=user_id)
+        refreshed_cfg = load_agent_config(name)
+        return _agent_config_to_response(refreshed_cfg, include_soul=True)

    except HTTPException:
        raise
@@ -420,22 +402,15 @@ async def delete_agent(name: str) -> None:
        name: The agent name.

    Raises:
-        HTTPException: 404 if no per-user copy exists; 409 if only a legacy
-            shared copy exists (suggesting the migration script).
+        HTTPException: 404 if agent not found.
    """
    _require_agents_api_enabled()
    _validate_agent_name(name)
    name = _normalize_agent_name(name)
-    user_id = get_effective_user_id()
-    paths = get_paths()
-    agent_dir = paths.user_agent_dir(user_id, name)
+
+    agent_dir = get_paths().agent_dir(name)

    if not agent_dir.exists():
-        if paths.agent_dir(name).exists():
-            raise HTTPException(
-                status_code=409,
-                detail=(f"Agent '{name}' only exists in the legacy shared layout and is not scoped to a user. Run scripts/migrate_user_isolation.py to move legacy agents into the per-user layout before deleting."),
-            )
        raise HTTPException(status_code=404, detail=f"Agent '{name}' not found")

    try:
@@ -305,7 +305,7 @@ async def login_local(
 async def register(request: Request, response: Response, body: RegisterRequest):
    """Register a new user account (always 'user' role).

-    The first admin is created explicitly through /initialize. This endpoint creates regular users.
+    Admin is auto-created on first boot. This endpoint creates regular users.
    Auto-login by setting the session cookie.
    """
    try:
@@ -68,27 +68,6 @@ class RunResponse(BaseModel):
    updated_at: str = ""


-class ThreadTokenUsageModelBreakdown(BaseModel):
-    tokens: int = 0
-    runs: int = 0
-
-
-class ThreadTokenUsageCallerBreakdown(BaseModel):
-    lead_agent: int = 0
-    subagent: int = 0
-    middleware: int = 0
-
-
-class ThreadTokenUsageResponse(BaseModel):
-    thread_id: str
-    total_tokens: int = 0
-    total_input_tokens: int = 0
-    total_output_tokens: int = 0
-    total_runs: int = 0
-    by_model: dict[str, ThreadTokenUsageModelBreakdown] = Field(default_factory=dict)
-    by_caller: ThreadTokenUsageCallerBreakdown = Field(default_factory=ThreadTokenUsageCallerBreakdown)
-
-
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
@@ -389,10 +368,10 @@ async def list_run_events(
    return await event_store.list_events(thread_id, run_id, event_types=types, limit=limit)


-@router.get("/{thread_id}/token-usage", response_model=ThreadTokenUsageResponse)
+@router.get("/{thread_id}/token-usage")
@require_permission("threads", "read", owner_check=True)
-async def thread_token_usage(thread_id: str, request: Request) -> ThreadTokenUsageResponse:
+async def thread_token_usage(thread_id: str, request: Request) -> dict:
    """Thread-level token usage aggregation."""
    run_store = get_run_store(request)
    agg = await run_store.aggregate_tokens_by_thread(thread_id)
-    return ThreadTokenUsageResponse(thread_id=thread_id, **agg)
+    return {"thread_id": thread_id, **agg}
@@ -13,11 +13,11 @@ matching the LangGraph Platform wire format expected by the
 from __future__ import annotations

 import logging
+import time
 import uuid
 from typing import Any

 from fastapi import APIRouter, HTTPException, Request
-from langgraph.checkpoint.base import empty_checkpoint
 from pydantic import BaseModel, Field, field_validator

 from app.gateway.authz import require_permission
@@ -26,7 +26,6 @@ from app.gateway.utils import sanitize_log_param
 from deerflow.config.paths import Paths, get_paths
 from deerflow.runtime import serialize_channel_values
 from deerflow.runtime.user_context import get_effective_user_id
-from deerflow.utils.time import coerce_iso, now_iso

 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/api/threads", tags=["threads"])
@@ -90,28 +89,6 @@ class ThreadSearchRequest(BaseModel):
    offset: int = Field(default=0, ge=0, description="Pagination offset")
    status: str | None = Field(default=None, description="Filter by thread status")

-    @field_validator("metadata")
-    @classmethod
-    def _validate_metadata_filters(cls, v: dict[str, Any]) -> dict[str, Any]:
-        """Reject filter entries the SQL backend cannot compile.
-
-        Enforces consistent behaviour across SQL and memory backends.
-        See ``deerflow.persistence.json_compat`` for the shared validators.
-        """
-        if not v:
-            return v
-        from deerflow.persistence.json_compat import validate_metadata_filter_key, validate_metadata_filter_value
-
-        bad_entries: list[str] = []
-        for key, value in v.items():
-            if not validate_metadata_filter_key(key):
-                bad_entries.append(f"{key!r} (unsafe key)")
-            elif not validate_metadata_filter_value(value):
-                bad_entries.append(f"{key!r} (unsupported value type {type(value).__name__})")
-        if bad_entries:
-            raise ValueError(f"Invalid metadata filter entries: {', '.join(bad_entries)}")
-        return v
-

 class ThreadStateResponse(BaseModel):
    """Response model for thread state."""
@@ -256,7 +233,7 @@ async def create_thread(body: ThreadCreateRequest, request: Request) -> ThreadRe
    checkpointer = get_checkpointer(request)
    thread_store = get_thread_store(request)
    thread_id = body.thread_id or str(uuid.uuid4())
-    now = now_iso()
+    now = time.time()
    # ``body.metadata`` is already stripped of server-reserved keys by
    # ``ThreadCreateRequest._strip_reserved`` — see the model definition.

@@ -266,8 +243,8 @@ async def create_thread(body: ThreadCreateRequest, request: Request) -> ThreadRe
        return ThreadResponse(
            thread_id=thread_id,
            status=existing_record.get("status", "idle"),
-            created_at=coerce_iso(existing_record.get("created_at", "")),
-            updated_at=coerce_iso(existing_record.get("updated_at", "")),
+            created_at=str(existing_record.get("created_at", "")),
+            updated_at=str(existing_record.get("updated_at", "")),
            metadata=existing_record.get("metadata", {}),
        )

@@ -285,6 +262,8 @@ async def create_thread(body: ThreadCreateRequest, request: Request) -> ThreadRe
    # Write an empty checkpoint so state endpoints work immediately
    config = {"configurable": {"thread_id": thread_id, "checkpoint_ns": ""}}
    try:
+        from langgraph.checkpoint.base import empty_checkpoint
+
        ckpt_metadata = {
            "step": -1,
            "source": "input",
@@ -302,8 +281,8 @@ async def create_thread(body: ThreadCreateRequest, request: Request) -> ThreadRe
    return ThreadResponse(
        thread_id=thread_id,
        status="idle",
-        created_at=now,
-        updated_at=now,
+        created_at=str(now),
+        updated_at=str(now),
        metadata=body.metadata,
    )

@@ -316,27 +295,20 @@ async def search_threads(body: ThreadSearchRequest, request: Request) -> list[Th
    (SQL-backed for sqlite/postgres, Store-backed for memory mode).
    """
    from app.gateway.deps import get_thread_store
-    from deerflow.persistence.thread_meta import InvalidMetadataFilterError

    repo = get_thread_store(request)
-    try:
-        rows = await repo.search(
-            metadata=body.metadata or None,
-            status=body.status,
-            limit=body.limit,
-            offset=body.offset,
-        )
-    except InvalidMetadataFilterError as exc:
-        raise HTTPException(status_code=400, detail=str(exc)) from exc
+    rows = await repo.search(
+        metadata=body.metadata or None,
+        status=body.status,
+        limit=body.limit,
+        offset=body.offset,
+    )
    return [
        ThreadResponse(
            thread_id=r["thread_id"],
            status=r.get("status", "idle"),
-            # ``coerce_iso`` heals legacy unix-second values that
-            # ``MemoryThreadMetaStore`` historically wrote with ``time.time()``;
-            # SQL-backed rows already arrive as ISO strings and pass through.
-            created_at=coerce_iso(r.get("created_at", "")),
-            updated_at=coerce_iso(r.get("updated_at", "")),
+            created_at=str(r.get("created_at", "")),  # same str() coercion as the other thread endpoints; stored value may be a float from time.time()
+            updated_at=str(r.get("updated_at", "")),
            metadata=r.get("metadata", {}),
            values={"title": r["display_name"]} if r.get("display_name") else {},
            interrupts={},
@@ -368,8 +340,8 @@ async def patch_thread(thread_id: str, body: ThreadPatchRequest, request: Reques
    return ThreadResponse(
        thread_id=thread_id,
        status=record.get("status", "idle"),
-        created_at=coerce_iso(record.get("created_at", "")),
-        updated_at=coerce_iso(record.get("updated_at", "")),
+        created_at=str(record.get("created_at", "")),
+        updated_at=str(record.get("updated_at", "")),
        metadata=record.get("metadata", {}),
    )

@@ -409,8 +381,8 @@ async def get_thread(thread_id: str, request: Request) -> ThreadResponse:
        record = {
            "thread_id": thread_id,
            "status": "idle",
-            "created_at": coerce_iso(ckpt_meta.get("created_at", "")),
-            "updated_at": coerce_iso(ckpt_meta.get("updated_at", ckpt_meta.get("created_at", ""))),
+            "created_at": ckpt_meta.get("created_at", ""),
+            "updated_at": ckpt_meta.get("updated_at", ckpt_meta.get("created_at", "")),
            "metadata": {k: v for k, v in ckpt_meta.items() if k not in ("created_at", "updated_at", "step", "source", "writes", "parents")},
        }

@@ -424,8 +396,8 @@ async def get_thread(thread_id: str, request: Request) -> ThreadResponse:
    return ThreadResponse(
        thread_id=thread_id,
        status=status,
-        created_at=coerce_iso(record.get("created_at", "")),
-        updated_at=coerce_iso(record.get("updated_at", "")),
+        created_at=str(record.get("created_at", "")),
+        updated_at=str(record.get("updated_at", "")),
        metadata=record.get("metadata", {}),
        values=serialize_channel_values(channel_values),
    )
@@ -476,10 +448,10 @@ async def get_thread_state(thread_id: str, request: Request) -> ThreadStateRespo
        values=values,
        next=next_tasks,
        metadata=metadata,
-        checkpoint={"id": checkpoint_id, "ts": coerce_iso(metadata.get("created_at", ""))},
+        checkpoint={"id": checkpoint_id, "ts": str(metadata.get("created_at", ""))},
        checkpoint_id=checkpoint_id,
        parent_checkpoint_id=parent_checkpoint_id,
-        created_at=coerce_iso(metadata.get("created_at", "")),
+        created_at=str(metadata.get("created_at", "")),
        tasks=tasks,
    )

@@ -529,7 +501,7 @@ async def update_thread_state(thread_id: str, body: ThreadStateUpdateRequest, re
        channel_values.update(body.values)

    checkpoint["channel_values"] = channel_values
-    metadata["updated_at"] = now_iso()
+    metadata["updated_at"] = time.time()

    if body.as_node:
        metadata["source"] = "update"
@@ -570,7 +542,7 @@ async def update_thread_state(thread_id: str, body: ThreadStateUpdateRequest, re
        next=[],
        metadata=metadata,
        checkpoint_id=new_checkpoint_id,
-        created_at=coerce_iso(metadata.get("created_at", "")),
+        created_at=str(metadata.get("created_at", "")),
    )


@@ -637,7 +609,7 @@ async def get_thread_history(thread_id: str, body: ThreadHistoryRequest, request
                    parent_checkpoint_id=parent_id,
                    metadata=user_meta,
                    values=values,
-                    created_at=coerce_iso(metadata.get("created_at", "")),
+                    created_at=str(metadata.get("created_at", "")),
                    next=next_tasks,
                )
            )
@@ -5,7 +5,7 @@ import os
 import stat

 from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile
-from pydantic import BaseModel, Field
+from pydantic import BaseModel

 from app.gateway.authz import require_permission
 from app.gateway.deps import get_config
@@ -15,15 +15,12 @@ from deerflow.runtime.user_context import get_effective_user_id
 from deerflow.sandbox.sandbox_provider import SandboxProvider, get_sandbox_provider
 from deerflow.uploads.manager import (
    PathTraversalError,
-    UnsafeUploadPathError,
-    claim_unique_filename,
    delete_file_safe,
    enrich_file_listing,
    ensure_uploads_dir,
    get_uploads_dir,
    list_files_in_dir,
    normalize_filename,
-    open_upload_file_no_symlink,
    upload_artifact_url,
    upload_virtual_path,
 )
@@ -45,7 +42,6 @@ class UploadResponse(BaseModel):
    success: bool
    files: list[dict[str, str]]
    message: str
-    skipped_files: list[str] = Field(default_factory=list)


 class UploadLimits(BaseModel):
@@ -120,18 +116,17 @@ def _cleanup_uploaded_paths(paths: list[os.PathLike[str] | str]) -> None:
            logger.warning("Failed to clean up upload path after rejected request: %s", path, exc_info=True)


-async def _write_upload_file_with_limits(
+async def _write_upload_file_streaming(
    file: UploadFile,
+    file_path: os.PathLike[str] | str,
    *,
-    uploads_dir: os.PathLike[str] | str,
    display_filename: str,
    max_single_file_size: int,
    max_total_size: int,
    total_size: int,
-) -> tuple[os.PathLike[str] | str, int, int]:
+) -> tuple[int, int]:
    file_size = 0
-    file_path, fh = open_upload_file_no_symlink(uploads_dir, display_filename)
-    try:
+    with open(file_path, "wb") as output:
        while chunk := await file.read(UPLOAD_CHUNK_SIZE):
            file_size += len(chunk)
            total_size += len(chunk)
@@ -139,17 +134,8 @@ async def _write_upload_file_with_limits(
                raise HTTPException(status_code=413, detail=f"File too large: {display_filename}")
            if total_size > max_total_size:
                raise HTTPException(status_code=413, detail="Total upload size too large")
-            fh.write(chunk)
-    except Exception:
-        fh.close()
-        try:
-            os.unlink(file_path)
-        except FileNotFoundError:
-            pass
-        raise
-    else:
-        fh.close()
-    return file_path, file_size, total_size
+            output.write(chunk)
+    return file_size, total_size


 def _auto_convert_documents_enabled(app_config: AppConfig) -> bool:
@@ -191,12 +177,7 @@ async def upload_files(
    uploaded_files = []
    written_paths = []
    sandbox_sync_targets = []
-    skipped_files = []
    total_size = 0
-    # Track filenames within this request so duplicate form parts do not
-    # silently truncate each other. Existing uploads keep the historical
-    # overwrite behavior for a single replacement upload.
-    seen_filenames: set[str] = set()

    sandbox_provider = get_sandbox_provider()
    sync_to_sandbox = not _uses_thread_data_mounts(sandbox_provider)
@@ -213,22 +194,22 @@ async def upload_files(
            continue

        try:
-            original_filename = normalize_filename(file.filename)
-            safe_filename = claim_unique_filename(original_filename, seen_filenames)
+            safe_filename = normalize_filename(file.filename)
        except ValueError:
            logger.warning(f"Skipping file with unsafe filename: {file.filename!r}")
            continue

        try:
-            file_path, file_size, total_size = await _write_upload_file_with_limits(
+            file_path = uploads_dir / safe_filename
+            written_paths.append(file_path)
+            file_size, total_size = await _write_upload_file_streaming(
                file,
-                uploads_dir=uploads_dir,
+                file_path,
                display_filename=safe_filename,
                max_single_file_size=limits.max_file_size,
                max_total_size=limits.max_total_size,
                total_size=total_size,
            )
-            written_paths.append(file_path)

            virtual_path = upload_virtual_path(safe_filename)

@@ -242,8 +223,6 @@ async def upload_files(
                "virtual_path": virtual_path,
                "artifact_url": upload_artifact_url(thread_id, safe_filename),
            }
-            if safe_filename != original_filename:
-                file_info["original_filename"] = original_filename

            logger.info(f"Saved file: {safe_filename} ({file_size} bytes) to {file_info['path']}")

@@ -267,10 +246,6 @@ async def upload_files(
        except HTTPException as e:
            _cleanup_uploaded_paths(written_paths)
            raise e
-        except UnsafeUploadPathError as e:
-            logger.warning("Skipping upload with unsafe destination %s: %s", file.filename, e)
-            skipped_files.append(safe_filename)
-            continue
        except Exception as e:
            logger.error(f"Failed to upload {file.filename}: {e}")
            _cleanup_uploaded_paths(written_paths)
@@ -281,15 +256,10 @@ async def upload_files(
            _make_file_sandbox_writable(file_path)
            sandbox.update_file(virtual_path, file_path.read_bytes())

-    message = f"Successfully uploaded {len(uploaded_files)} file(s)"
-    if skipped_files:
-        message += f"; skipped {len(skipped_files)} unsafe file(s)"
-
    return UploadResponse(
-        success=not skipped_files,
+        success=True,
        files=uploaded_files,
-        message=message,
-        skipped_files=skipped_files,
+        message=f"Successfully uploaded {len(uploaded_files)} file(s)",
    )


@@ -19,7 +19,6 @@ from langchain_core.messages import HumanMessage

 from app.gateway.deps import get_run_context, get_run_manager, get_stream_bridge
 from app.gateway.utils import sanitize_log_param
-from deerflow.config.app_config import get_app_config
 from deerflow.runtime import (
    END_SENTINEL,
    HEARTBEAT_SENTINEL,
@@ -137,24 +136,6 @@ def merge_run_context_overrides(config: dict[str, Any], context: Mapping[str, An
                runtime_context.setdefault(key, context[key])


-def inject_authenticated_user_context(config: dict[str, Any], request: Request) -> None:
-    """Stamp the authenticated user into the run context for background tools.
-
-    Tool execution may happen after the request handler has returned, so tools
-    that persist user-scoped files should not rely only on ambient ContextVars.
-    The value comes from server-side auth state, never from client context.
-    """
-
-    user = getattr(request.state, "user", None)
-    user_id = getattr(user, "id", None)
-    if user_id is None:
-        return
-
-    runtime_context = config.setdefault("context", {})
-    if isinstance(runtime_context, dict):
-        runtime_context["user_id"] = str(user_id)
-
-
 def resolve_agent_factory(assistant_id: str | None):
    """Resolve the agent factory callable from config.

@@ -268,23 +249,6 @@ async def start_run(

    disconnect = DisconnectMode.cancel if body.on_disconnect == "cancel" else DisconnectMode.continue_

-    body_context = getattr(body, "context", None) or {}
-    model_name = body_context.get("model_name")
-
-    # Coerce non-string model_name values to str before truncation.
-    if model_name is not None and not isinstance(model_name, str):
-        model_name = str(model_name)
-
-    # Validate model against the allowlist when a model_name is provided.
-    if model_name:
-        app_config = get_app_config()
-        resolved = app_config.get_model_config(model_name)
-        if resolved is None:
-            raise HTTPException(
-                status_code=400,
-                detail=f"Model {model_name!r} is not in the configured model allowlist",
-            )
-
    try:
        record = await run_mgr.create_or_reject(
            thread_id,
@@ -293,7 +257,6 @@ async def start_run(
            metadata=body.metadata or {},
            kwargs={"input": body.input, "config": body.config},
            multitask_strategy=body.multitask_strategy,
-            model_name=model_name,
        )
    except ConflictError as exc:
        raise HTTPException(status_code=409, detail=str(exc)) from exc
@@ -325,7 +288,6 @@ async def start_run(
    # that carries agent configuration (model_name, thinking_enabled, etc.).
    # Only agent-relevant keys are forwarded; unknown keys (e.g. thread_id) are ignored.
    merge_run_context_overrides(config, getattr(body, "context", None))
-    inject_authenticated_user_context(config, request)

    stream_modes = normalize_stream_modes(body.stream_mode)

@@ -79,9 +79,7 @@ async def main():
    from langgraph.runtime import Runtime

    from deerflow.agents import make_lead_agent
-    from deerflow.config.paths import get_paths
    from deerflow.mcp import initialize_mcp_tools
-    from deerflow.runtime.user_context import get_effective_user_id

    # Initialize MCP tools at startup
    try:
@@ -115,8 +113,6 @@ async def main():
        print("Tip: `uv sync --group dev` to enable arrow-key & history support")
    print("=" * 50)

-    seen_artifacts: set[str] = set()
-
    while True:
        try:
            if session:
@@ -138,22 +134,6 @@ async def main():
                last_message = result["messages"][-1]
                print(f"\nAgent: {last_message.content}")

-            # Show files presented to the user this turn (new artifacts only)
-            artifacts = result.get("artifacts") or []
-            new_artifacts = [p for p in artifacts if p not in seen_artifacts]
-            if new_artifacts:
-                thread_id = config["configurable"]["thread_id"]
-                user_id = get_effective_user_id()
-                paths = get_paths()
-                print("\n[Presented files]")
-                for virtual in new_artifacts:
-                    try:
-                        physical = paths.resolve_virtual_path(thread_id, virtual, user_id=user_id)
-                        print(f"  - {virtual}\n    → {physical}")
-                    except ValueError as exc:
-                        print(f"  - {virtual}    (failed to resolve physical path: {exc})")
-                seen_artifacts.update(new_artifacts)
-
        except (KeyboardInterrupt, EOFError):
            print("\nGoodbye!")
            break
@@ -6,16 +6,16 @@ This document provides a complete reference for the DeerFlow backend APIs.

 DeerFlow backend exposes two sets of APIs:

-1. **LangGraph-compatible API** - Agent interactions, threads, and streaming (`/api/langgraph/*`)
+1. **LangGraph API** - Agent interactions, threads, and streaming (`/api/langgraph/*`)
 2. **Gateway API** - Models, MCP, skills, uploads, and artifacts (`/api/*`)

 All APIs are accessed through the Nginx reverse proxy at port 2026.

-## LangGraph-compatible API
+## LangGraph API

 Base URL: `/api/langgraph`

-The public LangGraph-compatible API follows LangGraph SDK conventions. In the unified nginx deployment, Gateway owns `/api/langgraph/*` and translates those paths to its native `/api/*` run, thread, and streaming routers.
+The LangGraph API is provided by the LangGraph server and follows the LangGraph SDK conventions.

 ### Threads

@@ -104,11 +104,17 @@ Content-Type: application/json
 **Recursion Limit:**

 `config.recursion_limit` caps the number of graph steps LangGraph will execute
-in a single run. The unified Gateway path defaults to `100` in
-`build_run_config` (see `backend/app/gateway/services.py`), which is a safer
-starting point for plan-mode or subagent-heavy runs. Clients can still set
-`recursion_limit` explicitly in the request body; increase it if you run deeply
-nested subagent graphs.
+in a single run. The `/api/langgraph/*` endpoints go straight to the LangGraph
+server and therefore inherit LangGraph's native default of **25**, which is
+too low for plan-mode or subagent-heavy runs — the agent typically errors out
+with `GraphRecursionError` after the first round of subagent results comes
+back, before the lead agent can synthesize the final answer.
+
+DeerFlow's own Gateway and IM-channel paths mitigate this by defaulting to
+`100` in `build_run_config` (see `backend/app/gateway/services.py`), but
+clients calling the LangGraph API directly must set `recursion_limit`
+explicitly in the request body. `100` matches the Gateway default and is a
+safe starting point; increase it if you run deeply nested subagent graphs.

 **Configurable Options:**
 - `model_name` (string): Override the default model
@@ -535,28 +541,14 @@ All APIs return errors in a consistent format:

 ## Authentication

-DeerFlow enforces authentication for all non-public HTTP routes. Public routes are limited to health/docs metadata and these public auth endpoints:
+Currently, DeerFlow does not implement authentication. All APIs are accessible without credentials.

- `POST /api/v1/auth/initialize` creates the first admin account when no admin exists.
- `POST /api/v1/auth/login/local` logs in with email/password and sets an HttpOnly `access_token` cookie.
- `POST /api/v1/auth/register` creates a regular `user` account and sets the session cookie.
- `POST /api/v1/auth/logout` clears the session cookie.
- `GET /api/v1/auth/setup-status` reports whether the first admin still needs to be created.
+Note: This is about DeerFlow API authentication. MCP outbound connections can still use OAuth for configured HTTP/SSE MCP servers.

-The authenticated auth endpoints are:
-
- `GET /api/v1/auth/me` returns the current user.
- `POST /api/v1/auth/change-password` changes password, optionally changes email during setup, increments `token_version`, and reissues the cookie.
-
-Protected state-changing requests also require the CSRF double-submit token: send the `csrf_token` cookie value as the `X-CSRF-Token` header. Login/register/initialize/logout are bootstrap auth endpoints: they are exempt from the double-submit token but still reject hostile browser `Origin` headers.
-
-User isolation is enforced from the authenticated user context:
-
- Thread metadata is scoped by `threads_meta.user_id`; search/read/write/delete APIs only expose the current user's threads.
- Thread files live under `{base_dir}/users/{user_id}/threads/{thread_id}/user-data/` and are exposed inside the sandbox as `/mnt/user-data/`.
- Memory and custom agents are stored under `{base_dir}/users/{user_id}/...`.
-
-Note: MCP outbound connections can still use OAuth for configured HTTP/SSE MCP servers; that is separate from DeerFlow API authentication.
+For production deployments, it is recommended to:
+1. Use Nginx for basic auth or OAuth integration
+2. Deploy behind a VPN or private network
+3. Implement custom authentication middleware

 ---

@@ -575,13 +567,12 @@ location /api/ {

 ---

-## Streaming Support
+## WebSocket Support

-Gateway's LangGraph-compatible API streams run events with Server-Sent Events (SSE):
+The LangGraph server supports WebSocket connections for real-time streaming. Connect to:

-```http
-POST /api/langgraph/threads/{thread_id}/runs/stream
-Accept: text/event-stream
+```
+ws://localhost:2026/api/langgraph/threads/{thread_id}/runs/stream
 ```

 ---
@@ -617,21 +608,13 @@ const response = await fetch('/api/models');
 const data = await response.json();
 console.log(data.models);

-// Create a run and stream SSE events
-const streamResponse = await fetch(`/api/langgraph/threads/${threadId}/runs/stream`, {
-  method: "POST",
-  headers: {
-    "Content-Type": "application/json",
-    Accept: "text/event-stream",
-  },
-  body: JSON.stringify({
-    input: { messages: [{ role: "user", content: "Hello" }] },
-    stream_mode: ["values", "messages-tuple", "custom"],
-  }),
-});
-
-const reader = streamResponse.body?.getReader();
-// Decode and parse SSE frames from reader in your client code.
+// Using EventSource for streaming
+const eventSource = new EventSource(
+  `/api/langgraph/threads/${threadId}/runs/stream`
+);
+eventSource.onmessage = (event) => {
+  console.log(JSON.parse(event.data));
+};
 ```

 ### cURL Examples
@@ -666,7 +649,7 @@ curl -X POST http://localhost:2026/api/langgraph/threads/abc123/runs \
  }'
 ```

-> The unified Gateway path defaults `config.recursion_limit` to 100 for
-> plan-mode and subagent-heavy runs. Clients may still set
-> `config.recursion_limit` explicitly — see the [Create Run](#create-run)
-> section for details.
+> The `/api/langgraph/*` endpoints bypass DeerFlow's Gateway and inherit
+> LangGraph's native `recursion_limit` default of 25, which is too low for
+> plan-mode or subagent runs. Set `config.recursion_limit` explicitly — see
+> the [Create Run](#create-run) section for details.
@@ -14,28 +14,30 @@ This document provides a comprehensive overview of the DeerFlow backend architec
 │                          Nginx (Port 2026)                               │
 │                    Unified Reverse Proxy Entry Point                      │
 │  ┌────────────────────────────────────────────────────────────────────┐  │
-│  │  /api/langgraph/*  →  Gateway LangGraph-compatible runtime (8001)  │  │
-│  │  /api/*            →  Gateway REST APIs (8001)                     │  │
+│  │  /api/langgraph/*  →  LangGraph Server (2024)                      │  │
+│  │  /api/*            →  Gateway API (8001)                           │  │
 │  │  /*                →  Frontend (3000)                               │  │
 │  └────────────────────────────────────────────────────────────────────┘  │
 └─────────────────────────────────┬────────────────────────────────────────┘
                                  │
-          ┌───────────────────────┴───────────────────────┐
-          │                                               │
-          ▼                                               ▼
-┌─────────────────────────────────────────────┐ ┌─────────────────────┐
-│              Gateway API                    │ │     Frontend        │
-│              (Port 8001)                    │ │    (Port 3000)      │
-│                                             │ │                     │
-│  - LangGraph-compatible runs/threads API    │ │  - Next.js App      │
-│  - Embedded Agent Runtime                   │ │  - React UI         │
-│  - SSE Streaming                            │ │  - Chat Interface   │
-│  - Checkpointing                            │ │                     │
-│  - Models, MCP, Skills, Uploads, Artifacts  │ │                     │
-│  - Thread Cleanup                           │ │                     │
-└─────────────────────────────────────────────┘ └─────────────────────┘
-          │
-          ▼
+          ┌───────────────────────┼───────────────────────┐
+          │                       │                       │
+          ▼                       ▼                       ▼
+┌─────────────────────┐ ┌─────────────────────┐ ┌─────────────────────┐
+│   LangGraph Server  │ │    Gateway API      │ │     Frontend        │
+│     (Port 2024)     │ │    (Port 8001)      │ │    (Port 3000)      │
+│                     │ │                     │ │                     │
+│  - Agent Runtime    │ │  - Models API       │ │  - Next.js App      │
+│  - Thread Mgmt      │ │  - MCP Config       │ │  - React UI         │
+│  - SSE Streaming    │ │  - Skills Mgmt      │ │  - Chat Interface   │
+│  - Checkpointing    │ │  - File Uploads     │ │                     │
+│                     │ │  - Thread Cleanup   │ │                     │
+│                     │ │  - Artifacts        │ │                     │
+└─────────────────────┘ └─────────────────────┘ └─────────────────────┘
+          │                       │
+          │     ┌─────────────────┘
+          │     │
+          ▼     ▼
 ┌──────────────────────────────────────────────────────────────────────────┐
 │                         Shared Configuration                              │
 │  ┌─────────────────────────┐  ┌────────────────────────────────────────┐ │
@@ -50,9 +52,9 @@ This document provides a comprehensive overview of the DeerFlow backend architec

 ## Component Details

-### Gateway Embedded Agent Runtime
+### LangGraph Server

-The agent runtime is embedded in the FastAPI Gateway and built on LangGraph for robust multi-agent workflow orchestration. Nginx rewrites `/api/langgraph/*` to Gateway's native `/api/*` routes, so the public API remains compatible with LangGraph SDK clients without running a separate LangGraph server.
+The LangGraph server is the core agent runtime, built on LangGraph for robust multi-agent workflow orchestration.

 **Entry Point**: `packages/harness/deerflow/agents/lead_agent/agent.py:make_lead_agent`

@@ -63,8 +65,7 @@ The agent runtime is embedded in the FastAPI Gateway and built on LangGraph for
 - Tool execution orchestration
 - SSE streaming for real-time responses

-**Graph registry**: `langgraph.json` remains available for tooling, Studio, or direct LangGraph Server compatibility.
-It is not the default service entrypoint; scripts and Docker deployments run the Gateway embedded runtime.
+**Configuration**: `langgraph.json`

 ```json
 {
@@ -77,13 +78,12 @@ It is not the default service entrypoint; scripts and Docker deployments run the

 ### Gateway API

-FastAPI application providing REST endpoints plus the public LangGraph-compatible `/api/langgraph/*` runtime routes.
+FastAPI application providing REST endpoints for non-agent operations.

 **Entry Point**: `app/gateway/app.py`

 **Routers**:
 - `models.py` - `/api/models` - Model listing and details
- `thread_runs.py` / `runs.py` - `/api/threads/{id}/runs`, `/api/runs/*` - LangGraph-compatible runs and streaming
 - `mcp.py` - `/api/mcp` - MCP server configuration
 - `skills.py` - `/api/skills` - Skills management
 - `uploads.py` - `/api/threads/{id}/uploads` - File upload
@@ -91,7 +91,7 @@ FastAPI application providing REST endpoints plus the public LangGraph-compatibl
 - `artifacts.py` - `/api/threads/{id}/artifacts` - Artifact serving
 - `suggestions.py` - `/api/threads/{id}/suggestions` - Follow-up suggestion generation

-The web conversation delete flow first deletes Gateway-managed thread state through the LangGraph-compatible route, then the Gateway `threads.py` router removes DeerFlow-managed filesystem data via `Paths.delete_thread_dir()`.
+The web conversation delete flow is now split across both backend surfaces: LangGraph handles `DELETE /api/langgraph/threads/{thread_id}` for thread state, then the Gateway `threads.py` router removes DeerFlow-managed filesystem data via `Paths.delete_thread_dir()`.

 ### Agent Architecture

@@ -353,10 +353,10 @@ SKILL.md Format:
   POST /api/langgraph/threads/{thread_id}/runs
   {"input": {"messages": [{"role": "user", "content": "Hello"}]}}

-2. Nginx → Gateway API (8001)
-   `/api/langgraph/*` is rewritten to Gateway's LangGraph-compatible `/api/*` routes
+2. Nginx → LangGraph Server (2024)
+   Proxied to LangGraph server

-3. Gateway embedded runtime
+3. LangGraph Server
   a. Load/create thread state
   b. Execute middleware chain:
      - ThreadDataMiddleware: Set up paths
@@ -412,7 +412,7 @@ SKILL.md Format:
 ### Thread Cleanup Flow

 ```
-1. Client deletes conversation via the LangGraph-compatible Gateway route
+1. Client deletes conversation via LangGraph
   DELETE /api/langgraph/threads/{thread_id}

 2. Web UI follows up with Gateway cleanup
@@ -1,331 +0,0 @@
-# 用户认证与隔离设计
-
-本文档描述 DeerFlow 当前内置认证模块的设计，而不是历史 RFC。它覆盖浏览器登录、API 认证、CSRF、用户隔离、首次初始化、密码重置、内部调用和升级迁移。
-
-## 设计目标
-
-认证模块的核心目标是把 DeerFlow 从“本地单用户工具”提升为“可多用户部署的 agent runtime”，并让用户身份贯穿 HTTP API、LangGraph-compatible runtime、文件系统、memory、自定义 agent 和反馈数据。
-
-设计约束：
-
- 默认强制认证：除健康检查、文档和 auth bootstrap 端点外，HTTP 路由都必须有有效 session。
- 服务端持有所有权：客户端 metadata 不能声明 `user_id` 或 `owner_id`。
- 隔离默认开启：repository（仓储）、文件路径、memory、agent 配置默认按当前用户解析。
- 旧数据可升级：无认证版本留下的 thread 可以在 admin 存在后迁移到 admin。
- 密码不进日志：首次初始化由操作者设置密码；`reset_admin` 只写 0600 凭据文件。
-
-非目标：
-
- 当前 OAuth 端点只是占位，尚未实现第三方登录。
- 当前用户角色只有 `admin` 和 `user`，尚未实现细粒度 RBAC。
- 当前登录限速是进程内字典，多 worker 下不是全局精确限速。
-
-## 核心模型
-
-```mermaid
-graph TB
-  classDef actor fill:#D8CFC4,stroke:#6E6259,color:#2F2A26;
-  classDef api fill:#C9D7D2,stroke:#5D706A,color:#21302C;
-  classDef state fill:#D7D3E8,stroke:#6B6680,color:#29263A;
-  classDef data fill:#E5D2C4,stroke:#806A5B,color:#30251E;
-
-  Browser["Browser — access_token cookie and csrf_token cookie"]:::actor
-  AuthMiddleware["AuthMiddleware — strict session gate"]:::api
-  CSRFMiddleware["CSRFMiddleware — double-submit token and Origin check"]:::api
-  AuthRoutes["Auth routes — initialize login register logout me change-password"]:::api
-  UserContext["Current user ContextVar — request-scoped identity"]:::state
-  Repositories["Repositories — AUTO resolves user_id from context"]:::state
-  Files["Filesystem — users/{user_id}/threads/{thread_id}/user-data"]:::data
-  Memory["Memory and agents — users/{user_id}/memory.json and agents"]:::data
-
-  Browser --> AuthMiddleware
-  Browser --> CSRFMiddleware
-  AuthMiddleware --> AuthRoutes
-  AuthMiddleware --> UserContext
-  UserContext --> Repositories
-  UserContext --> Files
-  UserContext --> Memory
-```
-
-### 用户表
-
-用户记录定义在 `app.gateway.auth.models.User`，持久化到 `users` 表。关键字段：
-
-| 字段 | 语义 |
-|---|---|
-| `id` | 用户主键，JWT `sub` 使用该值 |
-| `email` | 唯一登录名 |
-| `password_hash` | bcrypt hash，OAuth 用户可为空 |
-| `system_role` | `admin` 或 `user` |
-| `needs_setup` | reset 后要求用户完成邮箱 / 密码设置 |
-| `token_version` | 改密码或 reset 时递增，用于废弃旧 JWT |
-
-### 运行时身份
-
-认证成功后，`AuthMiddleware` 把用户同时写入：
-
- `request.state.user`
- `request.state.auth`
- `deerflow.runtime.user_context` 的 `ContextVar`
-
-`ContextVar` 是这里的核心边界。上层 Gateway 负责写入身份，下层 persistence / file path 只读取结构化的当前用户，不反向依赖 `app.gateway.auth` 具体类型。
-
-可以把 repository 调用的用户参数理解成一个三态 ADT：
-
-```scala
-enum UserScope:
-  case AutoFromContext
-  case Explicit(userId: String)
-  case BypassForMigration
-```
-
-对应 Python 实现是 `AUTO | str | None`：
-
- `AUTO`：从 `ContextVar` 解析当前用户；没有上下文则抛错。
- `str`：显式指定用户，主要用于测试或管理脚本。
- `None`：跳过用户过滤，只允许迁移脚本或 admin CLI 使用。
-
-## 登录与初始化流程
-
-### 首次初始化
-
-首次启动时，如果没有 admin，服务不会自动创建账号，只记录日志提示访问 `/setup`。
-
-流程：
-
-1. 用户访问 `/setup`。
-2. 前端调用 `GET /api/v1/auth/setup-status`。
-3. 如果返回 `{"needs_setup": true}`，前端展示创建 admin 表单。
-4. 表单提交 `POST /api/v1/auth/initialize`。
-5. 服务端确认当前没有 admin，创建 `system_role="admin"`、`needs_setup=false` 的用户。
-6. 服务端设置 `access_token` HttpOnly cookie，用户进入 workspace。
-
-`/api/v1/auth/initialize` 只在没有 admin 时可用。并发初始化由数据库唯一约束兜底，失败方返回 409。
-
-### 普通登录
-
-`POST /api/v1/auth/login/local` 使用 `OAuth2PasswordRequestForm`：
-
- `username` 是邮箱。
- `password` 是密码。
- 成功后签发 JWT，放入 `access_token` HttpOnly cookie。
- 响应体只返回 `expires_in` 和 `needs_setup`，不返回 token。
-
-登录失败会按客户端 IP 计数。IP 解析只在 TCP peer 属于 `AUTH_TRUSTED_PROXIES` 时信任 `X-Real-IP`，不使用 `X-Forwarded-For`。
-
-### 注册
-
-`POST /api/v1/auth/register` 创建普通 `user`，并自动登录。
-
-当前实现允许在没有 admin 时注册普通用户，但 `setup-status` 仍会返回 `needs_setup=true`，因为 admin 仍不存在。这是当前产品策略边界：如果后续要求“必须先初始化 admin 才能注册普通用户”，需要在 `/register` 增加 admin-exists gate。
-
-### 改密码与 reset setup
-
-`POST /api/v1/auth/change-password` 需要当前密码和新密码：
-
- 校验当前密码。
- 更新 bcrypt hash。
- `token_version += 1`，使旧 JWT 立即失效。
- 重新签发 cookie。
- 如果 `needs_setup=true` 且传了 `new_email`，则更新邮箱并清除 `needs_setup`。
-
-`python -m app.gateway.auth.reset_admin` 会：
-
- 找到 admin 或指定邮箱用户。
- 生成随机密码。
- 更新密码 hash。
- `token_version += 1`。
- 设置 `needs_setup=true`。
- 写入 `.deer-flow/admin_initial_credentials.txt`，权限 `0600`。
-
-命令行只输出凭据文件路径，不输出明文密码。
-
-## HTTP 认证边界
-
-`AuthMiddleware` 是 fail-closed（默认拒绝）的全局认证门。
-
-公开路径：
-
- `/health`
- `/docs`
- `/redoc`
- `/openapi.json`
- `/api/v1/auth/login/local`
- `/api/v1/auth/register`
- `/api/v1/auth/logout`
- `/api/v1/auth/setup-status`
- `/api/v1/auth/initialize`
-
-其余路径都要求有效 `access_token` cookie。存在 cookie 但 JWT 无效、过期、用户不存在或 `token_version` 不匹配时，直接返回 401，而不是让请求穿透到业务路由。
-
-路由级别的 owner check 由 `require_permission(..., owner_check=True)` 完成：
-
- 读类请求允许旧的未追踪 legacy thread 兼容读取。
- 写 / 删除类请求使用 `require_existing=True`，要求 thread row 存在且属于当前用户，避免删除后缺 row 导致其他用户误通过。
-
-## CSRF 设计
-
-DeerFlow 使用 Double Submit Cookie：
-
- 服务端设置 `csrf_token` cookie。
- 前端 state-changing 请求发送同值 `X-CSRF-Token` header。
- 服务端用 `secrets.compare_digest` 比较 cookie/header。
-
-需要 CSRF 的方法：
-
- `POST`
- `PUT`
- `DELETE`
- `PATCH`
-
-auth bootstrap 端点（login/register/initialize/logout）不要求 double-submit token，因为首次调用时浏览器还没有 token；但这些端点会校验 browser `Origin`，拒绝 hostile Origin，避免 login CSRF / session fixation。
-
-## 用户隔离
-
-### Thread metadata
-
-Thread metadata 存在 `threads_meta`，关键隔离字段是 `user_id`。
-
-创建 thread 时：
-
- 客户端传入的 `metadata.user_id` 和 `metadata.owner_id` 会被剥离。
- `ThreadMetaRepository.create(..., user_id=AUTO)` 从 `ContextVar` 解析真实用户。
- `/api/threads/search` 默认只返回当前用户的 thread。
-
-读取 / 修改 / 删除时：
-
- `get()` 默认按当前用户过滤。
- `check_access()` 用于路由 owner check。
- 对其他用户的 thread 返回 404，避免泄露资源存在性。
-
-### 文件系统
-
-当前线程文件布局：
-
-```text
-{base_dir}/users/{user_id}/threads/{thread_id}/user-data/
-├── workspace/
-├── uploads/
-└── outputs/
-```
-
-agent 在 sandbox 内看到统一虚拟路径：
-
-```text
-/mnt/user-data/workspace
-/mnt/user-data/uploads
-/mnt/user-data/outputs
-```
-
-`ThreadDataMiddleware` 使用 `get_effective_user_id()` 解析当前用户并生成线程路径。没有认证上下文时会落到 `default` 用户桶，主要用于内部调用、嵌入式 client 或无 HTTP 的本地执行路径。
-
-### Memory
-
-默认 memory 存储：
-
-```text
-{base_dir}/users/{user_id}/memory.json
-{base_dir}/users/{user_id}/agents/{agent_name}/memory.json
-```
-
-有用户上下文时，空或相对 `memory.storage_path` 都使用上述 per-user 默认路径；只有绝对 `memory.storage_path` 会视为显式 opt-out（退出） per-user isolation，所有用户共享该路径。无用户上下文的 legacy 路径仍会把相对 `storage_path` 解析到 `Paths.base_dir` 下。
-
-### 自定义 agent
-
-用户自定义 agent 写入：
-
-```text
-{base_dir}/users/{user_id}/agents/{agent_name}/
-├── config.yaml
-├── SOUL.md
-└── memory.json
-```
-
-旧布局 `{base_dir}/agents/{agent_name}/` 只作为只读兼容回退。更新或删除旧共享 agent 会要求先运行迁移脚本。
-
-## 内部调用与 IM 渠道
-
-IM channel worker 不是浏览器用户，不持有浏览器 cookie。它们通过 Gateway 内部认证：
-
- 请求带 `X-DeerFlow-Internal-Token`。
- 同时带匹配的 CSRF cookie/header。
- 服务端识别为内部用户，`id="default"`、`system_role="internal"`。
-
-这意味着 channel 产生的数据默认进入 `default` 用户桶。这个选择适合“平台级 bot 身份”，但不是“每个 IM 用户单独隔离”。如果后续要做到外部 IM 用户隔离，需要把外部 platform user 映射到 DeerFlow user，并让 channel manager 设置对应的 scoped identity。
-
-## LangGraph-compatible 认证
-
-Gateway 内嵌 runtime 路径由 `AuthMiddleware` 和 `CSRFMiddleware` 保护。
-
-仓库仍保留 `app.gateway.langgraph_auth`，用于 LangGraph Server 直连模式：
-
- `@auth.authenticate` 校验 JWT cookie、CSRF、用户存在性和 `token_version`。
- `@auth.on` 在写入 metadata 时注入 `user_id`，并在读路径返回 `{"user_id": current_user}` 过滤条件。
-
-这保证 Gateway 路由和 LangGraph-compatible 直连模式使用同一 JWT 语义。
-
-## 升级与迁移
-
-从无认证版本升级时，可能存在没有 `user_id` 的历史 thread。
-
-当前策略：
-
-1. 首次启动如果没有 admin，只提示访问 `/setup`，不迁移。
-2. 操作者创建 admin。
-3. 后续启动时，`_ensure_admin_user()` 找到 admin，并把 LangGraph store 中缺少 `metadata.user_id` 的 thread 迁移到 admin。
-
-文件系统旧布局迁移由脚本处理：
-
-```bash
-cd backend
-PYTHONPATH=. python scripts/migrate_user_isolation.py --dry-run
-PYTHONPATH=. python scripts/migrate_user_isolation.py --user-id <target-user-id>
-```
-
-迁移脚本覆盖 legacy `memory.json`、`threads/` 和 `agents/` 到 per-user layout。
-
-## 安全不变量
-
-必须长期保持的不变量：
-
- JWT 只在 HttpOnly cookie 中传输，不出现在响应 JSON。
- 任何非 public HTTP 路由都不能只靠“cookie 存在”放行，必须严格验证 JWT。
- `token_version` 不匹配必须拒绝，保证改密码 / reset 后旧 session 失效。
- 客户端 metadata 中的 `user_id` / `owner_id` 必须剥离。
- repository 默认 `AUTO` 必须从当前用户上下文解析，不能静默退化成全局查询。
- 只有迁移脚本和 admin CLI 可以显式传 `user_id=None` 绕过隔离。
- 本地文件路径必须通过 `Paths` 和 sandbox path validation 解析，不能拼接未校验的用户输入。
- 捕获认证、迁移、后台任务异常必须记录日志；不能空 catch。
-
-## 已知边界
-
-| 边界 | 当前行为 | 后续方向 |
-|---|---|---|
-| 无 admin 时注册普通用户 | 允许注册普通 `user` | 如产品要求先初始化 admin，给 `/register` 加 gate |
-| 登录限速 | 进程内 dict，单 worker 精确，多 worker 近似 | Redis / DB-backed rate limiter |
-| OAuth | 端点占位，未实现 | 接入 provider 并统一 `token_version` / role 语义 |
-| IM 用户隔离 | channel 使用 `default` 内部用户 | 建立外部用户到 DeerFlow user 的映射 |
-| 绝对 memory path | 显式共享 memory | UI / docs 明确提示 opt-out 风险 |
-
-## 相关文件
-
-| 文件 | 职责 |
-|---|---|
-| `app/gateway/auth_middleware.py` | 全局认证门、JWT 严格验证、写入 user context |
-| `app/gateway/csrf_middleware.py` | CSRF double-submit 和 auth Origin 校验 |
-| `app/gateway/routers/auth.py` | initialize/login/register/logout/me/change-password |
-| `app/gateway/auth/jwt.py` | JWT 创建与解析 |
-| `app/gateway/auth/reset_admin.py` | 密码 reset CLI |
-| `app/gateway/auth/credential_file.py` | 0600 凭据文件写入 |
-| `app/gateway/authz.py` | 路由权限与 owner check |
-| `deerflow/runtime/user_context.py` | 当前用户 ContextVar 与 `AUTO` sentinel |
-| `deerflow/persistence/thread_meta/` | thread metadata owner filter |
-| `deerflow/config/paths.py` | per-user filesystem layout |
-| `deerflow/agents/middlewares/thread_data_middleware.py` | run 时解析用户线程目录 |
-| `deerflow/agents/memory/storage.py` | per-user memory storage |
-| `deerflow/config/agents_config.py` | per-user custom agents |
-| `app/channels/manager.py` | IM channel 内部认证调用 |
-| `scripts/migrate_user_isolation.py` | legacy 数据迁移到 per-user layout |
-| `.deer-flow/data/deerflow.db` | 统一 SQLite 数据库，包含 users / threads_meta / runs / feedback 等表 |
-| `.deer-flow/users/{user_id}/agents/{agent_name}/` | 用户自定义 agent 配置、SOUL 和 agent memory |
-| `.deer-flow/admin_initial_credentials.txt` | `reset_admin` 生成的新凭据文件（0600，读完应删除） |
@@ -24,11 +24,11 @@ All other test plan sections were executed against either:

 | Case | Title | What it covers | Why not run |
 |---|---|---|---|
-| TC-DOCKER-01 | `deerflow.db` volume persistence | Verify the `DEER_FLOW_HOME` bind mount survives container restart | needs `docker compose up` |
+| TC-DOCKER-01 | `users.db` volume persistence | Verify the `DEER_FLOW_HOME` bind mount survives container restart | needs `docker compose up` |
 | TC-DOCKER-02 | Session persistence across container restart | `AUTH_JWT_SECRET` env var keeps cookies valid after `docker compose down && up` | needs `docker compose down/up` |
 | TC-DOCKER-03 | Per-worker rate limiter divergence | Confirms in-process `_login_attempts` dict doesn't share state across `gunicorn` workers (4 by default in the compose file); known limitation, documented | needs multi-worker container |
-| TC-DOCKER-04 | IM channels use internal Gateway auth | Verify Feishu/Slack/Telegram dispatchers attach the process-local internal auth header plus CSRF cookie/header when calling Gateway-compatible LangGraph APIs | needs `docker logs` |
-| TC-DOCKER-05 | Reset credentials surfacing | `reset_admin` writes a 0600 credential file in `DEER_FLOW_HOME` instead of logging plaintext. The file-based behavior is validated by non-Docker reset tests, so the only Docker-specific gap is verifying the volume mount carries the file out to the host | needs container + host volume |
+| TC-DOCKER-04 | IM channels skip AuthMiddleware | Verify Feishu/Slack/Telegram dispatchers run in-container against `http://langgraph:2024` without going through nginx | needs `docker logs` |
+| TC-DOCKER-05 | Admin credentials surfacing | **Updated post-simplify** — was "log scrape", now "0600 credential file in `DEER_FLOW_HOME`". The file-based behavior is already validated by TC-1.1 + TC-UPG-13 on sg_dev (non-Docker), so the only Docker-specific gap is verifying the volume mount carries the file out to the host | needs container + host volume |
 | TC-DOCKER-06 | Gateway-mode Docker deploy | `./scripts/deploy.sh --gateway` produces a 3-container topology (no `langgraph` container); same auth flow as standard mode | needs `docker compose --profile gateway` |

 ## Coverage already provided by non-Docker tests
@@ -41,8 +41,8 @@ the test cases that ran on sg_dev or local:
 | TC-DOCKER-01 (volume persistence) | TC-REENT-01 on sg_dev (admin row survives gateway restart) — same SQLite file, just no container layer between |
 | TC-DOCKER-02 (session persistence) | TC-API-02/03/06 (cookie roundtrip), plus TC-REENT-04 (multi-cookie) — JWT verification is process-state-free, container restart is equivalent to `pkill uvicorn && uv run uvicorn` |
 | TC-DOCKER-03 (per-worker rate limit) | TC-GW-04 + TC-REENT-09 (single-worker rate limit + 5min expiry). The cross-worker divergence is an architectural property of the in-memory dict; no auth code path differs |
-| TC-DOCKER-04 (IM channels use internal auth) | Code-level: `app/channels/manager.py` creates the `langgraph_sdk` client with `create_internal_auth_headers()` plus CSRF cookie/header, so channel workers do not rely on browser cookies |
-| TC-DOCKER-05 (credential surfacing) | `reset_admin` writes `.deer-flow/admin_initial_credentials.txt` with mode 0600 and logs only the path — the only Docker-unique step is whether the bind mount projects this path onto the host, which is a `docker compose` config check, not a runtime behavior change |
+| TC-DOCKER-04 (IM channels skip auth) | Code-level only: `app/channels/manager.py` uses `langgraph_sdk` directly with no cookie handling. The langgraph_auth handler is bypassed by going through SDK, not HTTP |
+| TC-DOCKER-05 (credential surfacing) | TC-1.1 on sg_dev (file at `~/deer-flow/backend/.deer-flow/admin_initial_credentials.txt`, mode 0600, password 22 chars) — the only Docker-unique step is whether the bind mount projects this path onto the host, which is a `docker compose` config check, not a runtime behavior change |
 | TC-DOCKER-06 (gateway-mode container) | Section 七 7.2 covered by TC-GW-01..05 + Section 二 (gateway-mode auth flow on sg_dev) — same Gateway code, container is just a packaging change |

 ## Reproduction steps when Docker becomes available
@@ -72,6 +72,6 @@ Then run TC-DOCKER-01..06 from the test plan as written.
  about *container packaging* details (bind mounts, multi-worker, log
  collection), not about whether the auth code paths work.
 - **TC-DOCKER-05 was updated in place** in `AUTH_TEST_PLAN.md` to reflect
-  the current reset flow (`reset_admin` → 0600 credentials file, no log leak).
+  the post-simplify reality (log scrape → 0600 credentials file, no log leak).
  The old "grep 'Password:' in docker logs" expectation would have failed
  silently and given a false sense of coverage.
@@ -19,7 +19,7 @@

 ```bash
 # 清除已有数据
-rm -f backend/.deer-flow/data/deerflow.db
+rm -f backend/.deer-flow/users.db

 # 选择模式启动
 make dev          # 标准模式
@@ -28,11 +28,10 @@ make dev-pro      # Gateway 模式
 ```

 **验证点：**
- [ ] 控制台不输出 admin 邮箱或明文密码
- [ ] 控制台提示 `First boot detected — no admin account exists.`
- [ ] 控制台提示访问 `/setup` 完成 admin 创建
- [ ] `GET /api/v1/auth/setup-status` 返回 `{"needs_setup": true}`
- [ ] 前端访问 `/login` 会跳转 `/setup`
+- [ ] 控制台输出 admin 邮箱和随机密码
+- [ ] 密码格式为 `secrets.token_urlsafe(16)` 的 22 字符字符串
+- [ ] 邮箱为 `admin@deerflow.dev`
+- [ ] 提示 `Change it after login: Settings -> Account`

 ### 1.2 非首次启动

@@ -43,8 +42,7 @@ make dev

 **验证点：**
 - [ ] 控制台不输出密码
- [ ] `GET /api/v1/auth/setup-status` 返回 `{"needs_setup": false}`
- [ ] 已登录用户如果 `needs_setup=True`，访问 workspace 会被引导到 `/setup` 完成改邮箱 / 改密码流程
+- [ ] 如果 admin 仍 `needs_setup=True`，控制台有 warning 提示

 ### 1.3 环境变量配置

@@ -78,22 +76,19 @@ make dev
 curl -s $BASE/api/v1/auth/setup-status | jq .
 ```

-**预期：**
- 干净数据库且尚未初始化 admin：返回 `{"needs_setup": true}`
- 已存在 admin：返回 `{"needs_setup": false}`
+**预期：** 返回 `{"needs_setup": false}`（admin 在启动时已自动创建，`count_users() > 0`）。仅在启动完成前的极短窗口内可能返回 `true`。

-#### TC-API-02: 首次初始化 Admin
+#### TC-API-02: Admin 首次登录

 ```bash
-curl -s -X POST $BASE/api/v1/auth/initialize \
-  -H "Content-Type: application/json" \
-  -d '{"email":"admin@example.com","password":"AdminPass1!"}' \
+curl -s -X POST $BASE/api/v1/auth/login/local \
+  -d "username=admin@deerflow.dev&password=<控制台密码>" \
  -c cookies.txt | jq .
 ```

 **预期：**
- 状态码 201
- Body: `{"id": "...", "email": "admin@example.com", "system_role": "admin", "needs_setup": false}`
+- 状态码 200
+- Body: `{"expires_in": 604800, "needs_setup": true}`
 - `cookies.txt` 包含 `access_token`（HttpOnly）和 `csrf_token`（非 HttpOnly）

 #### TC-API-03: 获取当前用户
@@ -102,9 +97,9 @@ curl -s -X POST $BASE/api/v1/auth/initialize \
 curl -s $BASE/api/v1/auth/me -b cookies.txt | jq .
 ```

-**预期：** `{"id": "...", "email": "admin@example.com", "system_role": "admin", "needs_setup": false}`
+**预期：** `{"id": "...", "email": "admin@deerflow.dev", "system_role": "admin", "needs_setup": true}`

-#### TC-API-04: 改密码流程
+#### TC-API-04: Setup 流程（改邮箱 + 改密码）

 ```bash
 CSRF=$(grep csrf_token cookies.txt | awk '{print $NF}')
@@ -112,36 +107,13 @@ curl -s -X POST $BASE/api/v1/auth/change-password \
  -b cookies.txt \
  -H "Content-Type: application/json" \
  -H "X-CSRF-Token: $CSRF" \
-  -d '{"current_password":"AdminPass1!","new_password":"NewPass123!"}' | jq .
+  -d '{"current_password":"<控制台密码>","new_password":"NewPass123!","new_email":"admin@example.com"}' | jq .
 ```

 **预期：**
 - 状态码 200
 - `{"message": "Password changed successfully"}`
- 再调 `/auth/me` 仍为 `admin@example.com`，`needs_setup` 仍为 `false`
-
-#### TC-API-04a: reset_admin 后的 Setup 流程（改邮箱 + 改密码）
-
-```bash
-cd backend
-python -m app.gateway.auth.reset_admin --email admin@example.com
-# 从 .deer-flow/admin_initial_credentials.txt 读取 reset 后密码
-
-curl -s -X POST $BASE/api/v1/auth/login/local \
-  -d "username=admin@example.com&password=<凭据文件密码>" \
-  -c cookies.txt | jq .
-
-CSRF=$(grep csrf_token cookies.txt | awk '{print $NF}')
-curl -s -X POST $BASE/api/v1/auth/change-password \
-  -b cookies.txt \
-  -H "Content-Type: application/json" \
-  -H "X-CSRF-Token: $CSRF" \
-  -d '{"current_password":"<凭据文件密码>","new_password":"AdminPass2!","new_email":"admin2@example.com"}' | jq .
-```
-
-**预期：**
- 登录返回 `{"expires_in": 604800, "needs_setup": true}`
- `change-password` 后 `/auth/me` 邮箱变为 `admin2@example.com`，`needs_setup` 变为 `false`
+- 再调 `/auth/me` 邮箱变为 `admin@example.com`，`needs_setup` 变为 `false`

 #### TC-API-05: 普通用户注册

@@ -521,7 +493,7 @@ curl -s -X POST $BASE/api/v1/auth/register \

 ```bash
 # 检查数据库
-sqlite3 backend/.deer-flow/data/deerflow.db "SELECT email, password_hash FROM users LIMIT 3;"
+sqlite3 backend/.deer-flow/users.db "SELECT email, password_hash FROM users LIMIT 3;"
 ```

 **预期：** `password_hash` 以 `$2b$` 开头（bcrypt 格式）
@@ -534,25 +506,24 @@ sqlite3 backend/.deer-flow/data/deerflow.db "SELECT email, password_hash FROM us

 ### 4.1 首次登录流程

-#### TC-UI-01: 无 admin 时访问 workspace 跳转 setup
+#### TC-UI-01: 访问首页跳转登录

 1. 打开 `http://localhost:2026/workspace`
-2. **预期：** 自动跳转到 `/setup`
+2. **预期：** 自动跳转到 `/login`

-#### TC-UI-02: Setup 页面创建 admin
+#### TC-UI-02: Login 页面

-1. 输入 admin 邮箱、密码、确认密码
-2. 点击 Create Admin Account
+1. 输入 admin 邮箱和控制台密码
+2. 点击 Login
+3. **预期：** 跳转到 `/setup`（因为 `needs_setup=true`）
+
+#### TC-UI-03: Setup 页面
+
+1. 输入新邮箱、控制台密码（current）、新密码、确认密码
+2. 点击 Complete Setup
 3. **预期：** 跳转到 `/workspace`
 4. 刷新页面不跳回 `/setup`

-#### TC-UI-03: 已初始化后 Login 页面
-
-1. 退出登录后访问 `/login`
-2. 输入 admin 邮箱和密码
-3. 点击 Login
-4. **预期：** 跳转到 `/workspace`
-
 #### TC-UI-04: Setup 密码不匹配

 1. 新密码和确认密码不一致
@@ -631,7 +602,7 @@ sqlite3 backend/.deer-flow/data/deerflow.db "SELECT email, password_hash FROM us
 #### TC-UI-15: reset_admin 后重新登录

 1. 执行 `cd backend && python -m app.gateway.auth.reset_admin`
-2. 从 `.deer-flow/admin_initial_credentials.txt` 读取新密码并登录
+2. 使用新密码登录
 3. **预期：** 跳转到 `/setup` 页面（`needs_setup` 被重置为 true）
 4. 旧 session 已失效

@@ -674,28 +645,18 @@ make install
 make dev
 ```

-#### TC-UPG-01: 首次启动等待 admin 初始化
+#### TC-UPG-01: 首次启动创建 admin

 **预期：**
- [ ] 控制台不输出 admin 邮箱或随机密码
- [ ] 访问 `/setup` 可创建第一个 admin
+- [ ] 控制台输出 admin 邮箱（`admin@deerflow.dev`）和随机密码
 - [ ] 无报错，正常启动

 #### TC-UPG-02: 旧 Thread 迁移到 admin

 ```bash
-# 创建第一个 admin
-curl -s -X POST http://localhost:2026/api/v1/auth/initialize \
-  -H "Content-Type: application/json" \
-  -d '{"email":"admin@example.com","password":"AdminPass1!"}' \
-  -c cookies.txt
-
-# 重启一次：启动迁移只在已有 admin 的启动路径执行
-make stop && make dev
-
 # 登录 admin
 curl -s -X POST http://localhost:2026/api/v1/auth/login/local \
-  -d "username=admin@example.com&password=AdminPass1!" \
+  -d "username=admin@deerflow.dev&password=<控制台密码>" \
  -c cookies.txt

 # 查看 thread 列表
@@ -709,8 +670,8 @@ curl -s -X POST http://localhost:2026/api/threads/search \

 **预期：**
 - [ ] 返回的 thread 数量 ≥ 旧版创建的数量
- [ ] 控制台日志有 `Migrated N orphan LangGraph thread(s) to admin`
- [ ] 旧 thread 只对 admin 可见
+- [ ] 控制台日志有 `Migrated N orphaned thread(s) to admin`
+- [ ] 每个 thread 的 `metadata.owner_id` 都已被设为 admin 的 ID

 #### TC-UPG-03: 旧 Thread 内容完整

@@ -722,7 +683,7 @@ curl -s http://localhost:2026/api/threads/<old-thread-id> \

 **预期：**
 - [ ] `metadata.title` 保留原值（如 `old-thread-1`）
- [ ] 响应不回显服务端保留的 `user_id` / `owner_id`
+- [ ] `metadata.owner_id` 已填充

 #### TC-UPG-04: 新用户看不到旧 Thread

@@ -745,19 +706,18 @@ curl -s -X POST http://localhost:2026/api/threads/search \

 ### 5.3 数据库 Schema 兼容

-#### TC-UPG-05: 无 deerflow.db 时创建 schema 但不创建默认用户
+#### TC-UPG-05: 无 users.db 时自动创建

 ```bash
-ls -la backend/.deer-flow/data/deerflow.db
-sqlite3 backend/.deer-flow/data/deerflow.db "SELECT COUNT(*) FROM users;"
+ls -la backend/.deer-flow/users.db
 ```

-**预期：** 文件存在，`sqlite3` 可查到 `users` 表含 `needs_setup`、`token_version` 列；未调用 `/initialize` 前用户数为 0
+**预期：** 文件存在，`sqlite3` 可查到 `users` 表含 `needs_setup`、`token_version` 列

-#### TC-UPG-06: deerflow.db WAL 模式
+#### TC-UPG-06: users.db WAL 模式

 ```bash
-sqlite3 backend/.deer-flow/data/deerflow.db "PRAGMA journal_mode;"
+sqlite3 backend/.deer-flow/users.db "PRAGMA journal_mode;"
 ```

 **预期：** 返回 `wal`
@@ -808,9 +768,9 @@ make dev
 ```

 **预期：**
- [ ] 服务正常启动（忽略 `deerflow.db`，无 auth 相关代码不报错）
+- [ ] 服务正常启动（忽略 `users.db`，无 auth 相关代码不报错）
 - [ ] 旧对话数据仍然可访问
- [ ] `deerflow.db` 文件残留但不影响运行
+- [ ] `users.db` 文件残留但不影响运行

 #### TC-UPG-12: 再次升级到 auth 分支

@@ -821,47 +781,51 @@ make dev
 ```

 **预期：**
- [ ] 识别已有 `deerflow.db`，不重新创建 admin
- [ ] 旧的 admin 账号仍可登录（如果回退期间未删 `deerflow.db`）
+- [ ] 识别已有 `users.db`，不重新创建 admin
+- [ ] 旧的 admin 账号仍可登录（如果回退期间未删 `users.db`）

-### 5.7 Admin 初始化与 reset_admin
+### 5.7 休眠 Admin（初始密码未使用/未更改）

-> 首次启动不生成默认 admin，也不在日志输出密码。忘记密码时走 `reset_admin`，新密码写入 0600 凭据文件。
+> 首次启动生成 admin + 随机密码，但运维未登录、未改密码。
+> 密码只在首次启动的控制台闪过一次，后续启动不再显示。

-#### TC-UPG-13: 未初始化 admin 时重启不创建默认账号
+#### TC-UPG-13: 重启后自动重置密码并打印

 ```bash
-rm -f backend/.deer-flow/data/deerflow.db
+# 首次启动，记录密码
+rm -f backend/.deer-flow/users.db
 make dev
+# 控制台输出密码 P0，不登录
 make stop

+# 隔了几天，再次启动
 make dev
-curl -s $BASE/api/v1/auth/setup-status | jq .
+# 控制台输出新密码 P1
 ```

 **预期：**
- [ ] 控制台不输出密码
- [ ] `setup-status` 仍为 `{"needs_setup": true}`
- [ ] 访问 `/setup` 仍可创建第一个 admin
+- [ ] 控制台输出 `Admin account setup incomplete — password reset`
+- [ ] 输出新密码 P1（P0 已失效）
+- [ ] 用 P1 可以登录，P0 不可以
+- [ ] 登录后 `needs_setup=true`，跳转 `/setup`
+- [ ] `token_version` 递增（旧 session 如有也失效）

-#### TC-UPG-14: 密码丢失 — reset_admin 写入凭据文件
+#### TC-UPG-14: 密码丢失 — 无需 CLI，重启即可

 ```bash
-python -m app.gateway.auth.reset_admin --email admin@example.com
-ls -la backend/.deer-flow/admin_initial_credentials.txt
-cat backend/.deer-flow/admin_initial_credentials.txt
+# 忘记了控制台密码 → 直接重启服务
+make stop && make dev
+# 控制台自动输出新密码
 ```

 **预期：**
- [ ] 命令行只输出凭据文件路径，不输出明文密码
- [ ] 凭据文件权限为 `0600`
- [ ] 凭据文件包含 email + password 行
- [ ] 该用户下次登录返回 `needs_setup=true`
+- [ ] 无需 `reset_admin`，重启服务即可拿到新密码
+- [ ] `reset_admin` CLI 仍然可用作手动备选方案

-#### TC-UPG-15: 未初始化 admin 期间普通用户注册策略边界
+#### TC-UPG-15: 休眠 admin 期间普通用户注册

 ```bash
-# admin 尚不存在，普通用户尝试注册
+# admin 存在但从未登录，普通用户先注册
 curl -s -X POST $BASE/api/v1/auth/register \
  -H "Content-Type: application/json" \
  -d '{"email":"earlybird@example.com","password":"EarlyPass1!"}' \
@@ -869,11 +833,11 @@ curl -s -X POST $BASE/api/v1/auth/register \
 ```

 **预期：**
- [ ] 当前代码允许注册普通用户并自动登录（201，角色为 `user`）
- [ ] 但 `setup-status` 仍为 `{"needs_setup": true}`，因为 admin 仍不存在
- [ ] 这是一个产品策略边界：若要求“必须先有 admin”，需要在 `/register` 增加 admin-exists gate
+- [ ] 注册成功（201），角色为 `user`
+- [ ] 无法提权为 admin
+- [ ] 普通用户的数据与 admin 隔离

-#### TC-UPG-16: 普通用户数据与后续 admin 隔离
+#### TC-UPG-16: 休眠 admin 不影响后续操作

 ```bash
 # 普通用户正常创建 thread、发消息
@@ -885,13 +849,14 @@ curl -s -X POST $BASE/api/threads \
  -d '{"metadata":{}}' | jq .thread_id
 ```

-**预期：** 普通用户正常创建 thread；后续 admin 创建后，搜索不到该普通用户 thread
+**预期：** 正常创建，不受休眠 admin 影响

-#### TC-UPG-17: reset_admin 后完成 Setup
+#### TC-UPG-17: 休眠 admin 最终完成 Setup

 ```bash
+# 运维终于登录
 curl -s -X POST $BASE/api/v1/auth/login/local \
-  -d "username=admin@example.com&password=<凭据文件密码>" \
+  -d "username=admin@deerflow.dev&password=<P0或P1>" \
  -c admin.txt | jq .needs_setup
 # 预期: true

@@ -901,7 +866,7 @@ curl -s -X POST $BASE/api/v1/auth/change-password \
  -b admin.txt \
  -H "Content-Type: application/json" \
  -H "X-CSRF-Token: $CSRF" \
-  -d '{"current_password":"<凭据文件密码>","new_password":"AdminFinal1!","new_email":"admin@real.com"}' \
+  -d '{"current_password":"<密码>","new_password":"AdminFinal1!","new_email":"admin@real.com"}' \
  -c admin.txt

 # 验证
@@ -911,7 +876,7 @@ curl -s $BASE/api/v1/auth/me -b admin.txt | jq '{email, needs_setup}'
 **预期：**
 - [ ] `email` 变为 `admin@real.com`
 - [ ] `needs_setup` 变为 `false`
- [ ] 后续登录使用新密码
+- [ ] 后续重启控制台不再有 warning

 #### TC-UPG-18: 长期未用后 JWT 密钥轮换

@@ -925,8 +890,8 @@ make stop && make dev

 **预期：**
 - [ ] 服务正常启动
- [ ] 账号密码仍可登录（密码存在 DB，与 JWT 密钥无关）
- [ ] 旧的 JWT token 失效（密钥变了签名不匹配）
+- [ ] 旧密码仍可登录（密码存在 DB，与 JWT 密钥无关）
+- [ ] 旧的 JWT token 失效（密钥变了签名不匹配）— 但因为从未登录过也没有旧 token

 ---

@@ -945,7 +910,7 @@ for i in 1 2 3; do
 done

 # 检查 admin 数量
-sqlite3 backend/.deer-flow/data/deerflow.db \
+sqlite3 backend/.deer-flow/users.db \
  "SELECT COUNT(*) FROM users WHERE system_role='admin';"
 ```

@@ -1090,7 +1055,7 @@ curl -s -X POST $BASE/api/v1/auth/register \
 wait

 # 检查用户数
-sqlite3 backend/.deer-flow/data/deerflow.db \
+sqlite3 backend/.deer-flow/users.db \
  "SELECT COUNT(*) FROM users WHERE email='race@example.com';"
 ```

@@ -1200,16 +1165,13 @@ curl -s -w "%{http_code}" -X DELETE "$BASE/api/threads/$TID" \
 ```bash
 cd backend
 python -m app.gateway.auth.reset_admin
-cp .deer-flow/admin_initial_credentials.txt /tmp/deerflow-reset-p1.txt
-P1=$(awk -F': ' '/^password:/ {print $2}' /tmp/deerflow-reset-p1.txt)
+# 记录密码 P1

 python -m app.gateway.auth.reset_admin
-cp .deer-flow/admin_initial_credentials.txt /tmp/deerflow-reset-p2.txt
-P2=$(awk -F': ' '/^password:/ {print $2}' /tmp/deerflow-reset-p2.txt)
+# 记录密码 P2
 ```

 **预期：**
- [ ] `.deer-flow/admin_initial_credentials.txt` 每次都会被重写，文件权限为 `0600`
 - [ ] P1 ≠ P2（每次生成新随机密码）
 - [ ] P1 不可用，只有 P2 有效
 - [ ] `token_version` 递增了 2
@@ -1362,8 +1324,7 @@ done
 ```bash
 GW=http://localhost:8001

-for path in /health /api/v1/auth/setup-status /api/v1/auth/login/local \
-            /api/v1/auth/register /api/v1/auth/initialize /api/v1/auth/logout; do
+for path in /health /api/v1/auth/setup-status /api/v1/auth/login/local /api/v1/auth/register; do
  echo "$path: $(curl -s -w '%{http_code}' -o /dev/null $GW$path)"
 done
 # 预期: 200 或 405/422（方法不对但不是 401）
@@ -1438,9 +1399,9 @@ done
 >
 > 前置条件：
 > - `.env` 中设置 `AUTH_JWT_SECRET`（否则每次容器重启 session 全部失效）
-> - `DEER_FLOW_HOME` 挂载到宿主机目录（持久化 `deerflow.db`）
+> - `DEER_FLOW_HOME` 挂载到宿主机目录（持久化 `users.db`）

-#### TC-DOCKER-01: deerflow.db 通过 volume 持久化
+#### TC-DOCKER-01: users.db 通过 volume 持久化

 ```bash
 # 启动容器
@@ -1455,13 +1416,13 @@ curl -s -X POST $BASE/api/v1/auth/register \
  -H "Content-Type: application/json" \
  -d '{"email":"docker-test@example.com","password":"DockerTest1!"}' -w "\nHTTP %{http_code}"

-# 检查宿主机上的 deerflow.db
-ls -la ${DEER_FLOW_HOME:-backend/.deer-flow}/data/deerflow.db
-sqlite3 ${DEER_FLOW_HOME:-backend/.deer-flow}/data/deerflow.db \
+# 检查宿主机上的 users.db
+ls -la ${DEER_FLOW_HOME:-backend/.deer-flow}/users.db
+sqlite3 ${DEER_FLOW_HOME:-backend/.deer-flow}/users.db \
  "SELECT email FROM users WHERE email='docker-test@example.com';"
 ```

-**预期：** deerflow.db 在宿主机 `DEER_FLOW_HOME` 目录中，查询可见刚注册的用户。
+**预期：** users.db 在宿主机 `DEER_FLOW_HOME` 目录中，查询可见刚注册的用户。

 #### TC-DOCKER-02: 重启容器后 session 保持

@@ -1505,24 +1466,22 @@ done

 **已知限制：** In-process rate limiter 不跨 worker 共享。生产环境如需精确限速，需要 Redis 等外部存储。

-#### TC-DOCKER-04: IM 渠道使用内部认证
+#### TC-DOCKER-04: IM 渠道不经过 auth

 ```bash
-# IM 渠道（Feishu/Slack/Telegram）在 gateway 容器内部通过 LangGraph SDK 调 Gateway
-# 请求携带 process-local internal auth header，并带匹配的 CSRF cookie/header
+# IM 渠道（Feishu/Slack/Telegram）在 gateway 容器内部通过 LangGraph SDK 通信
+# 不走 nginx，不经过 AuthMiddleware

 # 验证方式：检查 gateway 日志中 channel manager 的请求不包含 auth 错误
 docker logs deer-flow-gateway 2>&1 | grep -E "ChannelManager|channel" | head -10
 ```

-**预期：** 无 auth 相关错误。渠道不依赖浏览器 cookie；服务端通过内部认证头把请求归入 `default` 用户桶。
+**预期：** 无 auth 相关错误。渠道通过 `langgraph-sdk` 直连 LangGraph Server（`http://langgraph:2024`），不走 auth 层。

-#### TC-DOCKER-05: reset_admin 密码写入 0600 凭证文件（不再走日志）
+#### TC-DOCKER-05: admin 密码写入 0600 凭证文件（不再走日志）

 ```bash
-# 首次启动不会自动生成 admin 密码。先重置已有 admin，凭据文件写在挂载到宿主机的 DEER_FLOW_HOME 下。
-docker exec deer-flow-gateway python -m app.gateway.auth.reset_admin --email docker-test@example.com
-
+# 凭证文件写在挂载到宿主机的 DEER_FLOW_HOME 下
 ls -la ${DEER_FLOW_HOME:-backend/.deer-flow}/admin_initial_credentials.txt
 # 预期文件权限: -rw------- (0600)

@@ -1553,15 +1512,14 @@ sleep 15
 docker ps --filter name=deer-flow-langgraph --format '{{.Names}}' | wc -l
 # 预期: 0

-# auth 流程正常：未登录受保护接口返回 401
+# auth 流程正常
 curl -s -w "%{http_code}" -o /dev/null $BASE/api/models
 # 预期: 401

-curl -s -X POST $BASE/api/v1/auth/initialize \
-  -H "Content-Type: application/json" \
-  -d '{"email":"admin@example.com","password":"AdminPass1!"}' \
+curl -s -X POST $BASE/api/v1/auth/login/local \
+  -d "username=admin@deerflow.dev&password=<日志密码>" \
  -c cookies.txt -w "\nHTTP %{http_code}"
-# 预期: 201
+# 预期: 200
 ```

 ### 7.4 补充边界用例
@@ -1629,15 +1587,13 @@ curl -s -D - -X POST $BASE/api/v1/auth/login/local \
 #### TC-EDGE-05: HTTP 无 max_age / HTTPS 有 max_age

 ```bash
-GW=http://localhost:8001
-
 # HTTP
-curl -s -D - -X POST $GW/api/v1/auth/login/local \
+curl -s -D - -X POST $BASE/api/v1/auth/login/local \
  -d "username=admin@example.com&password=正确密码" 2>/dev/null \
  | grep "access_token=" | grep -oi "max-age=[0-9]*" || echo "NO max-age (HTTP session cookie)"

-# HTTPS：直连 Gateway 才能用 X-Forwarded-Proto 模拟 HTTPS；nginx 会覆盖该 header
-curl -s -D - -X POST $GW/api/v1/auth/login/local \
+# HTTPS
+curl -s -D - -X POST $BASE/api/v1/auth/login/local \
  -H "X-Forwarded-Proto: https" \
  -d "username=admin@example.com&password=正确密码" 2>/dev/null \
  | grep "access_token=" | grep -oi "max-age=[0-9]*"
@@ -1756,10 +1712,10 @@ curl -s -X POST $BASE/api/threads \
  -b cookies.txt \
  -H "Content-Type: application/json" \
  -H "X-CSRF-Token: $CSRF" \
-  -d '{"metadata":{"owner_id":"victim-user-id","user_id":"victim-user-id"}}' | jq .metadata
+  -d '{"metadata":{"owner_id":"victim-user-id"}}' | jq .metadata.owner_id
 ```

-**预期：** 返回的 `metadata` 不包含 `owner_id` 或 `user_id`。真实所有权写入 `threads_meta.user_id`，不从客户端 metadata 接收，也不通过 metadata 回显。
+**预期：** 返回的 `metadata.owner_id` 应为当前登录用户的 ID，不是请求中注入的 `victim-user-id`。服务端应覆盖客户端提供的 `owner_id`。

 #### 7.5.6 HTTP Method 探测

@@ -1840,6 +1796,6 @@ cd backend && PYTHONPATH=. uv run pytest \
 # 核心接口冒烟
 curl -s $BASE/health                              # 200
 curl -s $BASE/api/models                          # 401 (无 cookie)
-curl -s $BASE/api/v1/auth/setup-status            # 200
+curl -s $BASE/api/v1/auth/setup-status            # 200
 curl -s $BASE/api/v1/auth/me -b cookies.txt       # 200 (有 cookie)
 ```
@@ -2,16 +2,13 @@

 DeerFlow 内置了认证模块。本文档面向从无认证版本升级的用户。

-完整设计见 [AUTH_DESIGN.md](AUTH_DESIGN.md)。
-
 ## 核心概念

 认证模块采用**始终强制**策略：

- 首次启动时不会自动创建账号；首次访问 `/setup` 时由操作者创建第一个 admin 账号
+- 首次启动时自动创建 admin 账号，随机密码打印到控制台日志
 - 认证从一开始就是强制的，无竞争窗口
- 已有 admin 后，服务启动时会把历史对话（升级前创建且缺少 `user_id` 的 thread）迁移到 admin 名下
- 新数据按用户隔离：thread、workspace/uploads/outputs、memory、自定义 agent 都归属当前用户
+- 历史对话（升级前创建的 thread）自动迁移到 admin 名下

 ## 升级步骤

@@ -28,41 +25,39 @@ cd backend && make install
 make dev
 ```

-如果没有 admin 账号，控制台只会提示：
+控制台会输出：

 ```
 ============================================================
-  First boot detected — no admin account exists.
-  Visit /setup to complete admin account creation.
+  Admin account created on first boot
+  Email:    admin@deerflow.dev
+  Password: aB3xK9mN_pQ7rT2w
+  Change it after login: Settings → Account
 ============================================================
 ```

-首次启动不会在日志里打印随机密码，也不会写入默认 admin。这样避免启动日志泄露凭据，也避免在操作者创建账号前出现可被猜测的默认身份。
+如果未登录就重启了服务，不用担心——只要 setup 未完成，每次启动都会重置密码并重新打印到控制台。

-### 3. 创建 admin
+### 3. 登录

-访问 `http://localhost:2026/setup`，填写邮箱和密码创建第一个 admin 账号。创建成功后会自动登录并进入 workspace。
+访问 `http://localhost:2026/login`，使用控制台输出的邮箱和密码登录。

-如果这是从无认证版本升级，创建 admin 后重启一次服务，让启动迁移把缺少 `user_id` 的历史 thread 归属到 admin。
+### 4. 修改密码

-### 4. 登录
-
-后续访问 `http://localhost:2026/login`，使用已创建的邮箱和密码登录。
+登录后进入 Settings → Account → Change Password。

 ### 5. 添加用户（可选）

-其他用户通过 `/login` 页面注册，自动获得 **user** 角色。每个用户只能看到自己的对话、上传文件、输出文件、memory 和自定义 agent。
+其他用户通过 `/login` 页面注册，自动获得 **user** 角色。每个用户只能看到自己的对话。

 ## 安全机制

 | 机制 | 说明 |
 |------|------|
 | JWT HttpOnly Cookie | Token 不暴露给 JavaScript，防止 XSS 窃取 |
-| CSRF Double Submit Cookie | 受保护的 POST/PUT/PATCH/DELETE 请求需携带 `X-CSRF-Token`；登录/注册/初始化/登出走 auth 端点 Origin 校验 |
+| CSRF Double Submit Cookie | 所有 POST/PUT/DELETE 请求需携带 `X-CSRF-Token` |
 | bcrypt 密码哈希 | 密码不以明文存储 |
-| Thread owner filter | `threads_meta.user_id` 由服务端认证上下文写入，搜索、读取、更新、删除默认按当前用户过滤 |
-| 文件系统隔离 | 线程数据写入 `{base_dir}/users/{user_id}/threads/{thread_id}/user-data/`，sandbox 内统一映射为 `/mnt/user-data/` |
-| Memory / agent 隔离 | 用户 memory 和自定义 agent 写入 `{base_dir}/users/{user_id}/...`；旧共享 agent 只作为只读兼容回退 |
+| 多租户隔离 | 用户只能访问自己的 thread |
 | HTTPS 自适应 | 检测 `x-forwarded-proto`，自动设置 `Secure` cookie 标志 |

 ## 常见操作
@@ -79,26 +74,22 @@ python -m app.gateway.auth.reset_admin
 python -m app.gateway.auth.reset_admin --email user@example.com
 ```

-会把新的随机密码写入 `.deer-flow/admin_initial_credentials.txt`，文件权限为 `0600`。命令行只输出文件路径，不输出明文密码。
+会输出新的随机密码。

 ### 完全重置

-删除统一 SQLite 数据库，重启后重新访问 `/setup` 创建新 admin：
+删除用户数据库，重启后自动创建新 admin：

 ```bash
-rm -f backend/.deer-flow/data/deerflow.db
-# 重启服务后访问 http://localhost:2026/setup
+rm -f backend/.deer-flow/users.db
+# 重启服务，控制台输出新密码
 ```

 ## 数据存储

 | 文件 | 内容 |
 |------|------|
-| `.deer-flow/data/deerflow.db` | 统一 SQLite 数据库（users、threads_meta、runs、feedback 等应用数据） |
-| `.deer-flow/users/{user_id}/threads/{thread_id}/user-data/` | 用户线程的 workspace、uploads、outputs |
-| `.deer-flow/users/{user_id}/memory.json` | 用户级 memory |
-| `.deer-flow/users/{user_id}/agents/{agent_name}/` | 用户自定义 agent 配置、SOUL 和 agent memory |
-| `.deer-flow/admin_initial_credentials.txt` | `reset_admin` 生成的新凭据文件（0600，读完应删除） |
+| `.deer-flow/users.db` | SQLite 用户数据库（密码哈希、角色） |
 | `.env` 中的 `AUTH_JWT_SECRET` | JWT 签名密钥（未设置时自动生成临时密钥，重启后 session 失效） |

 ### 生产环境建议
@@ -120,21 +111,19 @@ python -c "import secrets; print(secrets.token_urlsafe(32))"
 | `/api/v1/auth/me` | GET | 获取当前用户信息 |
 | `/api/v1/auth/change-password` | POST | 修改密码 |
 | `/api/v1/auth/setup-status` | GET | 检查 admin 是否存在 |
-| `/api/v1/auth/initialize` | POST | 首次初始化第一个 admin（仅无 admin 时可调用） |

 ## 兼容性

- **标准模式**（`make dev`）：完全兼容；无 admin 时访问 `/setup` 初始化
+- **标准模式**（`make dev`）：完全兼容，admin 自动创建
 - **Gateway 模式**（`make dev-pro`）：完全兼容
- **Docker 部署**：完全兼容，`.deer-flow/data/deerflow.db` 需持久化卷挂载
- **IM 渠道**（Feishu/Slack/Telegram）：通过 Gateway 内部认证通信，使用 `default` 用户桶
+- **Docker 部署**：完全兼容，`.deer-flow/users.db` 需持久化卷挂载
+- **IM 渠道**（Feishu/Slack/Telegram）：通过 LangGraph SDK 通信，不经过认证层
 - **DeerFlowClient**（嵌入式）：不经过 HTTP，不受认证影响

 ## 故障排查

 | 症状 | 原因 | 解决 |
 |------|------|------|
-| 启动后没看到密码 | 当前实现不在启动日志输出密码 | 首次安装访问 `/setup`；忘记密码用 `reset_admin` |
-| `/login` 自动跳到 `/setup` | 系统还没有 admin | 在 `/setup` 创建第一个 admin |
+| 启动后没看到密码 | admin 已存在且已完成 setup（setup 未完成时每次启动都会重置并重新打印密码） | 用 `reset_admin` 重置，或删 `users.db` |
 | 登录后 POST 返回 403 | CSRF token 缺失 | 确认前端已更新 |
 | 重启后需要重新登录 | `AUTH_JWT_SECRET` 未持久化 | 在 `.env` 中设置固定密钥 |
@@ -8,7 +8,6 @@ This directory contains detailed documentation for the DeerFlow backend.
 |----------|-------------|
 | [ARCHITECTURE.md](ARCHITECTURE.md) | System architecture overview |
 | [API.md](API.md) | Complete API reference |
-| [AUTH_DESIGN.md](AUTH_DESIGN.md) | User authentication, CSRF, and per-user isolation design |
 | [CONFIGURATION.md](CONFIGURATION.md) | Configuration options |
 | [SETUP.md](SETUP.md) | Quick setup guide |

@@ -43,7 +42,6 @@ docs/
 ├── README.md                  # This file
 ├── ARCHITECTURE.md            # System architecture
 ├── API.md                     # API reference
-├── AUTH_DESIGN.md             # User authentication and isolation design
 ├── CONFIGURATION.md           # Configuration guide
 ├── SETUP.md                   # Setup instructions
 ├── FILE_UPLOAD.md             # File upload feature
@@ -173,7 +173,7 @@ def _assemble_from_features(
      9.   MemoryMiddleware (memory feature)
      10.  ViewImageMiddleware (vision feature)
      11.  SubagentLimitMiddleware (subagent feature)
-      12.  LoopDetectionMiddleware (loop_detection feature)
+      12.  LoopDetectionMiddleware (always)
      13.  ClarificationMiddleware (always last)

    Two-phase ordering:
@@ -272,15 +272,10 @@ def _assemble_from_features(

        extra_tools.append(task_tool)

-    # --- [12] LoopDetection ---
-    if feat.loop_detection is not False:
-        if isinstance(feat.loop_detection, AgentMiddleware):
-            chain.append(feat.loop_detection)
-        else:
-            from deerflow.agents.middlewares.loop_detection_middleware import LoopDetectionMiddleware
-            from deerflow.config.loop_detection_config import LoopDetectionConfig
+    # --- [12] LoopDetection (always) ---
+    from deerflow.agents.middlewares.loop_detection_middleware import LoopDetectionMiddleware

-            chain.append(LoopDetectionMiddleware.from_config(LoopDetectionConfig()))
+    chain.append(LoopDetectionMiddleware())

    # --- [13] Clarification (always last among built-ins) ---
    chain.append(ClarificationMiddleware())
@@ -31,7 +31,6 @@ class RuntimeFeatures:
    vision: bool | AgentMiddleware = False
    auto_title: bool | AgentMiddleware = False
    guardrail: Literal[False] | AgentMiddleware = False
-    loop_detection: bool | AgentMiddleware = True


 # ---------------------------------------------------------------------------
@@ -20,8 +20,6 @@ from deerflow.agents.thread_state import ThreadState
 from deerflow.config.agents_config import load_agent_config, validate_agent_name
 from deerflow.config.app_config import AppConfig, get_app_config
 from deerflow.models import create_chat_model
-from deerflow.skills.tool_policy import filter_tools_by_skill_allowed_tools
-from deerflow.skills.types import Skill

 logger = logging.getLogger(__name__)

@@ -258,12 +256,6 @@ def _build_middlewares(
    resolved_app_config = app_config or get_app_config()
    middlewares = build_lead_runtime_middlewares(app_config=resolved_app_config, lazy_init=True)

-    # Always inject current date (and optionally memory) as <system-reminder> into the
-    # first HumanMessage to keep the system prompt fully static for prefix-cache reuse.
-    from deerflow.agents.middlewares.dynamic_context_middleware import DynamicContextMiddleware
-
-    middlewares.append(DynamicContextMiddleware(agent_name=agent_name, app_config=resolved_app_config))
-
    # Add summarization middleware if enabled
    summarization_middleware = _create_summarization_middleware(app_config=resolved_app_config)
    if summarization_middleware is not None:
@@ -305,9 +297,7 @@ def _build_middlewares(
        middlewares.append(SubagentLimitMiddleware(max_concurrent=max_concurrent_subagents))

    # LoopDetectionMiddleware — detect and break repetitive tool call loops
-    loop_detection_config = resolved_app_config.loop_detection
-    if loop_detection_config.enabled:
-        middlewares.append(LoopDetectionMiddleware.from_config(loop_detection_config))
+    middlewares.append(LoopDetectionMiddleware())

    # Inject custom middlewares before ClarificationMiddleware
    if custom_middlewares:
@@ -318,28 +308,6 @@ def _build_middlewares(
    return middlewares


-def _available_skill_names(agent_config, is_bootstrap: bool) -> set[str] | None:
-    if is_bootstrap:
-        return {"bootstrap"}
-    if agent_config and agent_config.skills is not None:
-        return set(agent_config.skills)
-    return None
-
-
-def _load_enabled_skills_for_tool_policy(available_skills: set[str] | None, *, app_config: AppConfig) -> list[Skill]:
-    try:
-        from deerflow.agents.lead_agent.prompt import get_enabled_skills_for_config
-
-        skills = get_enabled_skills_for_config(app_config)
-    except Exception:
-        logger.exception("Failed to load skills for allowed-tools policy")
-        raise
-
-    if available_skills is None:
-        return skills
-    return [skill for skill in skills if skill.name in available_skills]
-
-
 def make_lead_agent(config: RunnableConfig):
    """LangGraph graph factory; keep the signature compatible with LangGraph Server."""
    runtime_config = _get_runtime_config(config)
@@ -350,7 +318,7 @@ def make_lead_agent(config: RunnableConfig):
 def _make_lead_agent(config: RunnableConfig, *, app_config: AppConfig):
    # Lazy import to avoid circular dependency
    from deerflow.tools import get_available_tools
-    from deerflow.tools.builtins import setup_agent, update_agent
+    from deerflow.tools.builtins import setup_agent

    cfg = _get_runtime_config(config)
    resolved_app_config = app_config
@@ -365,7 +333,6 @@ def _make_lead_agent(config: RunnableConfig, *, app_config: AppConfig):
    agent_name = validate_agent_name(cfg.get("agent_name"))

    agent_config = load_agent_config(agent_name) if not is_bootstrap else None
-    available_skills = _available_skill_names(agent_config, is_bootstrap)
    # Custom agent model from agent config (if any), or None to let _resolve_model_name pick the default
    agent_model_name = agent_config.model if agent_config and agent_config.model else None

@@ -404,18 +371,15 @@ def _make_lead_agent(config: RunnableConfig, *, app_config: AppConfig):
            "is_plan_mode": is_plan_mode,
            "subagent_enabled": subagent_enabled,
            "tool_groups": agent_config.tool_groups if agent_config else None,
-            "available_skills": sorted(available_skills) if available_skills is not None else None,
+            "available_skills": ["bootstrap"] if is_bootstrap else (agent_config.skills if agent_config and agent_config.skills is not None else None),
        }
    )

-    skills_for_tool_policy = _load_enabled_skills_for_tool_policy(available_skills, app_config=resolved_app_config)
-
    if is_bootstrap:
        # Special bootstrap agent with minimal prompt for initial custom agent creation flow
-        tools = get_available_tools(model_name=model_name, subagent_enabled=subagent_enabled, app_config=resolved_app_config) + [setup_agent]
        return create_agent(
            model=create_chat_model(name=model_name, thinking_enabled=thinking_enabled, app_config=resolved_app_config),
-            tools=filter_tools_by_skill_allowed_tools(tools, skills_for_tool_policy),
+            tools=get_available_tools(model_name=model_name, subagent_enabled=subagent_enabled, app_config=resolved_app_config) + [setup_agent],
            middleware=_build_middlewares(config, model_name=model_name, app_config=resolved_app_config),
            system_prompt=apply_prompt_template(
                subagent_enabled=subagent_enabled,
@@ -426,14 +390,15 @@ def _make_lead_agent(config: RunnableConfig, *, app_config: AppConfig):
            state_schema=ThreadState,
        )

-    # Custom agents can update their own SOUL.md / config via update_agent.
-    # The default agent (no agent_name) does not see this tool.
-    extra_tools = [update_agent] if agent_name else []
    # Default lead agent (unchanged behavior)
-    tools = get_available_tools(model_name=model_name, groups=agent_config.tool_groups if agent_config else None, subagent_enabled=subagent_enabled, app_config=resolved_app_config)
    return create_agent(
        model=create_chat_model(name=model_name, thinking_enabled=thinking_enabled, reasoning_effort=reasoning_effort, app_config=resolved_app_config),
-        tools=filter_tools_by_skill_allowed_tools(tools + extra_tools, skills_for_tool_policy),
+        tools=get_available_tools(
+            model_name=model_name,
+            groups=agent_config.tool_groups if agent_config else None,
+            subagent_enabled=subagent_enabled,
+            app_config=resolved_app_config,
+        ),
        middleware=_build_middlewares(config, model_name=model_name, agent_name=agent_name, app_config=resolved_app_config),
        system_prompt=apply_prompt_template(
            subagent_enabled=subagent_enabled,
@@ -3,6 +3,7 @@ from __future__ import annotations
 import asyncio
 import logging
 import threading
+from datetime import datetime
 from functools import lru_cache
 from typing import TYPE_CHECKING

@@ -19,7 +20,6 @@ logger = logging.getLogger(__name__)
 _ENABLED_SKILLS_REFRESH_WAIT_TIMEOUT_SECONDS = 5.0
 _enabled_skills_lock = threading.Lock()
 _enabled_skills_cache: list[Skill] | None = None
-_enabled_skills_by_config_cache: dict[int, tuple[object, list[Skill]]] = {}
 _enabled_skills_refresh_active = False
 _enabled_skills_refresh_version = 0
 _enabled_skills_refresh_event = threading.Event()
@@ -84,7 +84,6 @@ def _invalidate_enabled_skills_cache() -> threading.Event:
    _get_cached_skills_prompt_section.cache_clear()
    with _enabled_skills_lock:
        _enabled_skills_cache = None
-        _enabled_skills_by_config_cache.clear()
        _enabled_skills_refresh_version += 1
        _enabled_skills_refresh_event.clear()
        if _enabled_skills_refresh_active:
@@ -108,15 +107,6 @@ def warm_enabled_skills_cache(timeout_seconds: float = _ENABLED_SKILLS_REFRESH_W


 def _get_enabled_skills():
-    return get_cached_enabled_skills()
-
-
-def get_cached_enabled_skills() -> list[Skill]:
-    """Return the cached enabled-skills list, kicking off a background refresh on miss.
-
-    Safe to call from request paths: never blocks on disk I/O. Returns an empty
-    list on cache miss; the next call will see the warmed result.
-    """
    with _enabled_skills_lock:
        cached = _enabled_skills_cache

@@ -127,29 +117,17 @@ def get_cached_enabled_skills() -> list[Skill]:
    return []


-def get_enabled_skills_for_config(app_config: AppConfig | None = None) -> list[Skill]:
+def _get_enabled_skills_for_config(app_config: AppConfig | None = None) -> list[Skill]:
    """Return enabled skills using the caller's config source.

-    When a concrete ``app_config`` is supplied, cache the loaded skills by that
-    config object's identity so request-scoped config injection still resolves
-    skill paths from the matching config without rescanning storage on every
-    agent factory call.
+    When a concrete ``app_config`` is supplied, bypass the global enabled-skills
+    cache so the skill list and skill paths are resolved from the same config
+    object. This keeps request-scoped config injection consistent even while the
+    release branch still supports global fallback paths.
    """
    if app_config is None:
        return _get_enabled_skills()
-
-    cache_key = id(app_config)
-    with _enabled_skills_lock:
-        cached = _enabled_skills_by_config_cache.get(cache_key)
-        if cached is not None:
-            cached_config, cached_skills = cached
-            if cached_config is app_config:
-                return list(cached_skills)
-
-    skills = list(get_or_new_skill_storage(app_config=app_config).load_skills(enabled_only=True))
-    with _enabled_skills_lock:
-        _enabled_skills_by_config_cache[cache_key] = (app_config, skills)
-    return list(skills)
+    return list(get_or_new_skill_storage(app_config=app_config).load_skills(enabled_only=True))


 def _skill_mutability_label(category: SkillCategory | str) -> str:
@@ -366,7 +344,8 @@ You are {agent_name}, an open-source super agent.
 </role>

 {soul}
-{self_update_section}
+{memory_context}
+
 <thinking_style>
 - Think concisely and strategically about the user's request BEFORE taking action
 - Break down the task: What is clear? What is ambiguous? What is missing?
@@ -625,7 +604,7 @@ You have access to skills that provide optimized workflows for specific tasks. E

 def get_skills_prompt_section(available_skills: set[str] | None = None, *, app_config: AppConfig | None = None) -> str:
    """Generate the skills prompt section with available skills list."""
-    skills = get_enabled_skills_for_config(app_config)
+    skills = _get_enabled_skills_for_config(app_config)

    if app_config is None:
        try:
@@ -664,26 +643,6 @@ def get_agent_soul(agent_name: str | None) -> str:
    return ""


-def _build_self_update_section(agent_name: str | None) -> str:
-    """Prompt block that teaches the custom agent to persist self-updates via update_agent."""
-    if not agent_name:
-        return ""
-    return f"""<self_update>
-You are running as the custom agent **{agent_name}** with a persisted SOUL.md and config.yaml.
-
-When the user asks you to update your own description, personality, behaviour, skill set, tool groups, or default model,
-you MUST persist the change with the `update_agent` tool. Do NOT use `bash`, `write_file`, or any sandbox tool to edit
-SOUL.md or config.yaml — those write into a temporary sandbox/tool workspace and the changes will be lost on the next turn.
-
-Rules:
- Always pass the FULL replacement text for `soul` (no patch semantics). Start from your current SOUL above and apply the user's edits.
- Only pass the fields that should change. Omit the others to preserve them.
- Pass `skills=[]` to disable all skills, or omit `skills` to keep the existing whitelist.
- After `update_agent` returns successfully, tell the user the change is persisted and will take effect on the next turn.
-</self_update>
-"""
-
-
 def get_deferred_tools_prompt_section(*, app_config: AppConfig | None = None) -> str:
    """Generate <available-deferred-tools> block for the system prompt.

@@ -773,6 +732,9 @@ def apply_prompt_template(
    available_skills: set[str] | None = None,
    app_config: AppConfig | None = None,
 ) -> str:
+    # Get memory context
+    memory_context = _get_memory_context(agent_name, app_config=app_config)
+
    # Include subagent section only if enabled (from runtime parameter)
    n = max_concurrent_subagents
    subagent_section = _build_subagent_section(n, app_config=app_config) if subagent_enabled else ""
@@ -806,18 +768,17 @@ def apply_prompt_template(
    custom_mounts_section = _build_custom_mounts_section(app_config=app_config)
    acp_and_mounts_section = "\n".join(section for section in (acp_section, custom_mounts_section) if section)

-    # Build and return the fully static system prompt.
-    # Memory and current date are injected per-turn via DynamicContextMiddleware
-    # as a <system-reminder> in the first HumanMessage, keeping this prompt
-    # identical across users and sessions for maximum prefix-cache reuse.
-    return SYSTEM_PROMPT_TEMPLATE.format(
+    # Format the prompt with dynamic skills and memory
+    prompt = SYSTEM_PROMPT_TEMPLATE.format(
        agent_name=agent_name or "DeerFlow 2.0",
        soul=get_agent_soul(agent_name),
-        self_update_section=_build_self_update_section(agent_name),
        skills_section=skills_section,
        deferred_tools_section=deferred_tools_section,
+        memory_context=memory_context,
        subagent_section=subagent_section,
        subagent_reminder=subagent_reminder,
        subagent_thinking=subagent_thinking,
        acp_section=acp_and_mounts_section,
    )
+
+    return prompt + f"\n<current_date>{datetime.now().strftime('%Y-%m-%d, %A')}</current_date>"
@@ -36,73 +36,42 @@ class DanglingToolCallMiddleware(AgentMiddleware[AgentState]):

    @staticmethod
    def _message_tool_calls(msg) -> list[dict]:
-        """Return normalized tool calls from structured fields or raw provider payloads.
-
-        LangChain stores malformed provider function calls in ``invalid_tool_calls``.
-        They do not execute, but provider adapters may still serialize enough of
-        the call id/name back into the next request that strict OpenAI-compatible
-        validators expect a matching ToolMessage. Treat them as dangling calls so
-        the next model request stays well-formed and the model sees a recoverable
-        tool error instead of another provider 400.
-        """
-        normalized: list[dict] = []
-
+        """Return normalized tool calls from structured fields or raw provider payloads."""
        tool_calls = getattr(msg, "tool_calls", None) or []
-        normalized.extend(list(tool_calls))
+        if tool_calls:
+            return list(tool_calls)

        raw_tool_calls = (getattr(msg, "additional_kwargs", None) or {}).get("tool_calls") or []
-        if not tool_calls:
-            for raw_tc in raw_tool_calls:
-                if not isinstance(raw_tc, dict):
-                    continue
-
-                function = raw_tc.get("function")
-                name = raw_tc.get("name")
-                if not name and isinstance(function, dict):
-                    name = function.get("name")
-
-                args = raw_tc.get("args", {})
-                if not args and isinstance(function, dict):
-                    raw_args = function.get("arguments")
-                    if isinstance(raw_args, str):
-                        try:
-                            parsed_args = json.loads(raw_args)
-                        except (TypeError, ValueError, json.JSONDecodeError):
-                            parsed_args = {}
-                        args = parsed_args if isinstance(parsed_args, dict) else {}
-
-                normalized.append(
-                    {
-                        "id": raw_tc.get("id"),
-                        "name": name or "unknown",
-                        "args": args if isinstance(args, dict) else {},
-                    }
-                )
-
-        for invalid_tc in getattr(msg, "invalid_tool_calls", None) or []:
-            if not isinstance(invalid_tc, dict):
+        normalized: list[dict] = []
+        for raw_tc in raw_tool_calls:
+            if not isinstance(raw_tc, dict):
                continue
+
+            function = raw_tc.get("function")
+            name = raw_tc.get("name")
+            if not name and isinstance(function, dict):
+                name = function.get("name")
+
+            args = raw_tc.get("args", {})
+            if not args and isinstance(function, dict):
+                raw_args = function.get("arguments")
+                if isinstance(raw_args, str):
+                    try:
+                        parsed_args = json.loads(raw_args)
+                    except (TypeError, ValueError, json.JSONDecodeError):
+                        parsed_args = {}
+                    args = parsed_args if isinstance(parsed_args, dict) else {}
+
            normalized.append(
                {
-                    "id": invalid_tc.get("id"),
-                    "name": invalid_tc.get("name") or "unknown",
-                    "args": {},
-                    "invalid": True,
-                    "error": invalid_tc.get("error"),
+                    "id": raw_tc.get("id"),
+                    "name": name or "unknown",
+                    "args": args if isinstance(args, dict) else {},
                }
            )

        return normalized

-    @staticmethod
-    def _synthetic_tool_message_content(tool_call: dict) -> str:
-        if tool_call.get("invalid"):
-            error = tool_call.get("error")
-            if isinstance(error, str) and error:
-                return f"[Tool call could not be executed because its arguments were invalid: {error}]"
-            return "[Tool call could not be executed because its arguments were invalid.]"
-        return "[Tool call was interrupted and did not return a result.]"
-
    def _build_patched_messages(self, messages: list) -> list | None:
        """Return a new message list with patches inserted at the correct positions.

@@ -145,7 +114,7 @@ class DanglingToolCallMiddleware(AgentMiddleware[AgentState]):
                if tc_id and tc_id not in existing_tool_msg_ids and tc_id not in patched_ids:
                    patched.append(
                        ToolMessage(
-                            content=self._synthetic_tool_message_content(tc),
+                            content="[Tool call was interrupted and did not return a result.]",
                            tool_call_id=tc_id,
                            name=tc.get("name", "unknown"),
                            status="error",
@@ -1,204 +0,0 @@
-"""Middleware to inject dynamic context (memory, current date) as a system-reminder.
-
-The system prompt is kept fully static for maximum prefix-cache reuse across users
-and sessions.  The current date is always injected.  Per-user memory is also injected
-when ``memory.injection_enabled`` is True in the app config.  Both are delivered once
-per conversation as a dedicated <system-reminder> HumanMessage inserted before the
-first user message (frozen-snapshot pattern).
-
-When a conversation spans midnight the middleware detects the date change and injects
-a lightweight date-update reminder as a separate HumanMessage before the current turn.
-This correction is persisted so subsequent turns on the new day see a consistent history
-and do not re-inject.
-
-Reminder format:
-
-    <system-reminder>
-    <memory>...</memory>
-
-    <current_date>2026-05-08, Friday</current_date>
-    </system-reminder>
-
-Date-update format:
-
-    <system-reminder>
-    <current_date>2026-05-09, Saturday</current_date>
-    </system-reminder>
-"""
-
-from __future__ import annotations
-
-import logging
-import re
-import uuid
-from datetime import datetime
-from typing import TYPE_CHECKING, override
-
-from langchain.agents.middleware import AgentMiddleware
-from langchain_core.messages import HumanMessage
-from langgraph.runtime import Runtime
-
-if TYPE_CHECKING:
-    from deerflow.config.app_config import AppConfig
-
-logger = logging.getLogger(__name__)
-
-_DATE_RE = re.compile(r"<current_date>([^<]+)</current_date>")
-_DYNAMIC_CONTEXT_REMINDER_KEY = "dynamic_context_reminder"
-_SUMMARY_MESSAGE_NAME = "summary"
-
-
-def _extract_date(content: str) -> str | None:
-    """Return the first <current_date> value found in *content*, or None."""
-    m = _DATE_RE.search(content)
-    return m.group(1) if m else None
-
-
-def is_dynamic_context_reminder(message: object) -> bool:
-    """Return whether *message* is a hidden dynamic-context reminder."""
-    return isinstance(message, HumanMessage) and bool(message.additional_kwargs.get(_DYNAMIC_CONTEXT_REMINDER_KEY))
-
-
-def _last_injected_date(messages: list) -> str | None:
-    """Scan messages in reverse and return the most recently injected date.
-
-    Detection uses the ``dynamic_context_reminder`` additional_kwargs flag rather
-    than content substring matching, so user messages containing ``<system-reminder>``
-    are not mistakenly treated as injected reminders.
-    """
-    for msg in reversed(messages):
-        if is_dynamic_context_reminder(msg):
-            content_str = msg.content if isinstance(msg.content, str) else str(msg.content)
-            return _extract_date(content_str)
-    return None
-
-
-def _is_user_injection_target(message: object) -> bool:
-    """Return whether *message* can receive a dynamic-context reminder."""
-    return isinstance(message, HumanMessage) and not is_dynamic_context_reminder(message) and message.name != _SUMMARY_MESSAGE_NAME
-
-
-class DynamicContextMiddleware(AgentMiddleware):
-    """Inject memory and current date into HumanMessages as a <system-reminder>.
-
-    First turn
-    ----------
-    Prepends a full system-reminder (memory + date) to the first HumanMessage and
-    persists it (same message ID).  The first message is then frozen for the whole
-    session — its content never changes again, so the prefix cache can hit on every
-    subsequent turn.
-
-    Midnight crossing
-    -----------------
-    If the conversation spans midnight, the current date differs from the date that
-    was injected earlier.  In that case a lightweight date-update reminder is prepended
-    to the **current** (last) HumanMessage and persisted.  Subsequent turns on the new
-    day see the corrected date in history and skip re-injection.
-    """
-
-    def __init__(self, agent_name: str | None = None, *, app_config: AppConfig | None = None):
-        super().__init__()
-        self._agent_name = agent_name
-        self._app_config = app_config
-
-    def _build_full_reminder(self) -> str:
-        from deerflow.agents.lead_agent.prompt import _get_memory_context
-
-        # Memory injection is gated by injection_enabled; date is always included.
-        injection_enabled = self._app_config.memory.injection_enabled if self._app_config else True
-        memory_context = _get_memory_context(self._agent_name, app_config=self._app_config) if injection_enabled else ""
-        current_date = datetime.now().strftime("%Y-%m-%d, %A")
-
-        lines: list[str] = ["<system-reminder>"]
-        if memory_context:
-            lines.append(memory_context.strip())
-            lines.append("")  # blank line separating memory from date
-        lines.append(f"<current_date>{current_date}</current_date>")
-        lines.append("</system-reminder>")
-
-        return "\n".join(lines)
-
-    def _build_date_update_reminder(self) -> str:
-        current_date = datetime.now().strftime("%Y-%m-%d, %A")
-        return "\n".join(
-            [
-                "<system-reminder>",
-                f"<current_date>{current_date}</current_date>",
-                "</system-reminder>",
-            ]
-        )
-
-    @staticmethod
-    def _make_reminder_and_user_messages(original: HumanMessage, reminder_content: str) -> tuple[HumanMessage, HumanMessage]:
-        """Return (reminder_msg, user_msg) using the ID-swap technique.
-
-        reminder_msg takes the original message's ID so that add_messages replaces it
-        in-place (preserving position).  user_msg carries the original content with a
-        derived ``{id}__user`` ID and is appended immediately after by add_messages.
-
-        If the original message has no ID a stable UUID is generated so the derived
-        ``{id}__user`` ID never collapses to the ambiguous ``None__user`` string.
-        """
-        stable_id = original.id or str(uuid.uuid4())
-        reminder_msg = HumanMessage(
-            content=reminder_content,
-            id=stable_id,
-            additional_kwargs={"hide_from_ui": True, _DYNAMIC_CONTEXT_REMINDER_KEY: True},
-        )
-        user_msg = HumanMessage(
-            content=original.content,
-            id=f"{stable_id}__user",
-            name=original.name,
-            additional_kwargs=original.additional_kwargs,
-        )
-        return reminder_msg, user_msg
-
-    def _inject(self, state) -> dict | None:
-        messages = list(state.get("messages", []))
-        if not messages:
-            return None
-
-        current_date = datetime.now().strftime("%Y-%m-%d, %A")
-        last_date = _last_injected_date(messages)
-        logger.debug(
-            "DynamicContextMiddleware._inject: msg_count=%d last_date=%r current_date=%r",
-            len(messages),
-            last_date,
-            current_date,
-        )
-
-        if last_date is None:
-            # ── First turn: inject full reminder as a separate HumanMessage ─────
-            first_idx = next((i for i, m in enumerate(messages) if _is_user_injection_target(m)), None)
-            if first_idx is None:
-                return None
-            full_reminder = self._build_full_reminder()
-            logger.info(
-                "DynamicContextMiddleware: injecting full reminder (len=%d, has_memory=%s) into first HumanMessage id=%r",
-                len(full_reminder),
-                "<memory>" in full_reminder,
-                messages[first_idx].id,
-            )
-            reminder_msg, user_msg = self._make_reminder_and_user_messages(messages[first_idx], full_reminder)
-            return {"messages": [reminder_msg, user_msg]}
-
-        if last_date == current_date:
-            # ── Same day: nothing to do ──────────────────────────────────────────
-            return None
-
-        # ── Midnight crossed: inject date-update reminder as a separate HumanMessage ──
-        last_human_idx = next((i for i in reversed(range(len(messages))) if _is_user_injection_target(messages[i])), None)
-        if last_human_idx is None:
-            return None
-
-        reminder_msg, user_msg = self._make_reminder_and_user_messages(messages[last_human_idx], self._build_date_update_reminder())
-        logger.info("DynamicContextMiddleware: midnight crossing detected — injected date update before current turn")
-        return {"messages": [reminder_msg, user_msg]}
-
-    @override
-    def before_agent(self, state, runtime: Runtime) -> dict | None:
-        return self._inject(state)
-
-    @override
-    async def abefore_agent(self, state, runtime: Runtime) -> dict | None:
-        return self._inject(state)
@@ -12,23 +12,19 @@ Detection strategy:
     response so the agent is forced to produce a final text answer.
 """

-from __future__ import annotations
-
 import hashlib
 import json
 import logging
 import threading
 from collections import OrderedDict, defaultdict
 from copy import deepcopy
-from typing import TYPE_CHECKING, override
+from typing import override

 from langchain.agents import AgentState
 from langchain.agents.middleware import AgentMiddleware
+from langchain_core.messages import HumanMessage
 from langgraph.runtime import Runtime

-if TYPE_CHECKING:
-    from deerflow.config.loop_detection_config import LoopDetectionConfig
-
 logger = logging.getLogger(__name__)

 # Defaults — can be overridden via constructor
@@ -144,9 +140,6 @@ _TOOL_FREQ_HARD_STOP_MSG = "[FORCED STOP] Tool {tool_name} called {count} times
 class LoopDetectionMiddleware(AgentMiddleware[AgentState]):
    """Detects and breaks repetitive tool call loops.

-    Threshold parameters are validated upstream by :class:`LoopDetectionConfig`;
-    construct via :meth:`from_config` to ensure values pass Pydantic validation.
-
    Args:
        warn_threshold: Number of identical tool call sets before injecting
            a warning message. Default: 3.
@@ -162,14 +155,6 @@ class LoopDetectionMiddleware(AgentMiddleware[AgentState]):
            Default: 30.
        tool_freq_hard_limit: Number of calls to the same tool type before
            forcing a stop. Default: 50.
-        tool_freq_overrides: Per-tool overrides for frequency thresholds,
-            keyed by tool name. Each value is a ``(warn, hard_limit)`` tuple
-            that replaces ``tool_freq_warn`` / ``tool_freq_hard_limit`` for
-            that specific tool. Tools not listed here fall back to the global
-            thresholds. Useful for raising limits on intentionally
-            high-frequency tools (e.g. ``bash`` in batch pipelines) without
-            weakening protection on all other tools. Default: ``None``
-            (no overrides).
    """

    def __init__(
@@ -180,7 +165,6 @@ class LoopDetectionMiddleware(AgentMiddleware[AgentState]):
        max_tracked_threads: int = _DEFAULT_MAX_TRACKED_THREADS,
        tool_freq_warn: int = _DEFAULT_TOOL_FREQ_WARN,
        tool_freq_hard_limit: int = _DEFAULT_TOOL_FREQ_HARD_LIMIT,
-        tool_freq_overrides: dict[str, tuple[int, int]] | None = None,
    ):
        super().__init__()
        self.warn_threshold = warn_threshold
@@ -189,26 +173,14 @@ class LoopDetectionMiddleware(AgentMiddleware[AgentState]):
        self.max_tracked_threads = max_tracked_threads
        self.tool_freq_warn = tool_freq_warn
        self.tool_freq_hard_limit = tool_freq_hard_limit
-        self._tool_freq_overrides: dict[str, tuple[int, int]] = tool_freq_overrides or {}
        self._lock = threading.Lock()
+        # Per-thread tracking using OrderedDict for LRU eviction
        self._history: OrderedDict[str, list[str]] = OrderedDict()
        self._warned: dict[str, set[str]] = defaultdict(set)
+        # Per-thread, per-tool-type cumulative call counts
        self._tool_freq: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
        self._tool_freq_warned: dict[str, set[str]] = defaultdict(set)

-    @classmethod
-    def from_config(cls, config: LoopDetectionConfig) -> LoopDetectionMiddleware:
-        """Construct from a Pydantic-validated config, trusting its validation."""
-        return cls(
-            warn_threshold=config.warn_threshold,
-            hard_limit=config.hard_limit,
-            window_size=config.window_size,
-            max_tracked_threads=config.max_tracked_threads,
-            tool_freq_warn=config.tool_freq_warn,
-            tool_freq_hard_limit=config.tool_freq_hard_limit,
-            tool_freq_overrides={name: (o.warn, o.hard_limit) for name, o in config.tool_freq_overrides.items()},
-        )
-
    def _get_thread_id(self, runtime: Runtime) -> str:
        """Extract thread_id from runtime context for per-thread tracking."""
        thread_id = runtime.context.get("thread_id") if runtime.context else None
@@ -308,12 +280,7 @@ class LoopDetectionMiddleware(AgentMiddleware[AgentState]):
                freq[name] += 1
                tc_count = freq[name]

-                if name in self._tool_freq_overrides:
-                    eff_warn, eff_hard = self._tool_freq_overrides[name]
-                else:
-                    eff_warn, eff_hard = self.tool_freq_warn, self.tool_freq_hard_limit
-
-                if tc_count >= eff_hard:
+                if tc_count >= self.tool_freq_hard_limit:
                    logger.error(
                        "Tool frequency hard limit reached — forcing stop",
                        extra={
@@ -324,7 +291,7 @@ class LoopDetectionMiddleware(AgentMiddleware[AgentState]):
                    )
                    return _TOOL_FREQ_HARD_STOP_MSG.format(tool_name=name, count=tc_count), True

-                if tc_count >= eff_warn:
+                if tc_count >= self.tool_freq_warn:
                    warned = self._tool_freq_warned[thread_id]
                    if name not in warned:
                        warned.add(name)
@@ -389,30 +356,13 @@ class LoopDetectionMiddleware(AgentMiddleware[AgentState]):
            return {"messages": [stripped_msg]}

        if warning:
-            # WORKAROUND for v2.0-m1 — see #2724.
-            #
-            # Append the warning to the AIMessage content instead of
-            # injecting a separate HumanMessage. Inserting any non-tool
-            # message between an AIMessage(tool_calls=...) and its
-            # ToolMessage responses breaks OpenAI/Moonshot strict pairing
-            # validation ("tool_call_ids did not have response messages")
-            # because the tools node has not run yet at after_model time.
-            # tool_calls are preserved so the tools node still executes.
-            #
-            # This is a temporary mitigation: mutating an existing
-            # AIMessage to carry framework-authored text leaks loop-warning
-            # text into downstream consumers (MemoryMiddleware fact
-            # extraction, TitleMiddleware, telemetry, model replay) as if
-            # the model said it. The proper fix is to defer warning
-            # injection from after_model to wrap_model_call so every prior
-            # ToolMessage is already in the request — see RFC #2517 (which
-            # lists "loop intervention does not leave invalid
-            # tool-call/tool-message state" as acceptance criteria) and
-            # the prototype on `fix/loop-detection-tool-call-pairing`.
-            messages = state.get("messages", [])
-            last_msg = messages[-1]
-            patched_msg = last_msg.model_copy(update={"content": self._append_text(last_msg.content, warning)})
-            return {"messages": [patched_msg]}
+            # Inject as HumanMessage instead of SystemMessage to avoid
+            # Anthropic's "multiple non-consecutive system messages" error.
+            # Anthropic models require system messages only at the start of
+            # the conversation; injecting one mid-conversation crashes
+            # langchain_anthropic's _format_messages(). HumanMessage works
+            # with all providers. See #1299.
+            return {"messages": [HumanMessage(content=warning, name="loop_warning")]}

        return None

@@ -7,7 +7,6 @@ from langchain.agents import AgentState
 from langchain.agents.middleware import AgentMiddleware
 from langgraph.runtime import Runtime

-from deerflow.agents.middlewares.tool_call_metadata import clone_ai_message_with_tool_calls
 from deerflow.subagents.executor import MAX_CONCURRENT_SUBAGENTS

 logger = logging.getLogger(__name__)
@@ -64,7 +63,7 @@ class SubagentLimitMiddleware(AgentMiddleware[AgentState]):
        logger.warning(f"Truncated {dropped_count} excess task tool call(s) from model response (limit: {self.max_concurrent})")

        # Replace the AIMessage with truncated tool_calls (same id triggers replacement)
-        updated_msg = clone_ai_message_with_tool_calls(last_msg, truncated_tool_calls)
+        updated_msg = last_msg.model_copy(update={"tool_calls": truncated_tool_calls})
        return {"messages": [updated_msg]}

    @override
@@ -14,9 +14,6 @@ from langgraph.config import get_config
 from langgraph.graph.message import REMOVE_ALL_MESSAGES
 from langgraph.runtime import Runtime

-from deerflow.agents.middlewares.dynamic_context_middleware import is_dynamic_context_reminder
-from deerflow.agents.middlewares.tool_call_metadata import clone_ai_message_with_tool_calls
-
 logger = logging.getLogger(__name__)


@@ -81,7 +78,10 @@ def _clone_ai_message(
    content: Any | None = None,
 ) -> AIMessage:
    """Clone an AIMessage while replacing its tool_calls list and optional content."""
-    return clone_ai_message_with_tool_calls(message, tool_calls, content=content)
+    update: dict[str, Any] = {"tool_calls": tool_calls}
+    if content is not None:
+        update["content"] = content
+    return message.model_copy(update=update)


@dataclass
@@ -136,7 +136,6 @@ class DeerFlowSummarizationMiddleware(SummarizationMiddleware):
            return None

        messages_to_summarize, preserved_messages = self._partition_with_skill_rescue(messages, cutoff_index)
-        messages_to_summarize, preserved_messages = self._preserve_dynamic_context_reminders(messages_to_summarize, preserved_messages)
        self._fire_hooks(messages_to_summarize, preserved_messages, runtime)
        summary = self._create_summary(messages_to_summarize)
        new_messages = self._build_new_messages(summary)
@@ -162,7 +161,6 @@ class DeerFlowSummarizationMiddleware(SummarizationMiddleware):
            return None

        messages_to_summarize, preserved_messages = self._partition_with_skill_rescue(messages, cutoff_index)
-        messages_to_summarize, preserved_messages = self._preserve_dynamic_context_reminders(messages_to_summarize, preserved_messages)
        self._fire_hooks(messages_to_summarize, preserved_messages, runtime)
        summary = await self._acreate_summary(messages_to_summarize)
        new_messages = self._build_new_messages(summary)
@@ -182,24 +180,6 @@ class DeerFlowSummarizationMiddleware(SummarizationMiddleware):
        """
        return [HumanMessage(content=f"Here is a summary of the conversation to date:\n\n{summary}", name="summary")]

-    def _preserve_dynamic_context_reminders(
-        self,
-        messages_to_summarize: list[AnyMessage],
-        preserved_messages: list[AnyMessage],
-    ) -> tuple[list[AnyMessage], list[AnyMessage]]:
-        """Keep hidden dynamic-context reminders out of summary compression.
-
-        These reminders carry the current date and optional memory. If summarization
-        removes them, DynamicContextMiddleware can mistake the summary HumanMessage
-        for the first user message and inject the reminder in the wrong place.
-        """
-        reminders = [msg for msg in messages_to_summarize if is_dynamic_context_reminder(msg)]
-        if not reminders:
-            return messages_to_summarize, preserved_messages
-
-        remaining = [msg for msg in messages_to_summarize if not is_dynamic_context_reminder(msg)]
-        return remaining, reminders + preserved_messages
-
    def _partition_with_skill_rescue(
        self,
        messages: list[AnyMessage],
@@ -9,7 +9,6 @@ from langchain.agents.middleware import AgentMiddleware
 from langgraph.config import get_config
 from langgraph.runtime import Runtime

-from deerflow.agents.middlewares.dynamic_context_middleware import is_dynamic_context_reminder
 from deerflow.config.title_config import get_title_config
 from deerflow.models import create_chat_model

@@ -62,10 +61,6 @@ class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]):

        return ""

-    @staticmethod
-    def _is_user_message_for_title(message: object) -> bool:
-        return getattr(message, "type", None) == "human" and not is_dynamic_context_reminder(message)
-
    def _should_generate_title(self, state: TitleMiddlewareState) -> bool:
        """Check if we should generate a title for this thread."""
        config = self._get_title_config()
@@ -82,7 +77,7 @@ class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]):
            return False

        # Count user and assistant messages
-        user_messages = [m for m in messages if self._is_user_message_for_title(m)]
+        user_messages = [m for m in messages if m.type == "human"]
        assistant_messages = [m for m in messages if m.type == "ai"]

        # Generate title after first complete exchange
@@ -96,7 +91,7 @@ class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]):
        config = self._get_title_config()
        messages = state.get("messages", [])

-        user_msg_content = next((m.content for m in messages if self._is_user_message_for_title(m)), "")
+        user_msg_content = next((m.content for m in messages if m.type == "human"), "")
        assistant_msg_content = next((m.content for m in messages if m.type == "ai"), "")

        user_msg = self._normalize_content(user_msg_content)
@@ -1,303 +1,37 @@
-"""Middleware for logging token usage and annotating step attribution."""
-
-from __future__ import annotations
+"""Middleware for logging LLM token usage."""

 import logging
-from collections import defaultdict
-from typing import Any, override
+from typing import override

 from langchain.agents import AgentState
 from langchain.agents.middleware import AgentMiddleware
-from langchain.agents.middleware.todo import Todo
-from langchain_core.messages import AIMessage
 from langgraph.runtime import Runtime

 logger = logging.getLogger(__name__)

-TOKEN_USAGE_ATTRIBUTION_KEY = "token_usage_attribution"
-
-
-def _string_arg(value: Any) -> str | None:
-    if isinstance(value, str):
-        normalized = value.strip()
-        return normalized or None
-    return None
-
-
-def _normalize_todos(value: Any) -> list[Todo]:
-    if not isinstance(value, list):
-        return []
-
-    normalized: list[Todo] = []
-    for item in value:
-        if not isinstance(item, dict):
-            continue
-
-        todo: Todo = {}
-        content = _string_arg(item.get("content"))
-        status = item.get("status")
-
-        if content is not None:
-            todo["content"] = content
-        if status in {"pending", "in_progress", "completed"}:
-            todo["status"] = status
-
-        normalized.append(todo)
-
-    return normalized
-
-
-def _todo_action_kind(previous: Todo | None, current: Todo) -> str:
-    status = current.get("status")
-    previous_content = previous.get("content") if previous else None
-    current_content = current.get("content")
-
-    if previous is None:
-        if status == "completed":
-            return "todo_complete"
-        if status == "in_progress":
-            return "todo_start"
-        return "todo_update"
-
-    if previous_content != current_content:
-        return "todo_update"
-
-    if status == "completed":
-        return "todo_complete"
-    if status == "in_progress":
-        return "todo_start"
-    return "todo_update"
-
-
-def _build_todo_actions(previous_todos: list[Todo], next_todos: list[Todo]) -> list[dict[str, Any]]:
-    # This is the single source of truth for precise write_todos token
-    # attribution. The frontend intentionally falls back to a generic
-    # "Update to-do list" label when this metadata is missing or malformed.
-    previous_by_content: dict[str, list[tuple[int, Todo]]] = defaultdict(list)
-    matched_previous_indices: set[int] = set()
-
-    for index, todo in enumerate(previous_todos):
-        content = todo.get("content")
-        if isinstance(content, str) and content:
-            previous_by_content[content].append((index, todo))
-
-    actions: list[dict[str, Any]] = []
-
-    for index, todo in enumerate(next_todos):
-        content = todo.get("content")
-        if not isinstance(content, str) or not content:
-            continue
-
-        previous_match: Todo | None = None
-        content_matches = previous_by_content.get(content)
-        if content_matches:
-            while content_matches and content_matches[0][0] in matched_previous_indices:
-                content_matches.pop(0)
-            if content_matches:
-                previous_index, previous_match = content_matches.pop(0)
-                matched_previous_indices.add(previous_index)
-
-        if previous_match is None and index < len(previous_todos) and index not in matched_previous_indices:
-            previous_match = previous_todos[index]
-            matched_previous_indices.add(index)
-
-        if previous_match is not None:
-            previous_content = previous_match.get("content")
-            previous_status = previous_match.get("status")
-            if previous_content == content and previous_status == todo.get("status"):
-                continue
-
-        actions.append(
-            {
-                "kind": _todo_action_kind(previous_match, todo),
-                "content": content,
-            }
-        )
-
-    for index, todo in enumerate(previous_todos):
-        if index in matched_previous_indices:
-            continue
-
-        content = todo.get("content")
-        if not isinstance(content, str) or not content:
-            continue
-
-        actions.append(
-            {
-                "kind": "todo_remove",
-                "content": content,
-            }
-        )
-
-    return actions
-
-
-def _describe_tool_call(tool_call: dict[str, Any], todos: list[Todo]) -> list[dict[str, Any]]:
-    name = _string_arg(tool_call.get("name")) or "unknown"
-    args = tool_call.get("args") if isinstance(tool_call.get("args"), dict) else {}
-    tool_call_id = _string_arg(tool_call.get("id"))
-
-    if name == "write_todos":
-        next_todos = _normalize_todos(args.get("todos"))
-        actions = _build_todo_actions(todos, next_todos)
-        if not actions:
-            return [
-                {
-                    "kind": "tool",
-                    "tool_name": name,
-                    "tool_call_id": tool_call_id,
-                }
-            ]
-        return [
-            {
-                **action,
-                "tool_call_id": tool_call_id,
-            }
-            for action in actions
-        ]
-
-    if name == "task":
-        return [
-            {
-                "kind": "subagent",
-                "description": _string_arg(args.get("description")),
-                "subagent_type": _string_arg(args.get("subagent_type")),
-                "tool_call_id": tool_call_id,
-            }
-        ]
-
-    if name in {"web_search", "image_search"}:
-        query = _string_arg(args.get("query"))
-        return [
-            {
-                "kind": "search",
-                "tool_name": name,
-                "query": query,
-                "tool_call_id": tool_call_id,
-            }
-        ]
-
-    if name == "present_files":
-        return [
-            {
-                "kind": "present_files",
-                "tool_call_id": tool_call_id,
-            }
-        ]
-
-    if name == "ask_clarification":
-        return [
-            {
-                "kind": "clarification",
-                "tool_call_id": tool_call_id,
-            }
-        ]
-
-    return [
-        {
-            "kind": "tool",
-            "tool_name": name,
-            "description": _string_arg(args.get("description")),
-            "tool_call_id": tool_call_id,
-        }
-    ]
-
-
-def _infer_step_kind(message: AIMessage, actions: list[dict[str, Any]]) -> str:
-    if actions:
-        first_kind = actions[0].get("kind")
-        if len(actions) == 1 and first_kind in {"todo_start", "todo_complete", "todo_update", "todo_remove"}:
-            return "todo_update"
-        if len(actions) == 1 and first_kind == "subagent":
-            return "subagent_dispatch"
-        return "tool_batch"
-
-    if message.content:
-        return "final_answer"
-    return "thinking"
-
-
-def _build_attribution(message: AIMessage, todos: list[Todo]) -> dict[str, Any]:
-    tool_calls = getattr(message, "tool_calls", None) or []
-    actions: list[dict[str, Any]] = []
-    current_todos = list(todos)
-
-    for raw_tool_call in tool_calls:
-        if not isinstance(raw_tool_call, dict):
-            continue
-
-        described_actions = _describe_tool_call(raw_tool_call, current_todos)
-        actions.extend(described_actions)
-
-        if raw_tool_call.get("name") == "write_todos":
-            args = raw_tool_call.get("args") if isinstance(raw_tool_call.get("args"), dict) else {}
-            current_todos = _normalize_todos(args.get("todos"))
-
-    tool_call_ids: list[str] = []
-    for tool_call in tool_calls:
-        if not isinstance(tool_call, dict):
-            continue
-
-        tool_call_id = _string_arg(tool_call.get("id"))
-        if tool_call_id is not None:
-            tool_call_ids.append(tool_call_id)
-
-    return {
-        # Schema changes should remain additive where possible so older
-        # frontends can ignore unknown fields and fall back safely.
-        "version": 1,
-        "kind": _infer_step_kind(message, actions),
-        "shared_attribution": len(actions) > 1,
-        "tool_call_ids": tool_call_ids,
-        "actions": actions,
-    }
-

 class TokenUsageMiddleware(AgentMiddleware):
-    """Logs token usage from model responses and annotates the AI step."""
-
-    def _apply(self, state: AgentState) -> dict | None:
-        messages = state.get("messages", [])
-        if not messages:
-            return None
-
-        last = messages[-1]
-        if not isinstance(last, AIMessage):
-            return None
-
-        usage = getattr(last, "usage_metadata", None)
-        if usage:
-            input_token_details = usage.get("input_token_details") or {}
-            output_token_details = usage.get("output_token_details") or {}
-            detail_parts = []
-            if input_token_details:
-                detail_parts.append(f"input_token_details={input_token_details}")
-            if output_token_details:
-                detail_parts.append(f"output_token_details={output_token_details}")
-            detail_suffix = f" {' '.join(detail_parts)}" if detail_parts else ""
-            logger.info(
-                "LLM token usage: input=%s output=%s total=%s%s",
-                usage.get("input_tokens", "?"),
-                usage.get("output_tokens", "?"),
-                usage.get("total_tokens", "?"),
-                detail_suffix,
-            )
-
-        todos = state.get("todos") or []
-        attribution = _build_attribution(last, todos if isinstance(todos, list) else [])
-        additional_kwargs = dict(getattr(last, "additional_kwargs", {}) or {})
-
-        if additional_kwargs.get(TOKEN_USAGE_ATTRIBUTION_KEY) == attribution:
-            return None
-
-        additional_kwargs[TOKEN_USAGE_ATTRIBUTION_KEY] = attribution
-        updated_msg = last.model_copy(update={"additional_kwargs": additional_kwargs})
-        return {"messages": [updated_msg]}
+    """Logs token usage from model response usage_metadata."""

    @override
    def after_model(self, state: AgentState, runtime: Runtime) -> dict | None:
-        return self._apply(state)
+        return self._log_usage(state)

    @override
    async def aafter_model(self, state: AgentState, runtime: Runtime) -> dict | None:
-        return self._apply(state)
+        return self._log_usage(state)
+
+    def _log_usage(self, state: AgentState) -> None:
+        messages = state.get("messages", [])
+        if not messages:
+            return None
+        last = messages[-1]
+        usage = getattr(last, "usage_metadata", None)
+        if usage:
+            logger.info(
+                "LLM token usage: input=%s output=%s total=%s",
+                usage.get("input_tokens", "?"),
+                usage.get("output_tokens", "?"),
+                usage.get("total_tokens", "?"),
+            )
+        return None
@@ -1,50 +0,0 @@
-"""Helpers for keeping AIMessage tool-call metadata consistent."""
-
-from __future__ import annotations
-
-from typing import Any
-
-from langchain_core.messages import AIMessage
-
-
-def _raw_tool_call_id(raw_tool_call: Any) -> str | None:
-    if not isinstance(raw_tool_call, dict):
-        return None
-
-    raw_id = raw_tool_call.get("id")
-    return raw_id if isinstance(raw_id, str) and raw_id else None
-
-
-def clone_ai_message_with_tool_calls(
-    message: AIMessage,
-    tool_calls: list[dict[str, Any]],
-    *,
-    content: Any | None = None,
-) -> AIMessage:
-    """Clone an AIMessage while keeping raw provider tool-call metadata in sync."""
-    kept_ids = {tc["id"] for tc in tool_calls if isinstance(tc.get("id"), str) and tc["id"]}
-
-    update: dict[str, Any] = {"tool_calls": tool_calls}
-    if content is not None:
-        update["content"] = content
-
-    additional_kwargs = dict(getattr(message, "additional_kwargs", {}) or {})
-    raw_tool_calls = additional_kwargs.get("tool_calls")
-    if isinstance(raw_tool_calls, list):
-        synced_raw_tool_calls = [raw_tc for raw_tc in raw_tool_calls if _raw_tool_call_id(raw_tc) in kept_ids]
-        if synced_raw_tool_calls:
-            additional_kwargs["tool_calls"] = synced_raw_tool_calls
-        else:
-            additional_kwargs.pop("tool_calls", None)
-
-    if not tool_calls:
-        additional_kwargs.pop("function_call", None)
-
-    update["additional_kwargs"] = additional_kwargs
-
-    response_metadata = dict(getattr(message, "response_metadata", {}) or {})
-    if not tool_calls and response_metadata.get("finish_reason") == "tool_calls":
-        response_metadata["finish_reason"] = "stop"
-    update["response_metadata"] = response_metadata
-
-    return message.model_copy(update=update)
@@ -228,14 +228,21 @@ class DeerFlowClient:
        max_concurrent_subagents = cfg.get("max_concurrent_subagents", 3)

        kwargs: dict[str, Any] = {
-            "model": create_chat_model(name=model_name, thinking_enabled=thinking_enabled),
+            "model": create_chat_model(name=model_name, thinking_enabled=thinking_enabled, app_config=self._app_config),
            "tools": self._get_tools(model_name=model_name, subagent_enabled=subagent_enabled),
-            "middleware": _build_middlewares(config, model_name=model_name, agent_name=self._agent_name, custom_middlewares=self._middlewares),
+            "middleware": _build_middlewares(
+                config,
+                model_name=model_name,
+                agent_name=self._agent_name,
+                custom_middlewares=self._middlewares,
+                app_config=self._app_config,
+            ),
            "system_prompt": apply_prompt_template(
                subagent_enabled=subagent_enabled,
                max_concurrent_subagents=max_concurrent_subagents,
                agent_name=self._agent_name,
                available_skills=self._available_skills,
+                app_config=self._app_config,
            ),
            "state_schema": ThreadState,
        }
@@ -243,7 +250,7 @@ class DeerFlowClient:
        if checkpointer is None:
            from deerflow.runtime.checkpointer import get_checkpointer

-            checkpointer = get_checkpointer()
+            checkpointer = get_checkpointer(app_config=self._app_config)
        if checkpointer is not None:
            kwargs["checkpointer"] = checkpointer

@@ -251,12 +258,15 @@ class DeerFlowClient:
        self._agent_config_key = key
        logger.info("Agent created: agent_name=%s, model=%s, thinking=%s", self._agent_name, model_name, thinking_enabled)

-    @staticmethod
-    def _get_tools(*, model_name: str | None, subagent_enabled: bool):
+    def _get_tools(self, *, model_name: str | None, subagent_enabled: bool):
        """Lazy import to avoid circular dependency at module level."""
        from deerflow.tools import get_available_tools

-        return get_available_tools(model_name=model_name, subagent_enabled=subagent_enabled)
+        return get_available_tools(
+            model_name=model_name,
+            subagent_enabled=subagent_enabled,
+            app_config=self._app_config,
+        )

    @staticmethod
    def _serialize_tool_calls(tool_calls) -> list[dict]:
@@ -264,35 +274,25 @@ class DeerFlowClient:
        return [{"name": tc["name"], "args": tc["args"], "id": tc.get("id")} for tc in tool_calls]

    @staticmethod
-    def _serialize_additional_kwargs(msg) -> dict[str, Any] | None:
-        """Copy message additional_kwargs when present."""
-        additional_kwargs = getattr(msg, "additional_kwargs", None)
-        if isinstance(additional_kwargs, dict) and additional_kwargs:
-            return dict(additional_kwargs)
-        return None
-
-    @staticmethod
-    def _ai_text_event(msg_id: str | None, text: str, usage: dict | None, additional_kwargs: dict[str, Any] | None = None) -> "StreamEvent":
-        """Build a ``messages-tuple`` AI text event."""
+    def _ai_text_event(msg_id: str | None, text: str, usage: dict | None) -> "StreamEvent":
+        """Build a ``messages-tuple`` AI text event, attaching usage when present."""
        data: dict[str, Any] = {"type": "ai", "content": text, "id": msg_id}
        if usage:
            data["usage_metadata"] = usage
-        if additional_kwargs:
-            data["additional_kwargs"] = additional_kwargs
        return StreamEvent(type="messages-tuple", data=data)

    @staticmethod
-    def _ai_tool_calls_event(msg_id: str | None, tool_calls, additional_kwargs: dict[str, Any] | None = None) -> "StreamEvent":
+    def _ai_tool_calls_event(msg_id: str | None, tool_calls) -> "StreamEvent":
        """Build a ``messages-tuple`` AI tool-calls event."""
-        data: dict[str, Any] = {
-            "type": "ai",
-            "content": "",
-            "id": msg_id,
-            "tool_calls": DeerFlowClient._serialize_tool_calls(tool_calls),
-        }
-        if additional_kwargs:
-            data["additional_kwargs"] = additional_kwargs
-        return StreamEvent(type="messages-tuple", data=data)
+        return StreamEvent(
+            type="messages-tuple",
+            data={
+                "type": "ai",
+                "content": "",
+                "id": msg_id,
+                "tool_calls": DeerFlowClient._serialize_tool_calls(tool_calls),
+            },
+        )

    @staticmethod
    def _tool_message_event(msg: ToolMessage) -> "StreamEvent":
@@ -317,30 +317,19 @@ class DeerFlowClient:
                d["tool_calls"] = DeerFlowClient._serialize_tool_calls(msg.tool_calls)
            if getattr(msg, "usage_metadata", None):
                d["usage_metadata"] = msg.usage_metadata
-            if additional_kwargs := DeerFlowClient._serialize_additional_kwargs(msg):
-                d["additional_kwargs"] = additional_kwargs
            return d
        if isinstance(msg, ToolMessage):
-            d = {
+            return {
                "type": "tool",
                "content": DeerFlowClient._extract_text(msg.content),
                "name": getattr(msg, "name", None),
                "tool_call_id": getattr(msg, "tool_call_id", None),
                "id": getattr(msg, "id", None),
            }
-            if additional_kwargs := DeerFlowClient._serialize_additional_kwargs(msg):
-                d["additional_kwargs"] = additional_kwargs
-            return d
        if isinstance(msg, HumanMessage):
-            d = {"type": "human", "content": msg.content, "id": getattr(msg, "id", None)}
-            if additional_kwargs := DeerFlowClient._serialize_additional_kwargs(msg):
-                d["additional_kwargs"] = additional_kwargs
-            return d
+            return {"type": "human", "content": msg.content, "id": getattr(msg, "id", None)}
        if isinstance(msg, SystemMessage):
-            d = {"type": "system", "content": msg.content, "id": getattr(msg, "id", None)}
-            if additional_kwargs := DeerFlowClient._serialize_additional_kwargs(msg):
-                d["additional_kwargs"] = additional_kwargs
-            return d
+            return {"type": "system", "content": msg.content, "id": getattr(msg, "id", None)}
        return {"type": "unknown", "content": str(msg), "id": getattr(msg, "id", None)}

    @staticmethod
@@ -398,7 +387,7 @@ class DeerFlowClient:
        if checkpointer is None:
            from deerflow.runtime.checkpointer.provider import get_checkpointer

-            checkpointer = get_checkpointer()
+            checkpointer = get_checkpointer(app_config=self._app_config)

        thread_info_map = {}

@@ -453,7 +442,7 @@ class DeerFlowClient:
        if checkpointer is None:
            from deerflow.runtime.checkpointer.provider import get_checkpointer

-            checkpointer = get_checkpointer()
+            checkpointer = get_checkpointer(app_config=self._app_config)

        config = {"configurable": {"thread_id": thread_id}}
        checkpoints = []
@@ -563,7 +552,6 @@ class DeerFlowClient:
            - type="messages-tuple"  data={"type": "ai", "content": <delta>, "id": str}
            - type="messages-tuple"  data={"type": "ai", "content": <delta>, "id": str, "usage_metadata": {...}}
            - type="messages-tuple"  data={"type": "ai", "content": "", "id": str, "tool_calls": [...]}
-            - type="messages-tuple"  data={"type": "ai", "content": "", "id": str, "additional_kwargs": {...}}
            - type="messages-tuple"  data={"type": "tool", "content": str, "name": str, "tool_call_id": str, "id": str}
            - type="end"             data={"usage": {"input_tokens": int, "output_tokens": int, "total_tokens": int}}
        """
@@ -586,7 +574,6 @@ class DeerFlowClient:
        # in both the final ``messages`` chunk and the values snapshot —
        # count it only on whichever arrives first.
        counted_usage_ids: set[str] = set()
-        sent_additional_kwargs_by_id: dict[str, dict[str, Any]] = {}
        cumulative_usage: dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}

        def _account_usage(msg_id: str | None, usage: Any) -> dict | None:
@@ -616,20 +603,6 @@ class DeerFlowClient:
                "total_tokens": total_tokens,
            }

-        def _unsent_additional_kwargs(msg_id: str | None, additional_kwargs: dict[str, Any] | None) -> dict[str, Any] | None:
-            if not additional_kwargs:
-                return None
-            if not msg_id:
-                return additional_kwargs
-
-            sent = sent_additional_kwargs_by_id.setdefault(msg_id, {})
-            delta = {key: value for key, value in additional_kwargs.items() if sent.get(key) != value}
-            if not delta:
-                return None
-
-            sent.update(delta)
-            return delta
-
        for item in self._agent.stream(
            state,
            config=config,
@@ -657,31 +630,17 @@ class DeerFlowClient:

                if isinstance(msg_chunk, AIMessage):
                    text = self._extract_text(msg_chunk.content)
-                    additional_kwargs = self._serialize_additional_kwargs(msg_chunk)
                    counted_usage = _account_usage(msg_id, msg_chunk.usage_metadata)
-                    sent_additional_kwargs = False

                    if text:
                        if msg_id:
                            streamed_ids.add(msg_id)
-                        additional_kwargs_delta = _unsent_additional_kwargs(msg_id, additional_kwargs)
-                        yield self._ai_text_event(
-                            msg_id,
-                            text,
-                            counted_usage,
-                            additional_kwargs_delta,
-                        )
-                        sent_additional_kwargs = bool(additional_kwargs_delta)
+                        yield self._ai_text_event(msg_id, text, counted_usage)

                    if msg_chunk.tool_calls:
                        if msg_id:
                            streamed_ids.add(msg_id)
-                        additional_kwargs_delta = None if sent_additional_kwargs else _unsent_additional_kwargs(msg_id, additional_kwargs)
-                        yield self._ai_tool_calls_event(
-                            msg_id,
-                            msg_chunk.tool_calls,
-                            additional_kwargs_delta,
-                        )
+                        yield self._ai_tool_calls_event(msg_id, msg_chunk.tool_calls)

                elif isinstance(msg_chunk, ToolMessage):
                    if msg_id:
@@ -704,45 +663,17 @@ class DeerFlowClient:
                if msg_id and msg_id in streamed_ids:
                    if isinstance(msg, AIMessage):
                        _account_usage(msg_id, getattr(msg, "usage_metadata", None))
-                        additional_kwargs = self._serialize_additional_kwargs(msg)
-                        additional_kwargs_delta = _unsent_additional_kwargs(msg_id, additional_kwargs)
-                        if additional_kwargs_delta:
-                            # Metadata-only follow-up: ``messages-tuple`` has no
-                            # dedicated attribution event, so clients should
-                            # merge this empty-content AI event by message id
-                            # and ignore it for text rendering.
-                            yield self._ai_text_event(msg_id, "", None, additional_kwargs_delta)
                    continue

                if isinstance(msg, AIMessage):
                    counted_usage = _account_usage(msg_id, msg.usage_metadata)
-                    additional_kwargs = self._serialize_additional_kwargs(msg)
-                    sent_additional_kwargs = False

                    if msg.tool_calls:
-                        additional_kwargs_delta = _unsent_additional_kwargs(msg_id, additional_kwargs)
-                        yield self._ai_tool_calls_event(
-                            msg_id,
-                            msg.tool_calls,
-                            additional_kwargs_delta,
-                        )
-                        sent_additional_kwargs = bool(additional_kwargs_delta)
+                        yield self._ai_tool_calls_event(msg_id, msg.tool_calls)

                    text = self._extract_text(msg.content)
                    if text:
-                        additional_kwargs_delta = None if sent_additional_kwargs else _unsent_additional_kwargs(msg_id, additional_kwargs)
-                        yield self._ai_text_event(
-                            msg_id,
-                            text,
-                            counted_usage,
-                            additional_kwargs_delta,
-                        )
-                    elif msg_id:
-                        additional_kwargs_delta = None if sent_additional_kwargs else _unsent_additional_kwargs(msg_id, additional_kwargs)
-                        if not additional_kwargs_delta:
-                            continue
-                        # See the metadata-only follow-up convention above.
-                        yield self._ai_text_event(msg_id, "", None, additional_kwargs_delta)
+                        yield self._ai_text_event(msg_id, text, counted_usage)

                elif isinstance(msg, ToolMessage):
                    yield self._tool_message_event(msg)
@@ -84,52 +84,8 @@ class RemoteSandboxBackend(SandboxBackend):
        """
        return self._provisioner_discover(sandbox_id)

-    def list_running(self) -> list[SandboxInfo]:
-        """Return all sandboxes currently managed by the provisioner.
-
-        Calls ``GET /api/sandboxes`` so that ``AioSandboxProvider._reconcile_orphans()``
-        can adopt pods that were created by a previous process and were never
-        explicitly destroyed.
-        Without this, a process restart silently orphans all existing k8s Pods —
-        they stay running forever because the idle checker only
-        tracks in-process state.
-        """
-        return self._provisioner_list()
-
    # ── Provisioner API calls ─────────────────────────────────────────────

-    def _provisioner_list(self) -> list[SandboxInfo]:
-        """GET /api/sandboxes → list all running sandboxes."""
-        try:
-            resp = requests.get(f"{self._provisioner_url}/api/sandboxes", timeout=10)
-            resp.raise_for_status()
-            data = resp.json()
-            if not isinstance(data, dict):
-                logger.warning("Provisioner list_running returned non-dict payload: %r", type(data))
-                return []
-
-            sandboxes = data.get("sandboxes", [])
-            if not isinstance(sandboxes, list):
-                logger.warning("Provisioner list_running returned non-list sandboxes: %r", type(sandboxes))
-                return []
-
-            infos: list[SandboxInfo] = []
-            for sandbox in sandboxes:
-                if not isinstance(sandbox, dict):
-                    logger.warning("Provisioner list_running entry is not a dict: %r", type(sandbox))
-                    continue
-
-                sandbox_id = sandbox.get("sandbox_id")
-                sandbox_url = sandbox.get("sandbox_url")
-                if isinstance(sandbox_id, str) and sandbox_id and isinstance(sandbox_url, str) and sandbox_url:
-                    infos.append(SandboxInfo(sandbox_id=sandbox_id, sandbox_url=sandbox_url))
-
-            logger.info("Provisioner list_running: %d sandbox(es) found", len(infos))
-            return infos
-        except requests.RequestException as exc:
-            logger.warning("Provisioner list_running failed: %s", exc)
-            return []
-
    def _provisioner_create(self, thread_id: str, sandbox_id: str, extra_mounts: list[tuple[str, str, bool]] | None = None) -> SandboxInfo:
        """POST /api/sandboxes → create Pod + Service."""
        try:
@@ -1,3 +0,0 @@
-from .tools import web_search_tool
-
-__all__ = ["web_search_tool"]
@@ -1,95 +0,0 @@
-"""
-Web Search Tool - Search the web using Serper (Google Search API).
-
-Serper provides real-time Google Search results via a JSON API.
-An API key is required. Sign up at https://serper.dev to get one.
-"""
-
-import json
-import logging
-import os
-
-import httpx
-from langchain.tools import tool
-
-from deerflow.config import get_app_config
-
-logger = logging.getLogger(__name__)
-
-_SERPER_ENDPOINT = "https://google.serper.dev/search"
-_api_key_warned = False
-
-
-def _get_api_key() -> str | None:
-    config = get_app_config().get_tool_config("web_search")
-    if config is not None:
-        api_key = config.model_extra.get("api_key")
-        if isinstance(api_key, str) and api_key.strip():
-            return api_key
-    return os.getenv("SERPER_API_KEY")
-
-
-@tool("web_search", parse_docstring=True)
-def web_search_tool(query: str, max_results: int = 5) -> str:
-    """Search the web for information using Google Search via Serper.
-
-    Args:
-        query: Search keywords describing what you want to find. Be specific for better results.
-        max_results: Maximum number of search results to return. Default is 5.
-    """
-    global _api_key_warned
-
-    config = get_app_config().get_tool_config("web_search")
-    if config is not None and "max_results" in config.model_extra:
-        max_results = config.model_extra.get("max_results", max_results)
-
-    api_key = _get_api_key()
-    if not api_key:
-        if not _api_key_warned:
-            _api_key_warned = True
-            logger.warning("Serper API key is not set. Set SERPER_API_KEY in your environment or provide api_key in config.yaml. Sign up at https://serper.dev")
-        return json.dumps(
-            {"error": "SERPER_API_KEY is not configured", "query": query},
-            ensure_ascii=False,
-        )
-
-    headers = {
-        "X-API-KEY": api_key,
-        "Content-Type": "application/json",
-    }
-    payload = {"q": query, "num": max_results}
-
-    try:
-        with httpx.Client(timeout=30) as client:
-            response = client.post(_SERPER_ENDPOINT, headers=headers, json=payload)
-        response.raise_for_status()
-        data = response.json()
-    except httpx.HTTPStatusError as e:
-        logger.error(f"Serper API returned HTTP {e.response.status_code}: {e.response.text}")
-        return json.dumps(
-            {"error": f"Serper API error: HTTP {e.response.status_code}", "query": query},
-            ensure_ascii=False,
-        )
-    except Exception as e:
-        logger.error(f"Serper search failed: {type(e).__name__}: {e}")
-        return json.dumps({"error": str(e), "query": query}, ensure_ascii=False)
-
-    organic = data.get("organic", [])
-    if not organic:
-        return json.dumps({"error": "No results found", "query": query}, ensure_ascii=False)
-
-    normalized_results = [
-        {
-            "title": r.get("title", ""),
-            "url": r.get("link", ""),
-            "content": r.get("snippet", ""),
-        }
-        for r in organic[:max_results]
-    ]
-
-    output = {
-        "query": query,
-        "total_results": len(normalized_results),
-        "results": normalized_results,
-    }
-    return json.dumps(output, indent=2, ensure_ascii=False)
@@ -1,6 +1,5 @@
 from .app_config import get_app_config
 from .extensions_config import ExtensionsConfig, get_extensions_config
-from .loop_detection_config import LoopDetectionConfig
 from .memory_config import MemoryConfig, get_memory_config
 from .paths import Paths, get_paths
 from .skill_evolution_config import SkillEvolutionConfig
@@ -21,7 +20,6 @@ __all__ = [
    "SkillsConfig",
    "ExtensionsConfig",
    "get_extensions_config",
-    "LoopDetectionConfig",
    "MemoryConfig",
    "get_memory_config",
    "get_tracing_config",
@@ -1,22 +1,13 @@
-"""Configuration and loaders for custom agents.
-
-Custom agents are stored per-user under ``{base_dir}/users/{user_id}/agents/{name}/``.
-A legacy shared layout at ``{base_dir}/agents/{name}/`` is still readable so that
-installations that pre-date user isolation continue to work until they run the
-``scripts/migrate_user_isolation.py`` migration. New writes always target the
-per-user layout.
-"""
+"""Configuration and loaders for custom agents."""

 import logging
 import re
-from pathlib import Path
 from typing import Any

 import yaml
 from pydantic import BaseModel

 from deerflow.config.paths import get_paths
-from deerflow.runtime.user_context import get_effective_user_id

 logger = logging.getLogger(__name__)

@@ -49,47 +40,14 @@ class AgentConfig(BaseModel):
    skills: list[str] | None = None


-def resolve_agent_dir(name: str, *, user_id: str | None = None) -> Path:
-    """Return the on-disk directory for an agent, preferring the per-user layout.
-
-    Resolution order:
-    1. ``{base_dir}/users/{user_id}/agents/{name}/`` (per-user, current layout).
-    2. ``{base_dir}/agents/{name}/`` (legacy shared layout — read-only fallback).
-
-    If neither exists, the per-user path is returned so callers that intend to
-    create the agent write into the new layout.
-
-    Args:
-        name: Validated agent name.
-        user_id: Owner of the agent. Defaults to the effective user from the
-            request context (or ``"default"`` in no-auth mode).
-    """
-    paths = get_paths()
-    effective_user = user_id or get_effective_user_id()
-    user_path = paths.user_agent_dir(effective_user, name)
-    if user_path.exists():
-        return user_path
-
-    legacy_path = paths.agent_dir(name)
-    if legacy_path.exists():
-        return legacy_path
-
-    return user_path
-
-
-def load_agent_config(name: str | None, *, user_id: str | None = None) -> AgentConfig | None:
+def load_agent_config(name: str | None) -> AgentConfig | None:
    """Load the custom or default agent's config from its directory.

-    Reads from the per-user layout first; falls back to the legacy shared layout
-    for installations that have not yet been migrated.
-
    Args:
        name: The agent name.
-        user_id: Owner of the agent. Defaults to the effective user from the
-            current request context.

    Returns:
-        AgentConfig instance, or ``None`` if ``name`` is ``None``.
+        AgentConfig instance.

    Raises:
        FileNotFoundError: If the agent directory or config.yaml does not exist.
@@ -100,7 +58,7 @@ def load_agent_config(name: str | None, *, user_id: str | None = None) -> AgentC
        return None

    name = validate_agent_name(name)
-    agent_dir = resolve_agent_dir(name, user_id=user_id)
+    agent_dir = get_paths().agent_dir(name)
    config_file = agent_dir / "config.yaml"

    if not agent_dir.exists():
@@ -126,7 +84,7 @@ def load_agent_config(name: str | None, *, user_id: str | None = None) -> AgentC
    return AgentConfig(**data)


-def load_agent_soul(agent_name: str | None, *, user_id: str | None = None) -> str | None:
+def load_agent_soul(agent_name: str | None) -> str | None:
    """Read the SOUL.md file for a custom agent, if it exists.

    SOUL.md defines the agent's personality, values, and behavioral guardrails.
@@ -134,16 +92,11 @@ def load_agent_soul(agent_name: str | None, *, user_id: str | None = None) -> st

    Args:
        agent_name: The name of the agent or None for the default agent.
-        user_id: Owner of the agent. Defaults to the effective user from the
-            current request context.

    Returns:
        The SOUL.md content as a string, or None if the file does not exist.
    """
-    if agent_name:
-        agent_dir = resolve_agent_dir(agent_name, user_id=user_id)
-    else:
-        agent_dir = get_paths().base_dir
+    agent_dir = get_paths().agent_dir(agent_name) if agent_name else get_paths().base_dir
    soul_path = agent_dir / SOUL_FILENAME
    if not soul_path.exists():
        return None
@@ -151,50 +104,32 @@ def load_agent_soul(agent_name: str | None, *, user_id: str | None = None) -> st
    return content or None


-def list_custom_agents(*, user_id: str | None = None) -> list[AgentConfig]:
+def list_custom_agents() -> list[AgentConfig]:
    """Scan the agents directory and return all valid custom agents.

-    Returns the union of agents in the per-user layout and the legacy shared
-    layout, so that pre-migration installations remain visible until they are
-    migrated. Per-user entries shadow legacy entries with the same name.
-
-    Args:
-        user_id: Owner whose agents to list. Defaults to the effective user
-            from the current request context.
-
    Returns:
        List of AgentConfig for each valid agent directory found.
    """
-    paths = get_paths()
-    effective_user = user_id or get_effective_user_id()
+    agents_dir = get_paths().agents_dir
+
+    if not agents_dir.exists():
+        return []

-    seen: set[str] = set()
    agents: list[AgentConfig] = []

-    user_root = paths.user_agents_dir(effective_user)
-    legacy_root = paths.agents_dir
-
-    for root in (user_root, legacy_root):
-        if not root.exists():
+    for entry in sorted(agents_dir.iterdir()):
+        if not entry.is_dir():
            continue
-        for entry in sorted(root.iterdir()):
-            if not entry.is_dir():
-                continue
-            if entry.name in seen:
-                continue
-            config_file = entry / "config.yaml"
-            if not config_file.exists():
-                logger.debug(f"Skipping {entry.name}: no config.yaml")
-                continue

-            try:
-                agent_cfg = load_agent_config(entry.name, user_id=effective_user)
-                if agent_cfg is None:
-                    continue
-                agents.append(agent_cfg)
-                seen.add(entry.name)
-            except Exception as e:
-                logger.warning(f"Skipping agent '{entry.name}': {e}")
+        config_file = entry / "config.yaml"
+        if not config_file.exists():
+            logger.debug(f"Skipping {entry.name}: no config.yaml")
+            continue
+
+        try:
+            agent_cfg = load_agent_config(entry.name)
+            agents.append(agent_cfg)
+        except Exception as e:
+            logger.warning(f"Skipping agent '{entry.name}': {e}")

-    agents.sort(key=lambda a: a.name)
    return agents
@@ -1,6 +1,5 @@
 import logging
 import os
-from collections.abc import Mapping
 from contextvars import ContextVar
 from pathlib import Path
 from typing import Any, Self
@@ -15,7 +14,6 @@ from deerflow.config.checkpointer_config import CheckpointerConfig, load_checkpo
 from deerflow.config.database_config import DatabaseConfig
 from deerflow.config.extensions_config import ExtensionsConfig
 from deerflow.config.guardrails_config import GuardrailsConfig, load_guardrails_config_from_dict
-from deerflow.config.loop_detection_config import LoopDetectionConfig
 from deerflow.config.memory_config import MemoryConfig, load_memory_config_from_dict
 from deerflow.config.model_config import ModelConfig
 from deerflow.config.run_events_config import RunEventsConfig
@@ -101,7 +99,6 @@ class AppConfig(BaseModel):
    subagents: SubagentsAppConfig = Field(default_factory=SubagentsAppConfig, description="Subagent runtime configuration")
    guardrails: GuardrailsConfig = Field(default_factory=GuardrailsConfig, description="Guardrail middleware configuration")
    circuit_breaker: CircuitBreakerConfig = Field(default_factory=CircuitBreakerConfig, description="LLM circuit breaker configuration")
-    loop_detection: LoopDetectionConfig = Field(default_factory=LoopDetectionConfig, description="Loop detection middleware configuration")
    model_config = ConfigDict(extra="allow")
    database: DatabaseConfig = Field(default_factory=DatabaseConfig, description="Unified database backend configuration")
    run_events: RunEventsConfig = Field(default_factory=RunEventsConfig, description="Run event storage configuration")
@@ -160,54 +157,56 @@ class AppConfig(BaseModel):
        config_data = cls.resolve_env_variables(config_data)
        cls._apply_database_defaults(config_data)

+        # Load title config if present
+        if "title" in config_data:
+            load_title_config_from_dict(config_data["title"])
+
+        # Load summarization config if present
+        if "summarization" in config_data:
+            load_summarization_config_from_dict(config_data["summarization"])
+
+        # Load memory config if present
+        if "memory" in config_data:
+            load_memory_config_from_dict(config_data["memory"])
+
+        # Always refresh agents API config so removed config sections reset
+        # singleton-backed state to its default/disabled values on reload.
+        load_agents_api_config_from_dict(config_data.get("agents_api") or {})
+
+        # Load subagents config if present
+        if "subagents" in config_data:
+            load_subagents_config_from_dict(config_data["subagents"])
+
+        # Load tool_search config if present
+        if "tool_search" in config_data:
+            load_tool_search_config_from_dict(config_data["tool_search"])
+
+        # Load guardrails config if present
+        if "guardrails" in config_data:
+            load_guardrails_config_from_dict(config_data["guardrails"])
+
        # No singleton loader for circuit_breaker: the assignment below is a no-op; the section is parsed by model_validate()
        if "circuit_breaker" in config_data:
            config_data["circuit_breaker"] = config_data["circuit_breaker"]

+        # Load checkpointer config if present
+        if "checkpointer" in config_data:
+            load_checkpointer_config_from_dict(config_data["checkpointer"])
+
+        # Load stream bridge config if present
+        if "stream_bridge" in config_data:
+            load_stream_bridge_config_from_dict(config_data["stream_bridge"])
+
+        # Always refresh ACP agent config so removed entries do not linger across reloads.
+        load_acp_config_from_dict(config_data.get("acp_agents", {}))
+
        # Load extensions config separately (it's in a different file)
        extensions_config = ExtensionsConfig.from_file()
        config_data["extensions"] = extensions_config.model_dump()

        result = cls.model_validate(config_data)
-        acp_agents = cls._validate_acp_agents(config_data.get("acp_agents", {}))
-        cls._apply_singleton_configs(result, acp_agents)
        return result

-    @classmethod
-    def _validate_acp_agents(
-        cls,
-        config_data: Mapping[str, Mapping[str, object]] | None,
-    ) -> dict[str, ACPAgentConfig]:
-        if config_data is None:
-            config_data = {}
-        return {name: ACPAgentConfig(**cfg) for name, cfg in config_data.items()}
-
-    @classmethod
-    def _apply_singleton_configs(cls, config: Self, acp_agents: dict[str, ACPAgentConfig]) -> None:
-        from deerflow.config.checkpointer_config import get_checkpointer_config
-
-        previous_checkpointer_config = get_checkpointer_config()
-
-        load_title_config_from_dict(config.title.model_dump())
-        load_summarization_config_from_dict(config.summarization.model_dump())
-        load_memory_config_from_dict(config.memory.model_dump())
-        load_agents_api_config_from_dict(config.agents_api.model_dump())
-        load_subagents_config_from_dict(config.subagents.model_dump())
-        load_tool_search_config_from_dict(config.tool_search.model_dump())
-        load_guardrails_config_from_dict(config.guardrails.model_dump())
-        load_checkpointer_config_from_dict(config.checkpointer.model_dump() if config.checkpointer is not None else None)
-        load_stream_bridge_config_from_dict(config.stream_bridge.model_dump() if config.stream_bridge is not None else None)
-        load_acp_config_from_dict({name: agent.model_dump() for name, agent in acp_agents.items()})
-
-        if previous_checkpointer_config != config.checkpointer:
-            # These runtime singletons derive their backend from checkpointer config.
-            # Keep imports local to avoid cycles: both providers import get_app_config.
-            from deerflow.runtime.checkpointer import reset_checkpointer
-            from deerflow.runtime.store import reset_store
-
-            reset_checkpointer()
-            reset_store()
-
    @classmethod
    def _apply_database_defaults(cls, config_data: dict[str, Any]) -> None:
        """Apply config.yaml defaults for persistence when the section is absent."""
@@ -14,13 +14,12 @@ class CheckpointerConfig(BaseModel):
        description="Checkpointer backend type. "
        "'memory' is in-process only (lost on restart). "
        "'sqlite' persists to a local file (requires langgraph-checkpoint-sqlite). "
-        "'postgres' persists to PostgreSQL (install with deerflow-harness[postgres])."
+        "'postgres' persists to PostgreSQL (requires langgraph-checkpoint-postgres)."
    )
    connection_string: str | None = Field(
        default=None,
        description="Connection string for sqlite (file path) or postgres (DSN). "
-        "Optional for sqlite and defaults to 'store.db' when omitted. "
-        "Required for postgres. "
+        "Required for sqlite and postgres types. "
        "For sqlite, use a file path like '.deer-flow/checkpoints.db' or ':memory:' for in-memory. "
        "For postgres, use a DSN like 'postgresql://user:pass@localhost:5432/db'.",
    )
@@ -41,10 +40,7 @@ def set_checkpointer_config(config: CheckpointerConfig | None) -> None:
    _checkpointer_config = config


-def load_checkpointer_config_from_dict(config_dict: dict | None) -> None:
+def load_checkpointer_config_from_dict(config_dict: dict) -> None:
    """Load checkpointer configuration from a dictionary."""
    global _checkpointer_config
-    if config_dict is None:
-        _checkpointer_config = None
-        return
    _checkpointer_config = CheckpointerConfig(**config_dict)
@@ -1,73 +0,0 @@
-"""Configuration for loop detection middleware."""
-
-from pydantic import BaseModel, Field, model_validator
-
-
-class ToolFreqOverride(BaseModel):
-    """Per-tool frequency threshold override.
-
-    Can be higher or lower than the global defaults. Commonly used to raise
-    thresholds for high-frequency tools like bash in batch workflows (e.g.
-    RNA-seq pipelines) without weakening protection on every other tool.
-    """
-
-    warn: int = Field(ge=1)
-    hard_limit: int = Field(ge=1)
-
-    @model_validator(mode="after")
-    def _validate(self) -> "ToolFreqOverride":
-        if self.hard_limit < self.warn:
-            raise ValueError("hard_limit must be >= warn")
-        return self
-
-
-class LoopDetectionConfig(BaseModel):
-    """Configuration for repetitive tool-call loop detection."""
-
-    enabled: bool = Field(
-        default=True,
-        description="Whether to enable repetitive tool-call loop detection",
-    )
-    warn_threshold: int = Field(
-        default=3,
-        ge=1,
-        description="Number of identical tool-call sets before injecting a warning",
-    )
-    hard_limit: int = Field(
-        default=5,
-        ge=1,
-        description="Number of identical tool-call sets before forcing a stop",
-    )
-    window_size: int = Field(
-        default=20,
-        ge=1,
-        description="Number of recent tool-call sets to track per thread",
-    )
-    max_tracked_threads: int = Field(
-        default=100,
-        ge=1,
-        description="Maximum number of thread histories to keep in memory",
-    )
-    tool_freq_warn: int = Field(
-        default=30,
-        ge=1,
-        description="Number of calls to the same tool type before injecting a frequency warning",
-    )
-    tool_freq_hard_limit: int = Field(
-        default=50,
-        ge=1,
-        description="Number of calls to the same tool type before forcing a stop",
-    )
-    tool_freq_overrides: dict[str, ToolFreqOverride] = Field(
-        default_factory=dict,
-        description=("Per-tool overrides for tool_freq_warn / tool_freq_hard_limit, keyed by tool name. Values can be higher or lower than the global defaults. Commonly used to raise thresholds for high-frequency tools like bash."),
-    )
-
-    @model_validator(mode="after")
-    def validate_thresholds(self) -> "LoopDetectionConfig":
-        """Ensure hard stop cannot happen before the warning threshold."""
-        if self.hard_limit < self.warn_threshold:
-            raise ValueError("hard_limit must be greater than or equal to warn_threshold")
-        if self.tool_freq_hard_limit < self.tool_freq_warn:
-            raise ValueError("tool_freq_hard_limit must be greater than or equal to tool_freq_warn")
-        return self
@@ -132,20 +132,15 @@ class Paths:

    @property
    def agents_dir(self) -> Path:
-        """Legacy root for shared (pre user-isolation) custom agents: `{base_dir}/agents/`.
-
-        New code should use :meth:`user_agents_dir` instead. This property remains
-        only as a read-side fallback for installations that have not yet run the
-        ``migrate_user_isolation.py`` script.
-        """
+        """Root directory for all custom agents: `{base_dir}/agents/`."""
        return self.base_dir / "agents"

    def agent_dir(self, name: str) -> Path:
-        """Legacy per-agent directory (no user isolation): `{base_dir}/agents/{name}/`."""
+        """Directory for a specific agent: `{base_dir}/agents/{name}/`."""
        return self.agents_dir / name.lower()

    def agent_memory_file(self, name: str) -> Path:
-        """Legacy per-agent memory file: `{base_dir}/agents/{name}/memory.json`."""
+        """Per-agent memory file: `{base_dir}/agents/{name}/memory.json`."""
        return self.agent_dir(name) / "memory.json"

    def user_dir(self, user_id: str) -> Path:
@@ -156,17 +151,9 @@ class Paths:
        """Per-user memory file: `{base_dir}/users/{user_id}/memory.json`."""
        return self.user_dir(user_id) / "memory.json"

-    def user_agents_dir(self, user_id: str) -> Path:
-        """Per-user root for that user's custom agents: `{base_dir}/users/{user_id}/agents/`."""
-        return self.user_dir(user_id) / "agents"
-
-    def user_agent_dir(self, user_id: str, agent_name: str) -> Path:
-        """Per-user per-agent directory: `{base_dir}/users/{user_id}/agents/{name}/`."""
-        return self.user_agents_dir(user_id) / agent_name.lower()
-
    def user_agent_memory_file(self, user_id: str, agent_name: str) -> Path:
        """Per-user per-agent memory: `{base_dir}/users/{user_id}/agents/{name}/memory.json`."""
-        return self.user_agent_dir(user_id, agent_name) / "memory.json"
+        return self.user_dir(user_id) / "agents" / agent_name.lower() / "memory.json"

    def thread_dir(self, thread_id: str, *, user_id: str | None = None) -> Path:
        """
@@ -6,13 +6,6 @@ from pydantic import BaseModel, Field
 from deerflow.config.runtime_paths import project_root, resolve_path


-def _legacy_skills_candidates() -> tuple[Path, ...]:
-    """Return source-tree skills locations for monorepo compatibility."""
-    backend_dir = Path(__file__).resolve().parents[4]
-    repo_root = backend_dir.parent
-    return (repo_root / "skills",)
-
-
 class SkillsConfig(BaseModel):
    """Configuration for skills system"""

@@ -22,7 +15,7 @@ class SkillsConfig(BaseModel):
    )
    path: str | None = Field(
        default=None,
-        description=("Path to skills directory. If not specified, defaults to `skills` under the caller project root, falling back to the legacy repo-root location for monorepo compatibility."),
+        description="Path to skills directory. If not specified, defaults to skills under the caller project root.",
    )
    container_path: str = Field(
        default="/mnt/skills",
@@ -33,30 +26,15 @@ class SkillsConfig(BaseModel):
        """
        Get the resolved skills directory path.

-        Resolution order:
-            1. Explicit ``path`` field
-            2. ``DEER_FLOW_SKILLS_PATH`` environment variable
-            3. ``skills`` under the caller project root (``project_root()``)
-            4. Legacy repo-root candidates for monorepo compatibility (``_legacy_skills_candidates``)
-
-        When none of (3) or (4) exist on disk, the project-root default is returned so callers
-        can still surface a stable "no skills" location without raising.
+        Returns:
+            Path to the skills directory
        """
        if self.path:
            # Use configured path (can be absolute or relative to project root)
            return resolve_path(self.path)
        if env_path := os.getenv("DEER_FLOW_SKILLS_PATH"):
            return resolve_path(env_path)
-
-        project_default = project_root() / "skills"
-        if project_default.is_dir():
-            return project_default
-
-        for candidate in _legacy_skills_candidates():
-            if candidate.is_dir():
-                return candidate
-
-        return project_default
+        return project_root() / "skills"

    def get_skill_container_path(self, skill_name: str, category: str = "public") -> str:
        """
@@ -40,10 +40,7 @@ def set_stream_bridge_config(config: StreamBridgeConfig | None) -> None:
    _stream_bridge_config = config


-def load_stream_bridge_config_from_dict(config_dict: dict | None) -> None:
+def load_stream_bridge_config_from_dict(config_dict: dict) -> None:
    """Load stream bridge configuration from a dictionary."""
    global _stream_bridge_config
-    if config_dict is None:
-        _stream_bridge_config = None
-        return
    _stream_bridge_config = StreamBridgeConfig(**config_dict)
@@ -179,3 +179,9 @@ def load_subagents_config_from_dict(config_dict: dict) -> None:
            overrides_summary or "none",
            custom_agents_names or "none",
        )
+    else:
+        logger.info(
+            "Subagents config loaded: default timeout=%ss, default max_turns=%s, no per-agent overrides",
+            _subagents_config.timeout_seconds,
+            _subagents_config.max_turns,
+        )
@@ -4,4 +4,4 @@ from pydantic import BaseModel, Field
 class TokenUsageConfig(BaseModel):
    """Configuration for token usage tracking."""

-    enabled: bool = Field(default=True, description="Enable token usage tracking middleware")
+    enabled: bool = Field(default=False, description="Enable token usage tracking middleware")
@@ -1,6 +1,11 @@
 """Load MCP tools using langchain-mcp-adapters."""

+import asyncio
+import atexit
+import concurrent.futures
 import logging
+from collections.abc import Callable
+from typing import Any

 from langchain_core.tools import BaseTool

@@ -8,10 +13,46 @@ from deerflow.config.extensions_config import ExtensionsConfig
 from deerflow.mcp.client import build_servers_config
 from deerflow.mcp.oauth import build_oauth_tool_interceptor, get_initial_oauth_headers
 from deerflow.reflection import resolve_variable
-from deerflow.tools.sync import make_sync_tool_wrapper

 logger = logging.getLogger(__name__)

+# Shared worker pool: sync tool wrappers submit `asyncio.run(...)` here when the caller already has a running event loop
+_SYNC_TOOL_EXECUTOR = concurrent.futures.ThreadPoolExecutor(max_workers=10, thread_name_prefix="mcp-sync-tool")
+
+# atexit hook: calls shutdown(wait=False) on the shared pool when the interpreter exits
+atexit.register(lambda: _SYNC_TOOL_EXECUTOR.shutdown(wait=False))
+
+
+def _make_sync_tool_wrapper(coro: Callable[..., Any], tool_name: str) -> Callable[..., Any]:
+    """Build a synchronous wrapper for an asynchronous tool coroutine.
+
+    Args:
+        coro: The tool's asynchronous coroutine function.
+        tool_name: Name of the tool (used only in the error log message).
+
+    Returns:
+        A synchronous function: it calls ``asyncio.run`` directly when the calling thread has no
+        running event loop, otherwise on a pool thread that sees a copy of the caller's contextvars.
+    """
+    import contextvars  # function-scope import so the module's import block stays untouched
+
+    def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
+        try:
+            loop = asyncio.get_running_loop()
+        except RuntimeError:  # raised when this thread has no running loop
+            loop = None
+        try:
+            if loop is None:
+                return asyncio.run(coro(*args, **kwargs))
+            # asyncio.run cannot be nested in a running loop: run it on a pool thread inside a copied context.
+            ctx = contextvars.copy_context()
+            return _SYNC_TOOL_EXECUTOR.submit(ctx.run, asyncio.run, coro(*args, **kwargs)).result()
+        except Exception as e:
+            logger.error(f"Error invoking MCP tool '{tool_name}' via sync wrapper: {e}", exc_info=True)
+            raise
+
+    return sync_wrapper
+

 async def get_mcp_tools() -> list[BaseTool]:
    """Get all tools from enabled MCP servers.
@@ -85,7 +126,7 @@ async def get_mcp_tools() -> list[BaseTool]:
        # Patch tools to support sync invocation, as deerflow client streams synchronously
        for tool in tools:
            if getattr(tool, "func", None) is None and getattr(tool, "coroutine", None) is not None:
-                tool.func = make_sync_tool_wrapper(tool.coroutine, tool.name)
+                tool.func = _make_sync_tool_wrapper(tool.coroutine, tool.name)

        return tools

@@ -196,10 +196,6 @@ class ClaudeChatModel(ChatAnthropic):
        enforced by both the Anthropic API and AWS Bedrock.  Breakpoints are
        placed on the *last* eligible blocks because later breakpoints cover a
        larger prefix and yield better cache hit rates.
-
-        The system prompt is expected to be fully static (no per-user memory or
-        current date).  Dynamic context is injected per-turn via
-        DynamicContextMiddleware as a <system-reminder> in the first HumanMessage.
        """
        MAX_CACHE_BREAKPOINTS = 4

@@ -27,34 +27,6 @@ from deerflow.models.credential_loader import CodexCliCredential, load_codex_cli
 logger = logging.getLogger(__name__)

 CODEX_BASE_URL = "https://chatgpt.com/backend-api/codex"
-
-
-def _build_usage_metadata(oai_usage: dict) -> dict:
-    """Convert Codex/Responses API usage dict to LangChain usage_metadata format.
-
-    Maps OpenAI Responses API token usage fields to the dict structure that
-    LangChain AIMessage.usage_metadata expects. This avoids depending on
-    langchain_openai private helpers like ``_create_usage_metadata_responses``.
-    """
-    input_tokens = oai_usage.get("input_tokens", 0)
-    output_tokens = oai_usage.get("output_tokens", 0)
-    total_tokens = oai_usage.get("total_tokens", input_tokens + output_tokens)
-    metadata: dict = {
-        "input_tokens": input_tokens,
-        "output_tokens": output_tokens,
-        "total_tokens": total_tokens,
-    }
-    input_details = oai_usage.get("input_tokens_details") or {}
-    output_details = oai_usage.get("output_tokens_details") or {}
-    cache_read = input_details.get("cached_tokens")
-    if cache_read is not None:
-        metadata["input_token_details"] = {"cache_read": cache_read}
-    reasoning = output_details.get("reasoning_tokens")
-    if reasoning is not None:
-        metadata["output_token_details"] = {"reasoning": reasoning}
-    return metadata
-
-
 MAX_RETRIES = 3


@@ -374,7 +346,6 @@ class CodexChatModel(BaseChatModel):
                )

        usage = response.get("usage", {})
-        usage_metadata = _build_usage_metadata(usage) if usage else None
        additional_kwargs = {}
        if reasoning_content:
            additional_kwargs["reasoning_content"] = reasoning_content
@@ -384,7 +355,6 @@ class CodexChatModel(BaseChatModel):
            tool_calls=tool_calls if tool_calls else [],
            invalid_tool_calls=invalid_tool_calls,
            additional_kwargs=additional_kwargs,
-            usage_metadata=usage_metadata,
            response_metadata={
                "model": response.get("model", self.model),
                "usage": usage,
@@ -81,16 +81,7 @@ async def init_engine(
        try:
            import asyncpg  # noqa: F401
        except ImportError:
-            raise ImportError(
-                "database.backend is set to 'postgres' but asyncpg is not installed.\n"
-                "Install it with:\n"
-                "    cd backend && uv sync --all-packages --extra postgres\n"
-                "On the next `make dev` the postgres extra is auto-detected from\n"
-                "config.yaml (database.backend: postgres) and reinstalled, so it\n"
-                "will not be wiped again. Set UV_EXTRAS=postgres in .env to opt in\n"
-                "explicitly. Or switch to backend: sqlite in config.yaml for\n"
-                "single-node deployment."
-            ) from None
+            raise ImportError("database.backend is set to 'postgres' but asyncpg is not installed.\nInstall it with:\n    uv sync --extra postgres\nOr switch to backend: sqlite in config.yaml for single-node deployment.") from None

    if backend == "sqlite":
        import os
@@ -1,195 +0,0 @@
-"""Dialect-aware JSON value matching for SQLAlchemy (SQLite + PostgreSQL)."""
-
-from __future__ import annotations
-
-import re
-from dataclasses import dataclass
-from typing import Any
-
-from sqlalchemy import BigInteger, Float, String, bindparam
-from sqlalchemy.ext.compiler import compiles
-from sqlalchemy.sql.compiler import SQLCompiler
-from sqlalchemy.sql.expression import ColumnElement
-from sqlalchemy.sql.visitors import InternalTraversal
-from sqlalchemy.types import Boolean, TypeEngine
-
-# Key is interpolated into compiled SQL; restrict charset to prevent injection.
-_KEY_CHARSET_RE = re.compile(r"^[A-Za-z0-9_\-]+$")
-
-# Allowed value types for metadata filter values (same set accepted by JsonMatch).
-ALLOWED_FILTER_VALUE_TYPES: tuple[type, ...] = (type(None), bool, int, float, str)
-
-# SQLite raises an overflow when binding values outside signed 64-bit range;
-# PostgreSQL overflows during BIGINT cast. Reject at validation time instead.
-_INT64_MIN = -(2**63)
-_INT64_MAX = 2**63 - 1
-
-
-def validate_metadata_filter_key(key: object) -> bool:
-    """Return True if *key* is safe for use as a JSON metadata filter key.
-
-    A key is "safe" when it is a string matching ``[A-Za-z0-9_-]+``. The
-    charset is restricted because the key is interpolated into the
-    compiled SQL path expression (``$."<key>"`` / ``->`` literal), so any
-    laxer pattern would open a SQL/JSONPath injection surface.
-    """
-    return isinstance(key, str) and bool(_KEY_CHARSET_RE.match(key))
-
-
-def validate_metadata_filter_value(value: object) -> bool:
-    """Return True if *value* is an allowed type for a JSON metadata filter.
-
-    Matches the set of types ``_build_clause`` knows how to compile into
-    a dialect-portable predicate. Anything else (list/dict/bytes/...) is
-    intentionally rejected rather than silently coerced via ``str()`` —
-    silent coercion would (a) produce wrong matches and (b) break
-    SQLAlchemy's ``inherit_cache`` invariant when ``value`` is unhashable.
-
-    Integer values are additionally restricted to the signed 64-bit range
-    ``[-2**63, 2**63 - 1]``: SQLite overflows when binding larger values
-    and PostgreSQL overflows during the ``BIGINT`` cast.
-    """
-    if not isinstance(value, ALLOWED_FILTER_VALUE_TYPES):
-        return False
-    if isinstance(value, int) and not isinstance(value, bool):
-        if not (_INT64_MIN <= value <= _INT64_MAX):
-            return False
-    return True
-
-
-class JsonMatch(ColumnElement):
-    """Dialect-portable ``column[key] == value`` for JSON columns.
-
-    Compiles to ``json_type``/``json_extract`` on SQLite and
-    ``json_typeof``/``->>`` on PostgreSQL, with type-safe comparison
-    that distinguishes bool vs int and NULL vs missing key.
-
-    *key* must be a single literal key matching ``[A-Za-z0-9_-]+``.
-    *value* must be one of: ``None``, ``bool``, ``int`` (signed 64-bit), ``float``, ``str``.
-    """
-
-    inherit_cache = True
-    type = Boolean()
-    _is_implicitly_boolean = True
-
-    _traverse_internals = [
-        ("column", InternalTraversal.dp_clauseelement),
-        ("key", InternalTraversal.dp_string),
-        ("value", InternalTraversal.dp_plain_obj),
-    ]
-
-    def __init__(self, column: ColumnElement, key: str, value: object) -> None:
-        if not validate_metadata_filter_key(key):
-            raise ValueError(f"JsonMatch key must match {_KEY_CHARSET_RE.pattern!r}; got: {key!r}")
-        if not validate_metadata_filter_value(value):
-            if isinstance(value, int) and not isinstance(value, bool):
-                raise TypeError(f"JsonMatch int value out of signed 64-bit range [-2**63, 2**63-1]: {value!r}")
-            raise TypeError(f"JsonMatch value must be None, bool, int, float, or str; got: {type(value).__name__!r}")
-        self.column = column
-        self.key = key
-        self.value = value
-        super().__init__()
-
-
-@dataclass(frozen=True)
-class _Dialect:
-    """Per-dialect names used when emitting JSON type/value comparisons."""
-
-    null_type: str
-    num_types: tuple[str, ...]
-    num_cast: str
-    int_types: tuple[str, ...]
-    int_cast: str
-    # None for SQLite where json_type already returns 'integer'/'real';
-    # regex literal for PostgreSQL where json_typeof returns 'number' for
-    # both ints and floats, so an extra guard prevents CAST errors on floats.
-    int_guard: str | None
-    string_type: str
-    bool_type: str | None
-
-
-_SQLITE = _Dialect(
-    null_type="null",
-    num_types=("integer", "real"),
-    num_cast="REAL",
-    int_types=("integer",),
-    int_cast="INTEGER",
-    int_guard=None,
-    string_type="text",
-    bool_type=None,
-)
-
-_PG = _Dialect(
-    null_type="null",
-    num_types=("number",),
-    num_cast="DOUBLE PRECISION",
-    int_types=("number",),
-    int_cast="BIGINT",
-    int_guard="'^-?[0-9]+$'",
-    string_type="string",
-    bool_type="boolean",
-)
-
-
-def _bind(compiler: SQLCompiler, value: object, sa_type: TypeEngine[Any], **kw: Any) -> str:
-    param = bindparam(None, value, type_=sa_type)
-    return compiler.process(param, **kw)
-
-
-def _type_check(typeof: str, types: tuple[str, ...]) -> str:
-    if len(types) == 1:
-        return f"{typeof} = '{types[0]}'"
-    quoted = ", ".join(f"'{t}'" for t in types)
-    return f"{typeof} IN ({quoted})"
-
-
-def _build_clause(compiler: SQLCompiler, typeof: str, extract: str, value: object, dialect: _Dialect, **kw: Any) -> str:
-    if value is None:
-        return f"{typeof} = '{dialect.null_type}'"
-    if isinstance(value, bool):
-        # bool check must precede int check — bool is a subclass of int in Python
-        bool_str = "true" if value else "false"
-        if dialect.bool_type is None:
-            return f"{typeof} = '{bool_str}'"
-        return f"({typeof} = '{dialect.bool_type}' AND {extract} = '{bool_str}')"
-    if isinstance(value, int):
-        bp = _bind(compiler, value, BigInteger(), **kw)
-        if dialect.int_guard:
-            # CASE prevents CAST error when json_typeof = 'number' also matches floats
-            return f"(CASE WHEN {_type_check(typeof, dialect.int_types)} AND {extract} ~ {dialect.int_guard} THEN CAST({extract} AS {dialect.int_cast}) END = {bp})"
-        return f"({_type_check(typeof, dialect.int_types)} AND CAST({extract} AS {dialect.int_cast}) = {bp})"
-    if isinstance(value, float):
-        bp = _bind(compiler, value, Float(), **kw)
-        return f"({_type_check(typeof, dialect.num_types)} AND CAST({extract} AS {dialect.num_cast}) = {bp})"
-    bp = _bind(compiler, str(value), String(), **kw)
-    return f"({typeof} = '{dialect.string_type}' AND {extract} = {bp})"
-
-
-@compiles(JsonMatch, "sqlite")
-def _compile_sqlite(element: JsonMatch, compiler: SQLCompiler, **kw: Any) -> str:
-    if not validate_metadata_filter_key(element.key):
-        raise ValueError(f"Key escaped validation: {element.key!r}")
-    col = compiler.process(element.column, **kw)
-    path = f'$."{element.key}"'
-    typeof = f"json_type({col}, '{path}')"
-    extract = f"json_extract({col}, '{path}')"
-    return _build_clause(compiler, typeof, extract, element.value, _SQLITE, **kw)
-
-
-@compiles(JsonMatch, "postgresql")
-def _compile_pg(element: JsonMatch, compiler: SQLCompiler, **kw: Any) -> str:
-    if not validate_metadata_filter_key(element.key):
-        raise ValueError(f"Key escaped validation: {element.key!r}")
-    col = compiler.process(element.column, **kw)
-    typeof = f"json_typeof({col} -> '{element.key}')"
-    extract = f"({col} ->> '{element.key}')"
-    return _build_clause(compiler, typeof, extract, element.value, _PG, **kw)
-
-
-@compiles(JsonMatch)
-def _compile_default(element: JsonMatch, compiler: SQLCompiler, **kw: Any) -> str:
-    raise NotImplementedError(f"JsonMatch supports only sqlite and postgresql; got dialect: {compiler.dialect.name}")
-
-
-def json_match(column: ColumnElement, key: str, value: object) -> JsonMatch:
-    return JsonMatch(column, key, value)
@@ -23,18 +23,6 @@ class RunRepository(RunStore):
    def __init__(self, session_factory: async_sessionmaker[AsyncSession]) -> None:
        self._sf = session_factory

-    @staticmethod
-    def _normalize_model_name(model_name: str | None) -> str | None:
-        """Normalize model_name for storage: strip whitespace, truncate to 128 chars."""
-        if model_name is None:
-            return None
-        if not isinstance(model_name, str):
-            model_name = str(model_name)
-        normalized = model_name.strip()
-        if len(normalized) > 128:
-            normalized = normalized[:128]
-        return normalized
-
    @staticmethod
    def _safe_json(obj: Any) -> Any:
        """Ensure obj is JSON-serializable. Falls back to model_dump() or str()."""
@@ -82,7 +70,6 @@ class RunRepository(RunStore):
        thread_id,
        assistant_id=None,
        user_id: str | None | _AutoSentinel = AUTO,
-        model_name: str | None = None,
        status="pending",
        multitask_strategy="reject",
        metadata=None,
@@ -98,7 +85,6 @@ class RunRepository(RunStore):
            thread_id=thread_id,
            assistant_id=assistant_id,
            user_id=resolved_user_id,
-            model_name=self._normalize_model_name(model_name),
            status=status,
            multitask_strategy=multitask_strategy,
            metadata_json=self._safe_json(metadata) or {},
@@ -4,7 +4,7 @@ from __future__ import annotations

 from typing import TYPE_CHECKING

-from deerflow.persistence.thread_meta.base import InvalidMetadataFilterError, ThreadMetaStore
+from deerflow.persistence.thread_meta.base import ThreadMetaStore
 from deerflow.persistence.thread_meta.memory import MemoryThreadMetaStore
 from deerflow.persistence.thread_meta.model import ThreadMetaRow
 from deerflow.persistence.thread_meta.sql import ThreadMetaRepository
@@ -14,7 +14,6 @@ if TYPE_CHECKING:
    from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker

 __all__ = [
-    "InvalidMetadataFilterError",
    "MemoryThreadMetaStore",
    "ThreadMetaRepository",
    "ThreadMetaRow",
@@ -15,15 +15,10 @@ three-state semantics (see :mod:`deerflow.runtime.user_context`):
 from __future__ import annotations

 import abc
-from typing import Any

 from deerflow.runtime.user_context import AUTO, _AutoSentinel


-class InvalidMetadataFilterError(ValueError):
-    """Raised when all client-supplied metadata filter keys are rejected."""
-
-
 class ThreadMetaStore(abc.ABC):
    @abc.abstractmethod
    async def create(
@@ -45,12 +40,12 @@ class ThreadMetaStore(abc.ABC):
    async def search(
        self,
        *,
-        metadata: dict[str, Any] | None = None,
+        metadata: dict | None = None,
        status: str | None = None,
        limit: int = 100,
        offset: int = 0,
        user_id: str | None | _AutoSentinel = AUTO,
-    ) -> list[dict[str, Any]]:
+    ) -> list[dict]:
        pass

    @abc.abstractmethod
@@ -7,13 +7,13 @@ router for thread records.

 from __future__ import annotations

+import time
 from typing import Any

 from langgraph.store.base import BaseStore

 from deerflow.persistence.thread_meta.base import ThreadMetaStore
 from deerflow.runtime.user_context import AUTO, _AutoSentinel, resolve_user_id
-from deerflow.utils.time import coerce_iso, now_iso

 THREADS_NS: tuple[str, ...] = ("threads",)

@@ -48,7 +48,7 @@ class MemoryThreadMetaStore(ThreadMetaStore):
        metadata: dict | None = None,
    ) -> dict:
        resolved_user_id = resolve_user_id(user_id, method_name="MemoryThreadMetaStore.create")
-        now = now_iso()
+        now = time.time()
        record: dict[str, Any] = {
            "thread_id": thread_id,
            "assistant_id": assistant_id,
@@ -69,12 +69,12 @@ class MemoryThreadMetaStore(ThreadMetaStore):
    async def search(
        self,
        *,
-        metadata: dict[str, Any] | None = None,
+        metadata: dict | None = None,
        status: str | None = None,
        limit: int = 100,
        offset: int = 0,
        user_id: str | None | _AutoSentinel = AUTO,
-    ) -> list[dict[str, Any]]:
+    ) -> list[dict]:
        resolved_user_id = resolve_user_id(user_id, method_name="MemoryThreadMetaStore.search")
        filter_dict: dict[str, Any] = {}
        if metadata:
@@ -106,7 +106,7 @@ class MemoryThreadMetaStore(ThreadMetaStore):
        if record is None:
            return
        record["display_name"] = display_name
-        record["updated_at"] = now_iso()
+        record["updated_at"] = time.time()
        await self._store.aput(THREADS_NS, thread_id, record)

    async def update_status(self, thread_id: str, status: str, *, user_id: str | None | _AutoSentinel = AUTO) -> None:
@@ -114,7 +114,7 @@ class MemoryThreadMetaStore(ThreadMetaStore):
        if record is None:
            return
        record["status"] = status
-        record["updated_at"] = now_iso()
+        record["updated_at"] = time.time()
        await self._store.aput(THREADS_NS, thread_id, record)

    async def update_metadata(self, thread_id: str, metadata: dict, *, user_id: str | None | _AutoSentinel = AUTO) -> None:
@@ -124,7 +124,7 @@ class MemoryThreadMetaStore(ThreadMetaStore):
        merged = dict(record.get("metadata") or {})
        merged.update(metadata)
        record["metadata"] = merged
-        record["updated_at"] = now_iso()
+        record["updated_at"] = time.time()
        await self._store.aput(THREADS_NS, thread_id, record)

    async def delete(self, thread_id: str, *, user_id: str | None | _AutoSentinel = AUTO) -> None:
@@ -144,8 +144,6 @@ class MemoryThreadMetaStore(ThreadMetaStore):
            "display_name": val.get("display_name"),
            "status": val.get("status", "idle"),
            "metadata": val.get("metadata", {}),
-            # ``coerce_iso`` heals legacy unix-second values written by
-            # earlier Gateway versions that called ``str(time.time())``.
-            "created_at": coerce_iso(val.get("created_at", "")),
-            "updated_at": coerce_iso(val.get("updated_at", "")),
+            "created_at": str(val.get("created_at", "")),
+            "updated_at": str(val.get("updated_at", "")),
        }
@@ -2,20 +2,16 @@

 from __future__ import annotations

-import logging
 from datetime import UTC, datetime
 from typing import Any

 from sqlalchemy import select, update
 from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker

-from deerflow.persistence.json_compat import json_match
-from deerflow.persistence.thread_meta.base import InvalidMetadataFilterError, ThreadMetaStore
+from deerflow.persistence.thread_meta.base import ThreadMetaStore
 from deerflow.persistence.thread_meta.model import ThreadMetaRow
 from deerflow.runtime.user_context import AUTO, _AutoSentinel, resolve_user_id

-logger = logging.getLogger(__name__)
-

 class ThreadMetaRepository(ThreadMetaStore):
    def __init__(self, session_factory: async_sessionmaker[AsyncSession]) -> None:
@@ -24,7 +20,7 @@ class ThreadMetaRepository(ThreadMetaStore):
    @staticmethod
    def _row_to_dict(row: ThreadMetaRow) -> dict[str, Any]:
        d = row.to_dict()
-        d["metadata"] = d.pop("metadata_json", None) or {}
+        d["metadata"] = d.pop("metadata_json", {})
        for key in ("created_at", "updated_at"):
            val = d.get(key)
            if isinstance(val, datetime):
@@ -108,43 +104,39 @@ class ThreadMetaRepository(ThreadMetaStore):
    async def search(
        self,
        *,
-        metadata: dict[str, Any] | None = None,
+        metadata: dict | None = None,
        status: str | None = None,
        limit: int = 100,
        offset: int = 0,
        user_id: str | None | _AutoSentinel = AUTO,
-    ) -> list[dict[str, Any]]:
+    ) -> list[dict]:
        """Search threads with optional metadata and status filters.

        Owner filter is enforced by default: caller must be in a user
        context. Pass ``user_id=None`` to bypass (migration/CLI).
+
+        When ``metadata`` is given, only the ``limit * 5 + offset`` most
+        recently updated rows are fetched and matched in Python, so fewer
+        than ``limit`` results may be returned even if older rows match.
        """
        resolved_user_id = resolve_user_id(user_id, method_name="ThreadMetaRepository.search")
-        stmt = select(ThreadMetaRow).order_by(ThreadMetaRow.updated_at.desc(), ThreadMetaRow.thread_id.desc())
+        stmt = select(ThreadMetaRow).order_by(ThreadMetaRow.updated_at.desc())
        if resolved_user_id is not None:
            stmt = stmt.where(ThreadMetaRow.user_id == resolved_user_id)
        if status:
            stmt = stmt.where(ThreadMetaRow.status == status)

        if metadata:
-            applied = 0
-            for key, value in metadata.items():
-                try:
-                    stmt = stmt.where(json_match(ThreadMetaRow.metadata_json, key, value))
-                    applied += 1
-                except (ValueError, TypeError) as exc:
-                    logger.warning("Skipping metadata filter key %s: %s", ascii(key), exc)
-            if applied == 0:
-                # Comma-separated plain string (no list repr / nested
-                # quoting) so the 400 detail surfaced by the Gateway is
-                # easy for clients to read. Sorted for determinism.
-                rejected_keys = ", ".join(sorted(str(k) for k in metadata))
-                raise InvalidMetadataFilterError(f"All metadata filter keys were rejected as unsafe: {rejected_keys}")
-
-        stmt = stmt.limit(limit).offset(offset)
-        async with self._sf() as session:
-            result = await session.execute(stmt)
-            return [self._row_to_dict(r) for r in result.scalars()]
+            # When metadata filter is active, fetch a larger window and filter
+            # in Python. TODO(Phase 2): use JSON DB operators (Postgres @>,
+            # SQLite json_extract) for server-side filtering.
+            stmt = stmt.limit(limit * 5 + offset)
+            async with self._sf() as session:
+                result = await session.execute(stmt)
+                rows = [self._row_to_dict(r) for r in result.scalars()]
+            # ``or {}`` (not a ``.get`` default): a row whose metadata_json is
+            # NULL surfaces as ``"metadata": None``, and the key being present
+            # means the default would never be used.
+            rows = [r for r in rows if all((r.get("metadata") or {}).get(k) == v for k, v in metadata.items())]
+            # Offset/limit are applied to the filtered rows, not the SQL window.
+            return rows[offset : offset + limit]
+        else:
+            stmt = stmt.limit(limit).offset(offset)
+            async with self._sf() as session:
+                result = await session.execute(stmt)
+                return [self._row_to_dict(r) for r in result.scalars()]

    async def _check_ownership(self, session: AsyncSession, thread_id: str, resolved_user_id: str | None) -> bool:
        """Return True if the row exists and is owned (or filter bypassed)."""
@@ -25,7 +25,7 @@ from collections.abc import Iterator

 from langgraph.types import Checkpointer

-from deerflow.config.app_config import get_app_config
+from deerflow.config.app_config import AppConfig, get_app_config
 from deerflow.config.checkpointer_config import CheckpointerConfig
 from deerflow.runtime.store._sqlite_utils import ensure_sqlite_parent_dir, resolve_sqlite_conn_str

@@ -36,9 +36,7 @@ logger = logging.getLogger(__name__)
 # ---------------------------------------------------------------------------

 SQLITE_INSTALL = "langgraph-checkpoint-sqlite is required for the SQLite checkpointer. Install it with: uv add langgraph-checkpoint-sqlite"
-POSTGRES_INSTALL = (
-    "langgraph-checkpoint-postgres is required for the PostgreSQL checkpointer. Install the package extra with: pip install 'deerflow-harness[postgres]' (or use: uv sync --all-packages --extra postgres when developing locally)"
-)
+POSTGRES_INSTALL = "langgraph-checkpoint-postgres is required for the PostgreSQL checkpointer. Install it with: uv add langgraph-checkpoint-postgres psycopg[binary] psycopg-pool"
 POSTGRES_CONN_REQUIRED = "checkpointer.connection_string is required for the postgres backend"

 # ---------------------------------------------------------------------------
@@ -100,9 +98,78 @@ def _sync_checkpointer_cm(config: CheckpointerConfig) -> Iterator[Checkpointer]:

 _checkpointer: Checkpointer | None = None
 _checkpointer_ctx = None  # open context manager keeping the connection alive
+_explicit_checkpointers: dict[int, Checkpointer] = {}
+_explicit_checkpointer_contexts: dict[int, object] = {}


-def get_checkpointer() -> Checkpointer:
+def _default_in_memory_checkpointer() -> Checkpointer:
+    """Create a new ``InMemorySaver`` and log that it is not persistent.
+
+    Every call returns a fresh instance; nothing is cached here.
+    """
+    # Imported inside the function rather than at module import time.
+    from langgraph.checkpoint.memory import InMemorySaver
+
+    logger.info("Checkpointer: using InMemorySaver (in-process, not persistent)")
+    return InMemorySaver()
+
+
+def _persistent_database_backend(db_config) -> str | None:
+    """Return ``"sqlite"`` or ``"postgres"`` if *db_config* selects one, else ``None``.
+
+    *db_config* may be ``None`` or lack a ``backend`` attribute; both cases
+    yield ``None`` because the attribute is read with a ``None`` default.
+    """
+    backend = getattr(db_config, "backend", None)
+    # Any other backend value is treated as "no persistent backend".
+    if backend in {"sqlite", "postgres"}:
+        return backend
+    return None
+
+
+@contextlib.contextmanager
+def _sync_checkpointer_from_database_cm(db_config) -> Iterator[Checkpointer]:
+    """Context manager that creates a sync checkpointer from unified DatabaseConfig.
+
+    Yields an ``InMemorySaver`` when *db_config* does not select ``sqlite`` or
+    ``postgres``. Otherwise yields a ``SqliteSaver`` / ``PostgresSaver`` on
+    which ``setup()`` has already been called; the saver's own context
+    manager stays open until this one exits.
+
+    Raises:
+        ImportError: the saver package for the selected backend is missing.
+        ValueError: backend is ``postgres`` but ``db_config.postgres_url``
+            is unset or empty.
+    """
+    backend = _persistent_database_backend(db_config)
+    if backend is None:
+        # No persistent backend selected: fall back to the in-process saver.
+        yield _default_in_memory_checkpointer()
+        return
+
+    if backend == "sqlite":
+        try:
+            from langgraph.checkpoint.sqlite import SqliteSaver
+        except ImportError as exc:
+            # Replace the bare import failure with install instructions.
+            raise ImportError(SQLITE_INSTALL) from exc
+
+        conn_str = db_config.checkpointer_sqlite_path
+        ensure_sqlite_parent_dir(conn_str)
+        with SqliteSaver.from_conn_string(conn_str) as saver:
+            saver.setup()
+            logger.info("Checkpointer: using SqliteSaver (%s)", conn_str)
+            yield saver
+        return
+
+    if backend == "postgres":
+        try:
+            from langgraph.checkpoint.postgres import PostgresSaver
+        except ImportError as exc:
+            # Replace the bare import failure with install instructions.
+            raise ImportError(POSTGRES_INSTALL) from exc
+
+        if not db_config.postgres_url:
+            raise ValueError("database.postgres_url is required for the postgres backend")
+
+        with PostgresSaver.from_conn_string(db_config.postgres_url) as saver:
+            saver.setup()
+            # The connection URL is deliberately not included in this log line.
+            logger.info("Checkpointer: using PostgresSaver")
+            yield saver
+        return
+
+    # Defensive fallback: _persistent_database_backend only returns "sqlite",
+    # "postgres" or None, all of which are handled above.
+    raise ValueError(f"Unknown database backend: {backend!r}")
+
+
+def _build_checkpointer_from_app_config(app_config: AppConfig) -> tuple[Checkpointer, object | None]:
+    """Build a checkpointer for *app_config* and return it with its open context.
+
+    Resolution order:
+      1. ``app_config.checkpointer`` when it is not ``None``.
+      2. ``app_config.database`` when its backend is ``sqlite`` or ``postgres``.
+      3. A new in-memory saver.
+
+    Returns:
+        ``(checkpointer, ctx)``. ``ctx`` is the already-entered context
+        manager for cases 1 and 2, and ``None`` for case 3. This function
+        never exits ``ctx``; the caller must call ``ctx.__exit__`` to
+        release the underlying resources.
+    """
+    if app_config.checkpointer is not None:
+        ctx = _sync_checkpointer_cm(app_config.checkpointer)
+        # Entered manually (no ``with``) so the saver outlives this call.
+        return ctx.__enter__(), ctx
+
+    # ``database`` is read defensively; a config without it skips this branch.
+    db_config = getattr(app_config, "database", None)
+    if _persistent_database_backend(db_config) is not None:
+        ctx = _sync_checkpointer_from_database_cm(db_config)
+        return ctx.__enter__(), ctx
+
+    return _default_in_memory_checkpointer(), None
+
+
+def get_checkpointer(app_config: AppConfig | None = None) -> Checkpointer:
    """Return the global sync checkpointer singleton, creating it on first call.

    Returns an ``InMemorySaver`` when no checkpointer is configured in *config.yaml*.
@@ -113,6 +180,18 @@ def get_checkpointer() -> Checkpointer:
    """
    global _checkpointer, _checkpointer_ctx

+    if app_config is not None:
+        cache_key = id(app_config)
+        cached = _explicit_checkpointers.get(cache_key)
+        if cached is not None:
+            return cached
+
+        explicit_checkpointer, explicit_ctx = _build_checkpointer_from_app_config(app_config)
+        _explicit_checkpointers[cache_key] = explicit_checkpointer
+        if explicit_ctx is not None:
+            _explicit_checkpointer_contexts[cache_key] = explicit_ctx
+        return explicit_checkpointer
+
    if _checkpointer is not None:
        return _checkpointer

@@ -123,28 +202,30 @@ def get_checkpointer() -> Checkpointer:
    from deerflow.config.checkpointer_config import get_checkpointer_config

    config = get_checkpointer_config()
+    global_app_config = _app_config

-    if config is None and _app_config is None:
+    if config is None and global_app_config is None:
        # Only load app config lazily when neither the app config nor an explicit
        # checkpointer config has been initialized yet. This keeps tests that
        # intentionally set the global checkpointer config isolated from any
        # ambient config.yaml on disk.
        try:
-            get_app_config()
+            global_app_config = get_app_config()
        except FileNotFoundError:
            # In test environments without config.yaml, this is expected.
            pass
        config = get_checkpointer_config()
-    if config is None:
-        from langgraph.checkpoint.memory import InMemorySaver

-        logger.info("Checkpointer: using InMemorySaver (in-process, not persistent)")
-        _checkpointer = InMemorySaver()
+    if config is not None:
+        _checkpointer_ctx = _sync_checkpointer_cm(config)
+        _checkpointer = _checkpointer_ctx.__enter__()
        return _checkpointer

-    _checkpointer_ctx = _sync_checkpointer_cm(config)
-    _checkpointer = _checkpointer_ctx.__enter__()
+    if global_app_config is not None:
+        _checkpointer, _checkpointer_ctx = _build_checkpointer_from_app_config(global_app_config)
+        return _checkpointer

+    _checkpointer = _default_in_memory_checkpointer()
    return _checkpointer


@@ -163,6 +244,18 @@ def reset_checkpointer() -> None:
        _checkpointer_ctx = None
    _checkpointer = None

+    for cache_key, ctx in list(_explicit_checkpointer_contexts.items()):
+        try:
+            ctx.__exit__(None, None, None)
+        except Exception:
+            logger.warning("Error during explicit checkpointer cleanup", exc_info=True)
+        finally:
+            _explicit_checkpointer_contexts.pop(cache_key, None)
+            _explicit_checkpointers.pop(cache_key, None)
+
+    _explicit_checkpointers.clear()
+    _explicit_checkpointer_contexts.clear()
+

 # ---------------------------------------------------------------------------
 # Sync context manager
@@ -170,7 +263,7 @@ def reset_checkpointer() -> None:


@contextlib.contextmanager
-def checkpointer_context() -> Iterator[Checkpointer]:
+def checkpointer_context(app_config: AppConfig | None = None) -> Iterator[Checkpointer]:
    """Sync context manager that yields a checkpointer and cleans up on exit.

    Unlike :func:`get_checkpointer`, this does **not** cache the instance —
@@ -183,12 +276,16 @@ def checkpointer_context() -> Iterator[Checkpointer]:
    Yields an ``InMemorySaver`` when no checkpointer is configured in *config.yaml*.
    """

-    config = get_app_config()
-    if config.checkpointer is None:
-        from langgraph.checkpoint.memory import InMemorySaver
-
-        yield InMemorySaver()
+    resolved_app_config = app_config or get_app_config()
+    if resolved_app_config.checkpointer is not None:
+        with _sync_checkpointer_cm(resolved_app_config.checkpointer) as saver:
+            yield saver
        return

-    with _sync_checkpointer_cm(config.checkpointer) as saver:
-        yield saver
+    db_config = getattr(resolved_app_config, "database", None)
+    if _persistent_database_backend(db_config) is not None:
+        with _sync_checkpointer_from_database_cm(db_config) as saver:
+            yield saver
+        return
+
+    yield _default_in_memory_checkpointer()
@@ -9,7 +9,6 @@ from __future__ import annotations
 import json
 import logging
 from datetime import UTC, datetime
-from typing import Any

 from sqlalchemy import delete, func, select
 from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
@@ -34,21 +33,20 @@ class DbRunEventStore(RunEventStore):
        if isinstance(val, datetime):
            d["created_at"] = val.isoformat()
        d.pop("id", None)
-        # Restore structured content that was JSON-serialized on write.
+        # Restore dict content that was JSON-serialized on write
        raw = d.get("content", "")
-        metadata = d.get("metadata", {})
-        if isinstance(raw, str) and (metadata.get("content_is_json") or metadata.get("content_is_dict")):
+        if isinstance(raw, str) and d.get("metadata", {}).get("content_is_dict"):
            try:
                d["content"] = json.loads(raw)
            except (json.JSONDecodeError, ValueError):
-                # Content looked like JSON but failed to parse;
+                # Content looked like JSON (content_is_dict flag) but failed to parse;
                # keep the raw string as-is.
                logger.debug("Failed to deserialize content as JSON for event seq=%s", d.get("seq"))
        return d

-    def _truncate_trace(self, category: str, content: Any, metadata: dict | None) -> tuple[Any, dict]:
+    def _truncate_trace(self, category: str, content: str | dict, metadata: dict | None) -> tuple[str | dict, dict]:
        if category == "trace":
-            text = content if isinstance(content, str) else json.dumps(content, default=str, ensure_ascii=False)
+            text = json.dumps(content, default=str, ensure_ascii=False) if isinstance(content, dict) else content
            encoded = text.encode("utf-8")
            if len(encoded) > self._max_trace_content:
                # Truncate by bytes, then decode back (may cut a multi-byte char, so use errors="ignore")
@@ -56,18 +54,6 @@ class DbRunEventStore(RunEventStore):
                metadata = {**(metadata or {}), "content_truncated": True, "original_byte_length": len(encoded)}
        return content, metadata or {}

-    @staticmethod
-    def _content_to_db(content: Any, metadata: dict | None) -> tuple[str, dict]:
-        metadata = metadata or {}
-        if isinstance(content, str):
-            return content, metadata
-
-        db_content = json.dumps(content, default=str, ensure_ascii=False)
-        metadata = {**metadata, "content_is_json": True}
-        if isinstance(content, dict):
-            metadata["content_is_dict"] = True
-        return db_content, metadata
-
    @staticmethod
    def _user_id_from_context() -> str | None:
        """Soft read of user_id from contextvar for write paths.
@@ -96,7 +82,11 @@ class DbRunEventStore(RunEventStore):
        the initial ``human_message`` event (once per run).
        """
        content, metadata = self._truncate_trace(category, content, metadata)
-        db_content, metadata = self._content_to_db(content, metadata)
+        if isinstance(content, dict):
+            db_content = json.dumps(content, default=str, ensure_ascii=False)
+            metadata = {**(metadata or {}), "content_is_dict": True}
+        else:
+            db_content = content
        user_id = self._user_id_from_context()
        async with self._sf() as session:
            async with session.begin():
@@ -138,7 +128,11 @@ class DbRunEventStore(RunEventStore):
                    category = e.get("category", "trace")
                    metadata = e.get("metadata")
                    content, metadata = self._truncate_trace(category, content, metadata)
-                    db_content, metadata = self._content_to_db(content, metadata)
+                    if isinstance(content, dict):
+                        db_content = json.dumps(content, default=str, ensure_ascii=False)
+                        metadata = {**(metadata or {}), "content_is_dict": True}
+                    else:
+                        db_content = content
                    row = RunEventRow(
                        thread_id=e["thread_id"],
                        run_id=e["run_id"],
@@ -20,13 +20,12 @@ from __future__ import annotations
 import asyncio
 import logging
 import time
-from collections.abc import Mapping
 from datetime import UTC, datetime
 from typing import TYPE_CHECKING, Any, cast
 from uuid import UUID

 from langchain_core.callbacks import BaseCallbackHandler
-from langchain_core.messages import AIMessage, AnyMessage, BaseMessage, HumanMessage, ToolMessage
+from langchain_core.messages import AnyMessage, BaseMessage, HumanMessage, ToolMessage
 from langgraph.types import Command

 if TYPE_CHECKING:
@@ -64,16 +63,6 @@ class RunJournal(BaseCallbackHandler):
        self._total_tokens = 0
        self._llm_call_count = 0

-        # Caller-bucketed token accumulators
-        self._lead_agent_tokens = 0
-        self._subagent_tokens = 0
-        self._middleware_tokens = 0
-
-        # Dedup: LangChain may fire on_llm_end multiple times for the same run_id
-        self._counted_llm_run_ids: set[str] = set()
-        self._counted_external_source_ids: set[str] = set()
-        self._counted_message_llm_run_ids: set[str] = set()
-
        # Convenience fields
        self._last_ai_msg: str | None = None
        self._first_human_msg: str | None = None
@@ -88,50 +77,6 @@ class RunJournal(BaseCallbackHandler):

    # -- Lifecycle callbacks --

-    @staticmethod
-    def _message_text(message: BaseMessage) -> str:
-        """Extract displayable text from a message's mixed content shape."""
-        content = getattr(message, "content", None)
-        if isinstance(content, str):
-            return content
-        if isinstance(content, list):
-            parts: list[str] = []
-            for block in content:
-                if isinstance(block, str):
-                    parts.append(block)
-                elif isinstance(block, Mapping):
-                    text = block.get("text")
-                    if isinstance(text, str):
-                        parts.append(text)
-                    else:
-                        nested = block.get("content")
-                        if isinstance(nested, str):
-                            parts.append(nested)
-            return "".join(parts)
-        if isinstance(content, Mapping):
-            for key in ("text", "content"):
-                value = content.get(key)
-                if isinstance(value, str):
-                    return value
-
-        text = getattr(message, "text", None)
-        if isinstance(text, str):
-            return text
-        return ""
-
-    def _record_message_summary(self, message: BaseMessage, *, caller: str | None = None) -> None:
-        """Update run-level convenience fields for persisted run rows."""
-        self._msg_count += 1
-
-        # ``last_ai_message`` should represent the lead agent's user-facing
-        # answer. Middleware/subagent model calls and empty tool-call-only
-        # AI messages must not overwrite the last useful assistant text.
-        is_ai_message = isinstance(message, AIMessage) or getattr(message, "type", None) == "ai"
-        if is_ai_message and (caller is None or caller == "lead_agent"):
-            text = self._message_text(message).strip()
-            if text:
-                self._last_ai_msg = text[:2000]
-
    def on_chain_start(
        self,
        serialized: dict[str, Any],
@@ -210,7 +155,6 @@ class RunJournal(BaseCallbackHandler):
                            content=m.model_dump(),
                            metadata={"caller": caller},
                        )
-                        self._record_message_summary(m, caller=caller)
                        break
                if self._first_human_msg:
                    break
@@ -269,34 +213,20 @@ class RunJournal(BaseCallbackHandler):
                    "llm_call_index": call_index,
                },
            )
-            if rid not in self._counted_message_llm_run_ids:
-                self._record_message_summary(message, caller=caller)

-            # Token accumulation (dedup by langchain run_id to avoid double-counting
-            # when the callback fires more than once for the same response)
+            # Token accumulation
            if self._track_tokens:
                input_tk = usage_dict.get("input_tokens", 0) or 0
                output_tk = usage_dict.get("output_tokens", 0) or 0
                total_tk = usage_dict.get("total_tokens", 0) or 0
                if total_tk == 0:
                    total_tk = input_tk + output_tk
-                if total_tk > 0 and rid not in self._counted_llm_run_ids:
-                    self._counted_llm_run_ids.add(rid)
+                if total_tk > 0:
                    self._total_input_tokens += input_tk
                    self._total_output_tokens += output_tk
                    self._total_tokens += total_tk
                    self._llm_call_count += 1

-                    if caller.startswith("subagent:"):
-                        self._subagent_tokens += total_tk
-                    elif caller.startswith("middleware:"):
-                        self._middleware_tokens += total_tk
-                    else:
-                        self._lead_agent_tokens += total_tk
-
-        if messages:
-            self._counted_message_llm_run_ids.add(str(run_id))
-
    def on_llm_error(self, error: BaseException, *, run_id: UUID, **kwargs: Any) -> None:
        self._llm_start_times.pop(str(run_id), None)
        self._put(event_type="llm.error", category="trace", content=str(error))
@@ -312,14 +242,12 @@ class RunJournal(BaseCallbackHandler):
            if isinstance(output, ToolMessage):
                msg = cast(ToolMessage, output)
                self._put(event_type="llm.tool.result", category="message", content=msg.model_dump())
-                self._record_message_summary(msg)
            elif isinstance(output, Command):
                cmd = cast(Command, output)
                messages = cmd.update.get("messages", [])
                for message in messages:
                    if isinstance(message, BaseMessage):
                        self._put(event_type="llm.tool.result", category="message", content=message.model_dump())
-                        self._record_message_summary(message)
                    else:
                        logger.warning(f"on_tool_end {run_id}: command update message is not BaseMessage: {type(message)}")
            else:
@@ -402,49 +330,6 @@ class RunJournal(BaseCallbackHandler):

    # -- Public methods (called by worker) --

-    def record_external_llm_usage_records(
-        self,
-        records: list[dict[str, int | str]],
-    ) -> None:
-        """Record token usage from external sources (e.g., subagents).
-
-        Each record should contain:
-            source_run_id: Unique identifier to prevent double-counting
-            caller: Caller tag (e.g. "subagent:general-purpose")
-            input_tokens: Input token count
-            output_tokens: Output token count
-            total_tokens: Total token count (computed from input+output if 0/missing)
-        """
-        if not self._track_tokens:
-            return
-        for record in records:
-            source_id = str(record.get("source_run_id", ""))
-            if not source_id:
-                continue
-            if source_id in self._counted_external_source_ids:
-                continue
-
-            total_tk = record.get("total_tokens", 0) or 0
-            if total_tk <= 0:
-                input_tk = record.get("input_tokens", 0) or 0
-                output_tk = record.get("output_tokens", 0) or 0
-                total_tk = input_tk + output_tk
-            if total_tk <= 0:
-                continue
-
-            self._counted_external_source_ids.add(source_id)
-            self._total_input_tokens += record.get("input_tokens", 0) or 0
-            self._total_output_tokens += record.get("output_tokens", 0) or 0
-            self._total_tokens += total_tk
-
-            caller = str(record.get("caller", ""))
-            if caller.startswith("subagent:"):
-                self._subagent_tokens += total_tk
-            elif caller.startswith("middleware:"):
-                self._middleware_tokens += total_tk
-            else:
-                self._lead_agent_tokens += total_tk
-
    def set_first_human_message(self, content: str) -> None:
        """Record the first human message for convenience fields."""
        self._first_human_msg = content[:2000] if content else None
@@ -491,9 +376,6 @@ class RunJournal(BaseCallbackHandler):
            "total_output_tokens": self._total_output_tokens,
            "total_tokens": self._total_tokens,
            "llm_call_count": self._llm_call_count,
-            "lead_agent_tokens": self._lead_agent_tokens,
-            "subagent_tokens": self._subagent_tokens,
-            "middleware_tokens": self._middleware_tokens,
            "message_count": self._msg_count,
            "last_ai_message": self._last_ai_msg,
            "first_human_message": self._first_human_msg,
@@ -6,10 +6,9 @@ import asyncio
 import logging
 import uuid
 from dataclasses import dataclass, field
+from datetime import UTC, datetime
 from typing import TYPE_CHECKING

-from deerflow.utils.time import now_iso as _now_iso
-
 from .schemas import DisconnectMode, RunStatus

 if TYPE_CHECKING:
@@ -18,6 +17,10 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)


+def _now_iso() -> str:
+    """Return the current UTC time as an ISO 8601 string."""
+    current_utc = datetime.now(tz=UTC)
+    return current_utc.isoformat()
+
+
@dataclass
 class RunRecord:
    """Mutable record for a single run."""
@@ -36,7 +39,6 @@ class RunRecord:
    abort_event: asyncio.Event = field(default_factory=asyncio.Event, repr=False)
    abort_action: str = "interrupt"
    error: str | None = None
-    model_name: str | None = None


 class RunManager:
@@ -66,7 +68,6 @@ class RunManager:
                metadata=record.metadata or {},
                kwargs=record.kwargs or {},
                created_at=record.created_at,
-                model_name=record.model_name,
            )
        except Exception:
            logger.warning("Failed to persist run %s to store", record.run_id, exc_info=True)
@@ -139,18 +140,6 @@ class RunManager:
                logger.warning("Failed to persist status update for run %s", run_id, exc_info=True)
        logger.info("Run %s -> %s", run_id, status.value)

-    async def update_model_name(self, run_id: str, model_name: str | None) -> None:
-        """Update the model name for a run."""
-        async with self._lock:
-            record = self._runs.get(run_id)
-            if record is None:
-                logger.warning("update_model_name called for unknown run %s", run_id)
-                return
-            record.model_name = model_name
-            record.updated_at = _now_iso()
-        await self._persist_to_store(record)
-        logger.info("Run %s model_name=%s", run_id, model_name)
-
    async def cancel(self, run_id: str, *, action: str = "interrupt") -> bool:
        """Request cancellation of a run.

@@ -185,7 +174,6 @@ class RunManager:
        metadata: dict | None = None,
        kwargs: dict | None = None,
        multitask_strategy: str = "reject",
-        model_name: str | None = None,
    ) -> RunRecord:
        """Atomically check for inflight runs and create a new one.

@@ -236,7 +224,6 @@ class RunManager:
                kwargs=kwargs or {},
                created_at=now,
                updated_at=now,
-                model_name=model_name,
            )
            self._runs[run_id] = record

@@ -23,7 +23,6 @@ class RunStore(abc.ABC):
        thread_id: str,
        assistant_id: str | None = None,
        user_id: str | None = None,
-        model_name: str | None = None,
        status: str = "pending",
        multitask_strategy: str = "reject",
        metadata: dict[str, Any] | None = None,
@@ -22,7 +22,6 @@ class MemoryRunStore(RunStore):
        thread_id,
        assistant_id=None,
        user_id=None,
-        model_name=None,
        status="pending",
        multitask_strategy="reject",
        metadata=None,
@@ -36,7 +35,6 @@ class MemoryRunStore(RunStore):
            "thread_id": thread_id,
            "assistant_id": assistant_id,
            "user_id": user_id,
-            "model_name": model_name,
            "status": status,
            "multitask_strategy": multitask_strategy,
            "metadata": metadata or {},
@@ -23,8 +23,6 @@ from dataclasses import dataclass, field
 from functools import lru_cache
 from typing import TYPE_CHECKING, Any, Literal, cast

-from langgraph.checkpoint.base import empty_checkpoint
-
 if TYPE_CHECKING:
    from langchain_core.messages import HumanMessage

@@ -230,17 +228,6 @@ async def run_agent(
        else:
            agent = agent_factory(config=runnable_config)

-        # Capture the effective (resolved) model name from the agent's metadata.
-        # _resolve_model_name in agent.py may return the default model if the
-        # requested name is not in the allowlist — this update ensures the
-        # persisted model_name reflects the actual model used.
-        if record.model_name is not None:
-            resolved = getattr(agent, "metadata", {}) or {}
-            if isinstance(resolved, dict):
-                effective = resolved.get("model_name")
-                if effective and effective != record.model_name:
-                    await run_manager.update_model_name(record.run_id, effective)
-
        # 4. Attach checkpointer and store
        if checkpointer is not None:
            agent.checkpointer = checkpointer
@@ -455,12 +442,6 @@ async def _rollback_to_pre_run_checkpoint(
    if checkpoint_to_restore.get("id") is None:
        logger.warning("Run %s rollback skipped: pre-run checkpoint has no checkpoint id", run_id)
        return
-    restore_marker = _new_checkpoint_marker()
-    checkpoint_to_restore = {
-        **checkpoint_to_restore,
-        "id": restore_marker["id"],
-        "ts": restore_marker["ts"],
-    }
    metadata = pre_run_snapshot.get("metadata", {})
    metadata_to_restore = metadata if isinstance(metadata, dict) else {}
    raw_checkpoint_ns = pre_run_snapshot.get("checkpoint_ns")
@@ -512,11 +493,6 @@ async def _rollback_to_pre_run_checkpoint(
        )


-def _new_checkpoint_marker() -> dict[str, str]:
-    marker = empty_checkpoint()
-    return {"id": marker["id"], "ts": marker["ts"]}
-
-
 def _lg_mode_to_sse_event(mode: str) -> str:
    """Map LangGraph internal stream_mode name to SSE event name.

@@ -26,7 +26,7 @@ from collections.abc import Iterator

 from langgraph.store.base import BaseStore

-from deerflow.config.app_config import get_app_config
+from deerflow.config.app_config import AppConfig, get_app_config
 from deerflow.runtime.store._sqlite_utils import ensure_sqlite_parent_dir, resolve_sqlite_conn_str

 logger = logging.getLogger(__name__)
@@ -36,9 +36,7 @@ logger = logging.getLogger(__name__)
 # ---------------------------------------------------------------------------

 SQLITE_STORE_INSTALL = "langgraph-checkpoint-sqlite is required for the SQLite store. Install it with: uv add langgraph-checkpoint-sqlite"
-POSTGRES_STORE_INSTALL = (
-    "langgraph-checkpoint-postgres is required for the PostgreSQL store. Install the package extra with: pip install 'deerflow-harness[postgres]' (or use: uv sync --all-packages --extra postgres when developing locally)"
-)
+POSTGRES_STORE_INSTALL = "langgraph-checkpoint-postgres is required for the PostgreSQL store. Install it with: uv add langgraph-checkpoint-postgres psycopg[binary] psycopg-pool"
 POSTGRES_CONN_REQUIRED = "checkpointer.connection_string is required for the postgres backend"

 # ---------------------------------------------------------------------------
@@ -100,9 +98,26 @@ def _sync_store_cm(config) -> Iterator[BaseStore]:

 _store: BaseStore | None = None
 _store_ctx = None  # open context manager keeping the connection alive
+_explicit_stores: dict[int, BaseStore] = {}
+_explicit_store_contexts: dict[int, object] = {}


-def get_store() -> BaseStore:
+def _default_in_memory_store() -> BaseStore:
+    """Log a warning about the missing checkpointer config and return a new ``InMemoryStore``.
+
+    ``InMemoryStore`` is imported at call time, and each call returns a
+    separate instance.
+    """
+    from langgraph.store.memory import InMemoryStore
+
+    fallback_notice = "No 'checkpointer' section in config.yaml — using InMemoryStore for the store. Thread list will be lost on server restart. Configure a sqlite or postgres backend for persistence."
+    logger.warning(fallback_notice)
+    return InMemoryStore()
+
+
+def _build_store_from_app_config(app_config: AppConfig) -> tuple[BaseStore, object | None]:
+    """Create a store for an explicitly supplied application config.
+
+    Args:
+        app_config: Application config whose ``checkpointer`` attribute
+            selects the store backend.
+
+    Returns:
+        A ``(store, ctx)`` pair. When ``app_config.checkpointer`` is set,
+        ``store`` is the value produced by entering the context manager from
+        ``_sync_store_cm`` and ``ctx`` is that context manager, which is
+        left open (it is not exited here). Otherwise ``store`` is the
+        in-memory fallback and ``ctx`` is ``None``.
+    """
+    checkpointer_config = app_config.checkpointer
+    if checkpointer_config is None:
+        return _default_in_memory_store(), None
+
+    store_cm = _sync_store_cm(checkpointer_config)
+    opened_store = store_cm.__enter__()
+    return opened_store, store_cm
+
+
+def get_store(app_config: AppConfig | None = None) -> BaseStore:
    """Return the global sync Store singleton, creating it on first call.

    Returns an :class:`~langgraph.store.memory.InMemoryStore` when no
@@ -114,6 +129,18 @@ def get_store() -> BaseStore:
    """
    global _store, _store_ctx

+    if app_config is not None:
+        cache_key = id(app_config)
+        cached = _explicit_stores.get(cache_key)
+        if cached is not None:
+            return cached
+
+        explicit_store, explicit_ctx = _build_store_from_app_config(app_config)
+        _explicit_stores[cache_key] = explicit_store
+        if explicit_ctx is not None:
+            _explicit_store_contexts[cache_key] = explicit_ctx
+        return explicit_store
+
    if _store is not None:
        return _store

@@ -132,10 +159,7 @@ def get_store() -> BaseStore:
        config = get_checkpointer_config()

    if config is None:
-        from langgraph.store.memory import InMemoryStore
-
-        logger.warning("No 'checkpointer' section in config.yaml — using InMemoryStore for the store. Thread list will be lost on server restart. Configure a sqlite or postgres backend for persistence.")
-        _store = InMemoryStore()
+        _store = _default_in_memory_store()
        return _store

    _store_ctx = _sync_store_cm(config)
@@ -158,6 +182,18 @@ def reset_store() -> None:
        _store_ctx = None
    _store = None

+    for cache_key, ctx in list(_explicit_store_contexts.items()):
+        try:
+            ctx.__exit__(None, None, None)
+        except Exception:
+            logger.warning("Error during explicit store cleanup", exc_info=True)
+        finally:
+            _explicit_store_contexts.pop(cache_key, None)
+            _explicit_stores.pop(cache_key, None)
+
+    _explicit_stores.clear()
+    _explicit_store_contexts.clear()
+

 # ---------------------------------------------------------------------------
 # Sync context manager
@@ -165,7 +201,7 @@ def reset_store() -> None:


@contextlib.contextmanager
-def store_context() -> Iterator[BaseStore]:
+def store_context(app_config: AppConfig | None = None) -> Iterator[BaseStore]:
    """Sync context manager that yields a Store and cleans up on exit.

    Unlike :func:`get_store`, this does **not** cache the instance — each
@@ -178,13 +214,10 @@ def store_context() -> Iterator[BaseStore]:
    Yields an :class:`~langgraph.store.memory.InMemoryStore` when no
    checkpointer is configured in *config.yaml*.
    """
-    config = get_app_config()
-    if config.checkpointer is None:
-        from langgraph.store.memory import InMemoryStore
-
-        logger.warning("No 'checkpointer' section in config.yaml — using InMemoryStore for the store. Thread list will be lost on server restart. Configure a sqlite or postgres backend for persistence.")
-        yield InMemoryStore()
+    resolved_app_config = app_config or get_app_config()
+    if resolved_app_config.checkpointer is None:
+        yield _default_in_memory_store()
        return

-    with _sync_store_cm(config.checkpointer) as store:
+    with _sync_store_cm(resolved_app_config.checkpointer) as store:
        yield store
@@ -109,34 +109,6 @@ def get_effective_user_id() -> str:
    return str(user.id)


-def resolve_runtime_user_id(runtime: object | None) -> str:
-    """Single source of truth for a tool/middleware's effective user_id.
-
-    Resolution order (most authoritative first):
-      1. ``runtime.context["user_id"]`` — set by ``inject_authenticated_user_context``
-         in the gateway from the auth-validated ``request.state.user``. This is
-         the only source that survives boundaries where the contextvar may have
-         been lost (background tasks scheduled outside the request task,
-         worker pools that don't copy_context, future cross-process drivers).
-      2. The ``_current_user`` ContextVar — set by the auth middleware at
-         request entry. Reliable for in-task work; copied by ``asyncio``
-         child tasks and by ``ContextThreadPoolExecutor``.
-      3. ``DEFAULT_USER_ID`` — last-resort fallback so unauthenticated
-         CLI / migration / test paths keep working without raising.
-
-    Tools that persist user-scoped state (custom agents, memory, uploads)
-    MUST call this instead of ``get_effective_user_id()`` directly so they
-    benefit from the runtime.context channel that ``setup_agent`` already
-    relies on.
-    """
-    context = getattr(runtime, "context", None)
-    if isinstance(context, dict):
-        ctx_user_id = context.get("user_id")
-        if ctx_user_id:
-            return str(ctx_user_id)
-    return get_effective_user_id()
-
-
 # ---------------------------------------------------------------------------
 # Sentinel-based user_id resolution
 # ---------------------------------------------------------------------------
@@ -42,13 +42,6 @@ class LocalSandbox(Sandbox):
        """Return whether the selected shell is cmd.exe."""
        return LocalSandbox._shell_name(shell) in {"cmd", "cmd.exe"}

-    @staticmethod
-    def _is_msys_shell(shell: str) -> bool:
-        """Return whether the selected shell is a Git Bash/MSYS shell."""
-        normalized = shell.replace("\\", "/").lower()
-        shell_name = LocalSandbox._shell_name(shell)
-        return shell_name in {"sh.exe", "bash.exe"} and any(part in normalized for part in ("/git/", "/mingw", "/msys"))
-
    @staticmethod
    def _find_first_available_shell(candidates: tuple[str, ...]) -> str | None:
        """Return the first executable shell path or command found from candidates."""
@@ -310,19 +303,12 @@ class LocalSandbox(Sandbox):
        shell = self._get_shell()

        if os.name == "nt":
-            env = None
            if self._is_powershell(shell):
                args = [shell, "-NoProfile", "-Command", resolved_command]
            elif self._is_cmd_shell(shell):
                args = [shell, "/c", resolved_command]
            else:
                args = [shell, "-c", resolved_command]
-                if self._is_msys_shell(shell):
-                    env = {
-                        **os.environ,
-                        "MSYS_NO_PATHCONV": "1",
-                        "MSYS2_ARG_CONV_EXCL": "*",
-                    }

            result = subprocess.run(
                args,
@@ -330,7 +316,6 @@ class LocalSandbox(Sandbox):
                capture_output=True,
                text=True,
                timeout=600,
-                env=env,
            )
        else:
            args = [shell, "-c", resolved_command]
@@ -119,13 +119,3 @@ class LocalSandboxProvider(SandboxProvider):
        # For Docker-based providers (e.g., AioSandboxProvider), cleanup
        # happens at application shutdown via the shutdown() method.
        pass
-
-    def reset(self) -> None:
-        # reset_sandbox_provider() must also clear the module singleton.
-        global _singleton
-        _singleton = None
-
-    def shutdown(self) -> None:
-        # LocalSandboxProvider has no extra resources beyond the shared
-        # singleton, so shutdown uses the same cleanup path as reset.
-        self.reset()
@@ -37,10 +37,6 @@ class SandboxProvider(ABC):
        """
        pass

-    def reset(self) -> None:
-        """Clear cached state that survives provider instance replacement."""
-        pass
-

 _default_sandbox_provider: SandboxProvider | None = None

@@ -69,18 +65,11 @@ def reset_sandbox_provider() -> None:
    The next call to `get_sandbox_provider()` will create a new instance.
    Useful for testing or when switching configurations.

-    Providers can override `reset()` to clear any module-level state they keep
-    alive across instances (for example, `LocalSandboxProvider`'s cached
-    `LocalSandbox` singleton). Without it, config/mount changes would not take
-    effect on the next acquire().
-
    Note: If the provider has active sandboxes, they will be orphaned.
    Use `shutdown_sandbox_provider()` for proper cleanup.
    """
    global _default_sandbox_provider
-    if _default_sandbox_provider is not None:
-        _default_sandbox_provider.reset()
-        _default_sandbox_provider = None
+    _default_sandbox_provider = None


 def shutdown_sandbox_provider() -> None:
@@ -3,9 +3,10 @@ import re
 import shlex
 from pathlib import Path

-from langchain.tools import tool
+from langchain.tools import ToolRuntime, tool
+from langgraph.typing import ContextT

-from deerflow.agents.thread_state import ThreadDataState
+from deerflow.agents.thread_state import ThreadDataState, ThreadState
 from deerflow.config import get_app_config
 from deerflow.config.paths import VIRTUAL_PATH_PREFIX
 from deerflow.sandbox.exceptions import (
@@ -18,7 +19,6 @@ from deerflow.sandbox.sandbox import Sandbox
 from deerflow.sandbox.sandbox_provider import get_sandbox_provider
 from deerflow.sandbox.search import GrepMatch
 from deerflow.sandbox.security import LOCAL_HOST_BASH_DISABLED_MESSAGE, is_host_bash_allowed
-from deerflow.tools.types import Runtime

 _ABSOLUTE_PATH_PATTERN = re.compile(r"(?<![:\w])(?<!:/)/(?:[^\s\"'`;&|<>()]+)")
 _FILE_URL_PATTERN = re.compile(r"\bfile://\S+", re.IGNORECASE)
@@ -419,7 +419,7 @@ def _join_path_preserving_style(base: str, relative: str) -> str:
    return f"{stripped_base}{separator}{normalized_relative}"


-def _sanitize_error(error: Exception, runtime: Runtime | None = None) -> str:
+def _sanitize_error(error: Exception, runtime: "ToolRuntime[ContextT, ThreadState] | None" = None) -> str:
    """Sanitize an error message to avoid leaking host filesystem paths.

    In local-sandbox mode, resolved host paths in the error string are masked
@@ -994,7 +994,7 @@ def _apply_cwd_prefix(command: str, thread_data: ThreadDataState | None) -> str:
    return command


-def get_thread_data(runtime: Runtime | None) -> ThreadDataState | None:
+def get_thread_data(runtime: ToolRuntime[ContextT, ThreadState] | None) -> ThreadDataState | None:
    """Extract thread_data from runtime state."""
    if runtime is None:
        return None
@@ -1003,7 +1003,7 @@ def get_thread_data(runtime: Runtime | None) -> ThreadDataState | None:
    return runtime.state.get("thread_data")


-def is_local_sandbox(runtime: Runtime | None) -> bool:
+def is_local_sandbox(runtime: ToolRuntime[ContextT, ThreadState] | None) -> bool:
    """Check if the current sandbox is a local sandbox.

    Path replacement is only needed for local sandbox since aio sandbox
@@ -1019,7 +1019,7 @@ def is_local_sandbox(runtime: Runtime | None) -> bool:
    return sandbox_state.get("sandbox_id") == "local"


-def sandbox_from_runtime(runtime: Runtime | None = None) -> Sandbox:
+def sandbox_from_runtime(runtime: ToolRuntime[ContextT, ThreadState] | None = None) -> Sandbox:
    """Extract sandbox instance from tool runtime.

    DEPRECATED: Use ensure_sandbox_initialized() for lazy initialization support.
@@ -1048,7 +1048,7 @@ def sandbox_from_runtime(runtime: Runtime | None = None) -> Sandbox:
    return sandbox


-def ensure_sandbox_initialized(runtime: Runtime | None = None) -> Sandbox:
+def ensure_sandbox_initialized(runtime: ToolRuntime[ContextT, ThreadState] | None = None) -> Sandbox:
    """Ensure sandbox is initialized, acquiring lazily if needed.

    On first call, acquires a sandbox from the provider and stores it in runtime state.
@@ -1107,7 +1107,7 @@ def ensure_sandbox_initialized(runtime: Runtime | None = None) -> Sandbox:
    return sandbox


-def ensure_thread_directories_exist(runtime: Runtime | None) -> None:
+def ensure_thread_directories_exist(runtime: ToolRuntime[ContextT, ThreadState] | None) -> None:
    """Ensure thread data directories (workspace, uploads, outputs) exist.

    This function is called lazily when any sandbox tool is first used.
@@ -1221,7 +1221,7 @@ def _truncate_ls_output(output: str, max_chars: int) -> str:


@tool("bash", parse_docstring=True)
-def bash_tool(runtime: Runtime, description: str, command: str) -> str:
+def bash_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, command: str) -> str:
    """Execute a bash command in a Linux environment.


@@ -1270,7 +1270,7 @@ def bash_tool(runtime: Runtime, description: str, command: str) -> str:


@tool("ls", parse_docstring=True)
-def ls_tool(runtime: Runtime, description: str, path: str) -> str:
+def ls_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, path: str) -> str:
    """List the contents of a directory up to 2 levels deep in tree format.

    Args:
@@ -1318,7 +1318,7 @@ def ls_tool(runtime: Runtime, description: str, path: str) -> str:

@tool("glob", parse_docstring=True)
 def glob_tool(
-    runtime: Runtime,
+    runtime: ToolRuntime[ContextT, ThreadState],
    description: str,
    pattern: str,
    path: str,
@@ -1368,7 +1368,7 @@ def glob_tool(

@tool("grep", parse_docstring=True)
 def grep_tool(
-    runtime: Runtime,
+    runtime: ToolRuntime[ContextT, ThreadState],
    description: str,
    pattern: str,
    path: str,
@@ -1438,7 +1438,7 @@ def grep_tool(

@tool("read_file", parse_docstring=True)
 def read_file_tool(
-    runtime: Runtime,
+    runtime: ToolRuntime[ContextT, ThreadState],
    description: str,
    path: str,
    start_line: int | None = None,
@@ -1493,19 +1493,18 @@ def read_file_tool(

@tool("write_file", parse_docstring=True)
 def write_file_tool(
-    runtime: Runtime,
+    runtime: ToolRuntime[ContextT, ThreadState],
    description: str,
    path: str,
    content: str,
    append: bool = False,
 ) -> str:
-    """Write text content to a file. By default this overwrites the target file; set append to true to add content to the end without replacing existing content.
+    """Write text content to a file.

    Args:
        description: Explain why you are writing to this file in short words. ALWAYS PROVIDE THIS PARAMETER FIRST.
        path: The **absolute** path to the file to write to. ALWAYS PROVIDE THIS PARAMETER SECOND.
        content: The content to write to the file. ALWAYS PROVIDE THIS PARAMETER THIRD.
-        append: Whether to append content to the end of the file instead of overwriting it. Defaults to false.
    """
    try:
        sandbox = ensure_sandbox_initialized(runtime)
@@ -1534,7 +1533,7 @@ def write_file_tool(

@tool("str_replace", parse_docstring=True)
 def str_replace_tool(
-    runtime: Runtime,
+    runtime: ToolRuntime[ContextT, ThreadState],
    description: str,
    path: str,
    old_str: str,
@@ -9,29 +9,6 @@ from .types import SKILL_MD_FILE, Skill, SkillCategory
 logger = logging.getLogger(__name__)


-def parse_allowed_tools(raw: object, skill_file: Path) -> list[str] | None:
-    """Parse the optional allowed-tools frontmatter field.
-
-    Returns None when the field is omitted. Returns a list when the field is a
-    YAML sequence of strings, including an empty list for explicit no-tool
-    skills. Raises ValueError for malformed values.
-    """
-    if raw is None:
-        return None
-    if not isinstance(raw, list):
-        raise ValueError(f"allowed-tools in {skill_file} must be a list of strings")
-
-    allowed_tools: list[str] = []
-    for item in raw:
-        if not isinstance(item, str):
-            raise ValueError(f"allowed-tools in {skill_file} must contain only strings")
-        tool_name = item.strip()
-        if not tool_name:
-            raise ValueError(f"allowed-tools in {skill_file} cannot contain empty tool names")
-        allowed_tools.append(tool_name)
-    return allowed_tools
-
-
 def parse_skill_file(skill_file: Path, category: SkillCategory, relative_path: Path | None = None) -> Skill | None:
    """Parse a SKILL.md file and extract metadata.

@@ -87,12 +64,6 @@ def parse_skill_file(skill_file: Path, category: SkillCategory, relative_path: P
        if license_text is not None:
            license_text = str(license_text).strip() or None

-        try:
-            allowed_tools = parse_allowed_tools(metadata.get("allowed-tools"), skill_file)
-        except ValueError as exc:
-            logger.error("Invalid allowed-tools in %s: %s", skill_file, exc)
-            return None
-
        return Skill(
            name=name,
            description=description,
@@ -101,7 +72,6 @@ def parse_skill_file(skill_file: Path, category: SkillCategory, relative_path: P
            skill_file=skill_file,
            relative_path=relative_path or Path(skill_file.parent.name),
            category=category,
-            allowed_tools=allowed_tools,
            enabled=True,  # Actual state comes from the extensions config file.
        )

@@ -1,44 +0,0 @@
-import logging
-from typing import Protocol
-
-from deerflow.skills.types import Skill
-
-logger = logging.getLogger(__name__)
-
-
-class NamedTool(Protocol):
-    name: str
-
-
-def allowed_tool_names_for_skills(skills: list[Skill]) -> set[str] | None:
-    """Return the union of explicit skill allowed-tools declarations.
-
-    None means legacy allow-all behavior. It is returned only when no loaded
-    skill declares allowed-tools. Once any skill declares the field, legacy
-    skills without the field contribute no tools instead of disabling the
-    explicit restrictions from other skills.
-    """
-    if not skills:
-        return None
-
-    allowed: set[str] = set()
-    has_explicit_declaration = False
-    for skill in skills:
-        if skill.allowed_tools is None:
-            continue
-        has_explicit_declaration = True
-        if not skill.allowed_tools:
-            logger.info("Skill %s declared empty allowed-tools", skill.name)
-        allowed.update(skill.allowed_tools)
-
-    if not has_explicit_declaration:
-        return None
-    return allowed
-
-
-def filter_tools_by_skill_allowed_tools[ToolT: NamedTool](tools: list[ToolT], skills: list[Skill]) -> list[ToolT]:
-    allowed = allowed_tool_names_for_skills(skills)
-    if allowed is None:
-        return tools
-
-    return [tool for tool in tools if tool.name in allowed]
@@ -27,7 +27,6 @@ class Skill:
    skill_file: Path
    relative_path: Path  # Relative path from category root to skill directory
    category: SkillCategory  # 'public' or 'custom'
-    allowed_tools: list[str] | None = None
    enabled: bool = False  # Whether this skill is enabled

    @property
@@ -8,7 +8,6 @@ from pathlib import Path

 import yaml

-from deerflow.skills.parser import parse_allowed_tools
 from deerflow.skills.types import SKILL_MD_FILE

 # Allowed properties in SKILL.md frontmatter
@@ -85,9 +84,4 @@ def _validate_skill_frontmatter(skill_dir: Path) -> tuple[bool, str, str | None]
        if len(description) > 1024:
            return False, f"Description is too long ({len(description)} characters). Maximum is 1024 characters.", None

-    try:
-        parse_allowed_tools(frontmatter.get("allowed-tools"), skill_md)
-    except ValueError as e:
-        return False, str(e).replace(str(skill_md), SKILL_MD_FILE), None
-
    return True, "Skill is valid!", name
@@ -26,7 +26,7 @@ class SubagentConfig:

    name: str
    description: str
-    system_prompt: str | None = None
+    system_prompt: str
    tools: list[str] | None = None
    disallowed_tools: list[str] | None = field(default_factory=lambda: ["task"])
    skills: list[str] | None = None
@@ -23,10 +23,7 @@ from deerflow.agents.thread_state import SandboxState, ThreadDataState, ThreadSt
 from deerflow.config import get_app_config
 from deerflow.config.app_config import AppConfig
 from deerflow.models import create_chat_model
-from deerflow.skills.tool_policy import filter_tools_by_skill_allowed_tools
-from deerflow.skills.types import Skill
 from deerflow.subagents.config import SubagentConfig, resolve_subagent_model_name
-from deerflow.subagents.token_collector import SubagentTokenCollector

 logger = logging.getLogger(__name__)

@@ -71,8 +68,6 @@ class SubagentResult:
    started_at: datetime | None = None
    completed_at: datetime | None = None
    ai_messages: list[dict[str, Any]] | None = None
-    token_usage_records: list[dict[str, int | str]] = field(default_factory=list)
-    usage_reported: bool = False
    cancel_event: threading.Event = field(default_factory=threading.Event, repr=False)

    def __post_init__(self):
@@ -265,16 +260,16 @@ class SubagentExecutor:
        # Generate trace_id if not provided (for top-level calls)
        self.trace_id = trace_id or str(uuid.uuid4())[:8]

-        self._base_tools = _filter_tools(
+        # Filter tools based on config
+        self.tools = _filter_tools(
            tools,
            config.tools,
            config.disallowed_tools,
        )
-        self.tools = self._base_tools

        logger.info(f"[trace={self.trace_id}] SubagentExecutor initialized: {config.name} with {len(self.tools)} tools")

-    def _create_agent(self, tools: list[BaseTool] | None = None):
+    def _create_agent(self):
        """Create the agent instance."""
        app_config = self.app_config or get_app_config()
        if self.model_name is None:
@@ -286,48 +281,15 @@ class SubagentExecutor:
        # Reuse shared middleware composition with lead agent.
        middlewares = build_subagent_runtime_middlewares(app_config=app_config, model_name=self.model_name, lazy_init=True)

-        # system_prompt is included in initial state messages (see _build_initial_state)
-        # to avoid multiple SystemMessages which some LLM APIs don't support.
        return create_agent(
            model=model,
-            tools=tools if tools is not None else self.tools,
+            tools=self.tools,
            middleware=middlewares,
-            system_prompt=None,
+            system_prompt=self.config.system_prompt,
            state_schema=ThreadState,
        )

-    async def _load_skills(self) -> list[Skill]:
-        """Load enabled skill metadata based on config.skills."""
-        if self.config.skills is not None and len(self.config.skills) == 0:
-            logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} skills=[] — skipping skill loading")
-            return []
-
-        try:
-            from deerflow.skills.storage import get_or_new_skill_storage
-
-            storage_kwargs = {"app_config": self.app_config} if self.app_config is not None else {}
-            storage = await asyncio.to_thread(get_or_new_skill_storage, **storage_kwargs)
-            # Use asyncio.to_thread to avoid blocking the event loop (LangGraph ASGI requirement)
-            all_skills = await asyncio.to_thread(storage.load_skills, enabled_only=True)
-            logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} loaded {len(all_skills)} enabled skills from disk")
-        except Exception:
-            logger.exception(f"[trace={self.trace_id}] Failed to load skills for subagent {self.config.name}")
-            raise
-
-        if not all_skills:
-            logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} no enabled skills found")
-            return []
-
-        # Filter by config.skills whitelist
-        if self.config.skills is not None:
-            allowed = set(self.config.skills)
-            return [s for s in all_skills if s.name in allowed]
-        return all_skills
-
-    def _apply_skill_allowed_tools(self, skills: list[Skill]) -> list[BaseTool]:
-        return filter_tools_by_skill_allowed_tools(self._base_tools, skills)
-
-    async def _load_skill_messages(self, skills: list[Skill]) -> list[SystemMessage]:
+    async def _load_skill_messages(self) -> list[SystemMessage]:
        """Load skill content as conversation items based on config.skills.

        Aligned with Codex's pattern: each subagent loads its own skills
@@ -341,6 +303,33 @@ class SubagentExecutor:
        Returns:
            List of SystemMessages containing skill content.
        """
+        if self.config.skills is not None and len(self.config.skills) == 0:
+            logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} skills=[] — skipping skill loading")
+            return []
+
+        try:
+            from deerflow.skills.storage import get_or_new_skill_storage
+
+            storage_kwargs = {"app_config": self.app_config} if self.app_config is not None else {}
+            storage = await asyncio.to_thread(get_or_new_skill_storage, **storage_kwargs)
+            # Use asyncio.to_thread to avoid blocking the event loop (LangGraph ASGI requirement)
+            all_skills = await asyncio.to_thread(storage.load_skills, enabled_only=True)
+            logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} loaded {len(all_skills)} enabled skills from disk")
+        except Exception:
+            logger.warning(f"[trace={self.trace_id}] Failed to load skills for subagent {self.config.name}", exc_info=True)
+            return []
+
+        if not all_skills:
+            logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} no enabled skills found")
+            return []
+
+        # Filter by config.skills whitelist
+        if self.config.skills is not None:
+            allowed = set(self.config.skills)
+            skills = [s for s in all_skills if s.name in allowed]
+        else:
+            skills = all_skills
+
        if not skills:
            return []

@@ -358,34 +347,21 @@ class SubagentExecutor:

        return messages

-    async def _build_initial_state(self, task: str) -> tuple[dict[str, Any], list[BaseTool]]:
+    async def _build_initial_state(self, task: str) -> dict[str, Any]:
        """Build the initial state for agent execution.

        Args:
            task: The task description.

        Returns:
-            Initial state dictionary and tools filtered by loaded skill metadata.
+            Initial state dictionary.
        """
-
        # Load skills as conversation items (Codex pattern)
-        skills = await self._load_skills()
-        filtered_tools = self._apply_skill_allowed_tools(skills)
-        skill_messages = await self._load_skill_messages(skills)
-
-        # Combine system_prompt and skills into a single SystemMessage.
-        # Some LLM APIs reject multiple SystemMessages with
-        # "System message must be at the beginning."
-        system_parts: list[str] = []
-        if self.config.system_prompt:
-            system_parts.append(self.config.system_prompt)
-        for skill_msg in skill_messages:
-            system_parts.append(skill_msg.content)
-
-        messages: list[Any] = []
-        if system_parts:
-            messages.append(SystemMessage(content="\n\n".join(system_parts)))
+        skill_messages = await self._load_skill_messages()

+        messages: list = []
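+        # Skill content is injected as SystemMessages ahead of the task. NOTE(review): create_agent also receives system_prompt, so the model may see more than one system message — confirm every target LLM API accepts that.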
+        messages.extend(skill_messages)
        # Then the actual task
        messages.append(HumanMessage(content=task))

@@ -399,7 +375,7 @@ class SubagentExecutor:
        if self.thread_data is not None:
            state["thread_data"] = self.thread_data

-        return state, filtered_tools
+        return state

    async def _aexecute(self, task: str, result_holder: SubagentResult | None = None) -> SubagentResult:
        """Execute a task asynchronously.
@@ -428,20 +404,13 @@ class SubagentExecutor:
            ai_messages = []
            result.ai_messages = ai_messages

-        collector: SubagentTokenCollector | None = None
        try:
-            state, filtered_tools = await self._build_initial_state(task)
-            agent = self._create_agent(filtered_tools)
-
-            # Token collector for subagent LLM calls
-            collector_caller = f"subagent:{self.config.name}"
-            collector = SubagentTokenCollector(caller=collector_caller)
+            agent = self._create_agent()
+            state = await self._build_initial_state(task)

            # Build config with thread_id for sandbox access and recursion limit
            run_config: RunnableConfig = {
                "recursion_limit": self.config.max_turns,
-                "callbacks": [collector],
-                "tags": [collector_caller],
            }
            context: dict[str, Any] = {}
            if self.thread_id:
@@ -464,8 +433,6 @@ class SubagentExecutor:
                        result.status = SubagentStatus.CANCELLED
                        result.error = "Cancelled by user"
                        result.completed_at = datetime.now()
-                if collector is not None:
-                    result.token_usage_records = collector.snapshot_records()
                return result

            async for chunk in agent.astream(state, config=run_config, context=context, stream_mode="values"):  # type: ignore[arg-type]
@@ -480,7 +447,6 @@ class SubagentExecutor:
                            result.status = SubagentStatus.CANCELLED
                            result.error = "Cancelled by user"
                            result.completed_at = datetime.now()
-                    result.token_usage_records = collector.snapshot_records()
                    return result

                final_state = chunk
@@ -507,7 +473,6 @@ class SubagentExecutor:
                            logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} captured AI message #{len(ai_messages)}")

            logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} completed async execution")
-            result.token_usage_records = collector.snapshot_records()

            if final_state is None:
                logger.warning(f"[trace={self.trace_id}] Subagent {self.config.name} no final state")
@@ -587,8 +552,6 @@ class SubagentExecutor:
            result.status = SubagentStatus.FAILED
            result.error = str(e)
            result.completed_at = datetime.now()
-            if collector is not None:
-                result.token_usage_records = collector.snapshot_records()

        return result

@@ -1,63 +0,0 @@
-"""Callback handler that collects LLM token usage within a subagent.
-
-Each subagent execution creates its own collector. After the subagent
-finishes, the collected records are transferred to the parent RunJournal
-via :meth:`RunJournal.record_external_llm_usage_records`.
-"""
-
-from __future__ import annotations
-
-from typing import Any
-
-from langchain_core.callbacks import BaseCallbackHandler
-
-
-class SubagentTokenCollector(BaseCallbackHandler):
-    """Lightweight callback handler that collects LLM token usage within a subagent."""
-
-    def __init__(self, caller: str):
-        super().__init__()
-        self.caller = caller
-        self._records: list[dict[str, int | str]] = []
-        self._counted_run_ids: set[str] = set()
-
-    def on_llm_end(
-        self,
-        response: Any,
-        *,
-        run_id: Any,
-        tags: list[str] | None = None,
-        **kwargs: Any,
-    ) -> None:
-        rid = str(run_id)
-        if rid in self._counted_run_ids:
-            return
-
-        for generation in response.generations:
-            for gen in generation:
-                if not hasattr(gen, "message"):
-                    continue
-                usage = getattr(gen.message, "usage_metadata", None)
-                usage_dict = dict(usage) if usage else {}
-                input_tk = usage_dict.get("input_tokens", 0) or 0
-                output_tk = usage_dict.get("output_tokens", 0) or 0
-                total_tk = usage_dict.get("total_tokens", 0) or 0
-                if total_tk <= 0:
-                    total_tk = input_tk + output_tk
-                if total_tk <= 0:
-                    continue
-                self._counted_run_ids.add(rid)
-                self._records.append(
-                    {
-                        "source_run_id": rid,
-                        "caller": self.caller,
-                        "input_tokens": input_tk,
-                        "output_tokens": output_tk,
-                        "total_tokens": total_tk,
-                    }
-                )
-                return
-
-    def snapshot_records(self) -> list[dict[str, int | str]]:
-        """Return a copy of the accumulated usage records."""
-        return list(self._records)
@@ -2,12 +2,10 @@ from .clarification_tool import ask_clarification_tool
 from .present_file_tool import present_file_tool
 from .setup_agent_tool import setup_agent
 from .task_tool import task_tool
-from .update_agent_tool import update_agent
 from .view_image_tool import view_image_tool

 __all__ = [
    "setup_agent",
-    "update_agent",
    "present_file_tool",
    "ask_clarification_tool",
    "view_image_tool",
@@ -1,19 +1,20 @@
 from pathlib import Path
 from typing import Annotated

-from langchain.tools import InjectedToolCallId, tool
+from langchain.tools import InjectedToolCallId, ToolRuntime, tool
 from langchain_core.messages import ToolMessage
 from langgraph.config import get_config
 from langgraph.types import Command
+from langgraph.typing import ContextT

+from deerflow.agents.thread_state import ThreadState
 from deerflow.config.paths import VIRTUAL_PATH_PREFIX, get_paths
 from deerflow.runtime.user_context import get_effective_user_id
-from deerflow.tools.types import Runtime

 OUTPUTS_VIRTUAL_PREFIX = f"{VIRTUAL_PATH_PREFIX}/outputs"


-def _get_thread_id(runtime: Runtime) -> str | None:
+def _get_thread_id(runtime: ToolRuntime[ContextT, ThreadState]) -> str | None:
    """Resolve the current thread id from runtime context or RunnableConfig."""
    thread_id = runtime.context.get("thread_id") if runtime.context else None
    if thread_id:
@@ -31,7 +32,7 @@ def _get_thread_id(runtime: Runtime) -> str | None:


 def _normalize_presented_filepath(
-    runtime: Runtime,
+    runtime: ToolRuntime[ContextT, ThreadState],
    filepath: str,
 ) -> str:
    """Normalize a presented file path to the `/mnt/user-data/outputs/*` contract.
@@ -82,7 +83,7 @@ def _normalize_presented_filepath(

@tool("present_files", parse_docstring=True)
 def present_file_tool(
-    runtime: Runtime,
+    runtime: ToolRuntime[ContextT, ThreadState],
    filepaths: list[str],
    tool_call_id: Annotated[str, InjectedToolCallId],
 ) -> Command:
@@ -3,21 +3,20 @@ import logging
 import yaml
 from langchain_core.messages import ToolMessage
 from langchain_core.tools import tool
+from langgraph.prebuilt import ToolRuntime
 from langgraph.types import Command

 from deerflow.config.agents_config import validate_agent_name
 from deerflow.config.paths import get_paths
-from deerflow.runtime.user_context import resolve_runtime_user_id
-from deerflow.tools.types import Runtime

 logger = logging.getLogger(__name__)


-@tool(parse_docstring=True)
+@tool
 def setup_agent(
    soul: str,
    description: str,
-    runtime: Runtime,
+    runtime: ToolRuntime,
    skills: list[str] | None = None,
 ) -> Command:
    """Setup the custom DeerFlow agent.
@@ -35,14 +34,7 @@ def setup_agent(
    try:
        agent_name = validate_agent_name(agent_name)
        paths = get_paths()
-        if agent_name:
-            # Custom agents are persisted under the current user's bucket so
-            # different users do not see each other's agents.
-            user_id = resolve_runtime_user_id(runtime)
-            agent_dir = paths.user_agent_dir(user_id, agent_name)
-        else:
-            # Default agent (no agent_name): SOUL.md lives at the global base dir.
-            agent_dir = paths.base_dir
+        agent_dir = paths.agent_dir(agent_name) if agent_name else paths.base_dir
        is_new_dir = not agent_dir.exists()
        agent_dir.mkdir(parents=True, exist_ok=True)

@@ -6,9 +6,11 @@ import uuid
 from dataclasses import replace
 from typing import TYPE_CHECKING, Annotated, Any, cast

-from langchain.tools import InjectedToolCallId, tool
+from langchain.tools import InjectedToolCallId, ToolRuntime, tool
 from langgraph.config import get_stream_writer
+from langgraph.typing import ContextT

+from deerflow.agents.thread_state import ThreadState
 from deerflow.config import get_app_config
 from deerflow.sandbox.security import LOCAL_BASH_SUBAGENT_DISABLED_MESSAGE, is_host_bash_allowed
 from deerflow.subagents import SubagentExecutor, get_available_subagent_names, get_subagent_config
@@ -19,7 +21,6 @@ from deerflow.subagents.executor import (
    get_background_task_result,
    request_cancel_background_task,
 )
-from deerflow.tools.types import Runtime

 if TYPE_CHECKING:
    from deerflow.config.app_config import AppConfig
@@ -27,92 +28,6 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)


-def _is_subagent_terminal(result: Any) -> bool:
-    """Return whether a background subagent result is safe to clean up."""
-    return result.status in {SubagentStatus.COMPLETED, SubagentStatus.FAILED, SubagentStatus.CANCELLED, SubagentStatus.TIMED_OUT} or getattr(result, "completed_at", None) is not None
-
-
-async def _await_subagent_terminal(task_id: str, max_polls: int) -> Any | None:
-    """Poll until the background subagent reaches a terminal status or we run out of polls."""
-    for _ in range(max_polls):
-        result = get_background_task_result(task_id)
-        if result is None:
-            return None
-        if _is_subagent_terminal(result):
-            return result
-        await asyncio.sleep(5)
-    return None
-
-
-async def _deferred_cleanup_subagent_task(task_id: str, trace_id: str, max_polls: int) -> None:
-    """Keep polling a cancelled subagent until it can be safely removed."""
-    cleanup_poll_count = 0
-    while True:
-        result = get_background_task_result(task_id)
-        if result is None:
-            return
-        if _is_subagent_terminal(result):
-            cleanup_background_task(task_id)
-            return
-        if cleanup_poll_count >= max_polls:
-            logger.warning(f"[trace={trace_id}] Deferred cleanup for task {task_id} timed out after {cleanup_poll_count} polls")
-            return
-        await asyncio.sleep(5)
-        cleanup_poll_count += 1
-
-
-def _log_cleanup_failure(cleanup_task: asyncio.Task[None], *, trace_id: str, task_id: str) -> None:
-    if cleanup_task.cancelled():
-        return
-
-    exc = cleanup_task.exception()
-    if exc is not None:
-        logger.error(f"[trace={trace_id}] Deferred cleanup failed for task {task_id}: {exc}")
-
-
-def _schedule_deferred_subagent_cleanup(task_id: str, trace_id: str, max_polls: int) -> None:
-    logger.debug(f"[trace={trace_id}] Scheduling deferred cleanup for cancelled task {task_id}")
-    cleanup_task = asyncio.create_task(_deferred_cleanup_subagent_task(task_id, trace_id, max_polls))
-    cleanup_task.add_done_callback(lambda task: _log_cleanup_failure(task, trace_id=trace_id, task_id=task_id))
-
-
-def _find_usage_recorder(runtime: Any) -> Any | None:
-    """Find a callback handler with ``record_external_llm_usage_records`` in the runtime config."""
-    if runtime is None:
-        return None
-    config = getattr(runtime, "config", None)
-    if not isinstance(config, dict):
-        return None
-    callbacks = config.get("callbacks", [])
-    if not callbacks:
-        return None
-    for cb in callbacks:
-        if hasattr(cb, "record_external_llm_usage_records"):
-            return cb
-    return None
-
-
-def _report_subagent_usage(runtime: Any, result: Any) -> None:
-    """Report subagent token usage to the parent RunJournal, if available.
-
-    Each subagent task must be reported only once (guarded by usage_reported).
-    """
-    if getattr(result, "usage_reported", True):
-        return
-    records = getattr(result, "token_usage_records", None) or []
-    if not records:
-        return
-    journal = _find_usage_recorder(runtime)
-    if journal is None:
-        logger.debug("No usage recorder found in runtime callbacks — subagent token usage not recorded")
-        return
-    try:
-        journal.record_external_llm_usage_records(records)
-        result.usage_reported = True
-    except Exception:
-        logger.warning("Failed to report subagent token usage", exc_info=True)
-
-
 def _get_runtime_app_config(runtime: Any) -> "AppConfig | None":
    context = getattr(runtime, "context", None)
    if isinstance(context, dict):
@@ -135,11 +50,12 @@ def _merge_skill_allowlists(parent: list[str] | None, child: list[str] | None) -

@tool("task", parse_docstring=True)
 async def task_tool(
-    runtime: Runtime,
+    runtime: ToolRuntime[ContextT, ThreadState],
    description: str,
    prompt: str,
    subagent_type: str,
    tool_call_id: Annotated[str, InjectedToolCallId],
+    max_turns: int | None = None,
 ) -> str:
    """Delegate a task to a specialized subagent that runs in its own context.

@@ -175,6 +91,7 @@ async def task_tool(
        description: A short (3-5 word) description of the task for logging/display. ALWAYS PROVIDE THIS PARAMETER FIRST.
        prompt: The task description for the subagent. Be specific and clear about what needs to be done. ALWAYS PROVIDE THIS PARAMETER SECOND.
        subagent_type: The type of subagent to use. ALWAYS PROVIDE THIS PARAMETER THIRD.
+        max_turns: Optional maximum number of agent turns. Defaults to subagent's configured max.
    """
    runtime_app_config = _get_runtime_app_config(runtime)
    available_subagent_names = get_available_subagent_names(app_config=runtime_app_config) if runtime_app_config is not None else get_available_subagent_names()
@@ -196,6 +113,9 @@ async def task_tool(
    # each subagent loads its own skills based on config, injected as conversation items).
    # No longer appended to system_prompt here.

+    if max_turns is not None:
+        overrides["max_turns"] = max_turns
+
    # Extract parent context from runtime
    sandbox_state = None
    thread_data = None
@@ -313,25 +233,21 @@ async def task_tool(

            # Check if task completed, failed, or timed out
            if result.status == SubagentStatus.COMPLETED:
-                _report_subagent_usage(runtime, result)
                writer({"type": "task_completed", "task_id": task_id, "result": result.result})
                logger.info(f"[trace={trace_id}] Task {task_id} completed after {poll_count} polls")
                cleanup_background_task(task_id)
                return f"Task Succeeded. Result: {result.result}"
            elif result.status == SubagentStatus.FAILED:
-                _report_subagent_usage(runtime, result)
                writer({"type": "task_failed", "task_id": task_id, "error": result.error})
                logger.error(f"[trace={trace_id}] Task {task_id} failed: {result.error}")
                cleanup_background_task(task_id)
                return f"Task failed. Error: {result.error}"
            elif result.status == SubagentStatus.CANCELLED:
-                _report_subagent_usage(runtime, result)
                writer({"type": "task_cancelled", "task_id": task_id, "error": result.error})
                logger.info(f"[trace={trace_id}] Task {task_id} cancelled: {result.error}")
                cleanup_background_task(task_id)
                return "Task cancelled by user."
            elif result.status == SubagentStatus.TIMED_OUT:
-                _report_subagent_usage(runtime, result)
                writer({"type": "task_timed_out", "task_id": task_id, "error": result.error})
                logger.warning(f"[trace={trace_id}] Task {task_id} timed out: {result.error}")
                cleanup_background_task(task_id)
@@ -350,28 +266,43 @@ async def task_tool(
            if poll_count > max_poll_count:
                timeout_minutes = config.timeout_seconds // 60
                logger.error(f"[trace={trace_id}] Task {task_id} polling timed out after {poll_count} polls (should have been caught by thread pool timeout)")
-                _report_subagent_usage(runtime, result)
                writer({"type": "task_timed_out", "task_id": task_id})
                return f"Task polling timed out after {timeout_minutes} minutes. This may indicate the background task is stuck. Status: {result.status.value}"
    except asyncio.CancelledError:
        # Signal the background subagent thread to stop cooperatively.
+        # Without this, the thread (running in ThreadPoolExecutor with its
+        # own event loop via asyncio.run) would continue executing even
+        # after the parent task is cancelled.
        request_cancel_background_task(task_id)

-        # Wait (shielded) for the subagent to reach a terminal state so the
-        # final token usage snapshot is reported to the parent RunJournal
-        # before the parent worker persists get_completion_data().
-        terminal_result = None
-        try:
-            terminal_result = await asyncio.shield(_await_subagent_terminal(task_id, max_poll_count))
-        except asyncio.CancelledError:
-            pass
+        async def cleanup_when_done() -> None:  # deferred cleanup: poll the cancelled task until it can be removed
+            max_cleanup_polls = max_poll_count  # reuse the main polling budget as the cleanup limit
+            cleanup_poll_count = 0

-        # Report whatever the subagent collected (even if we timed out).
-        final_result = terminal_result or get_background_task_result(task_id)
-        if final_result is not None:
-            _report_subagent_usage(runtime, final_result)
-        if final_result is not None and _is_subagent_terminal(final_result):
-            cleanup_background_task(task_id)
-        else:
-            _schedule_deferred_subagent_cleanup(task_id, trace_id, max_poll_count)
+            while True:
+                result = get_background_task_result(task_id)
+                if result is None:
+                    return  # lookup returned nothing, so there is nothing to clean up
+
+                if result.status in {SubagentStatus.COMPLETED, SubagentStatus.FAILED, SubagentStatus.CANCELLED, SubagentStatus.TIMED_OUT} or getattr(result, "completed_at", None) is not None:
+                    cleanup_background_task(task_id)  # terminal status or a completion timestamp: remove the task record
+                    return
+
+                if cleanup_poll_count > max_cleanup_polls:
+                    logger.warning(f"[trace={trace_id}] Deferred cleanup for task {task_id} timed out after {cleanup_poll_count} polls")
+                    return  # give up without cleaning up once the poll budget is exceeded
+
+                await asyncio.sleep(5)  # wait 5 seconds between polls
+                cleanup_poll_count += 1
+
+        def log_cleanup_failure(cleanup_task: asyncio.Task[None]) -> None:  # done-callback: log any exception raised by the deferred cleanup task
+            if cleanup_task.cancelled():
+                return  # Task.exception() raises CancelledError on a cancelled task, so bail out first
+
+            exc = cleanup_task.exception()
+            if exc is not None:
+                logger.error(f"[trace={trace_id}] Deferred cleanup failed for task {task_id}: {exc}")
+
+        logger.debug(f"[trace={trace_id}] Scheduling deferred cleanup for cancelled task {task_id}")
+        asyncio.create_task(cleanup_when_done()).add_done_callback(log_cleanup_failure)
        raise
@@ -1,245 +0,0 @@
-"""update_agent tool — let a custom agent persist updates to its own SOUL.md / config.
-
-Bound to the lead agent only when ``runtime.context['agent_name']`` is set
-(i.e. inside an existing custom agent's chat). The default agent does not see
-this tool, and the bootstrap flow continues to use ``setup_agent`` for the
-initial creation handshake.
-
-The tool writes back to ``{base_dir}/users/{user_id}/agents/{agent_name}/{config.yaml,SOUL.md}``
-so an agent created by one user is never visible to (or mutable by) another.
-Writes are staged into temp files first; both files are renamed into place only
-after both temp files are successfully written, so a partial failure cannot leave
-config.yaml updated while SOUL.md still holds stale content.
-"""
-
-from __future__ import annotations
-
-import logging
-import tempfile
-from pathlib import Path
-from typing import Any
-
-import yaml
-from langchain_core.messages import ToolMessage
-from langchain_core.tools import tool
-from langgraph.types import Command
-
-from deerflow.config.agents_config import load_agent_config, validate_agent_name
-from deerflow.config.app_config import get_app_config
-from deerflow.config.paths import get_paths
-from deerflow.runtime.user_context import resolve_runtime_user_id
-from deerflow.tools.types import Runtime
-
-logger = logging.getLogger(__name__)
-
-
-def _stage_temp(path: Path, text: str) -> Path:
-    """Stage ``text`` in a new ``.tmp`` file next to ``path`` and return that file's path.
-
-    The parent directory of ``path`` is created if needed. The temp file is
-    left on disk for the caller, who must either ``Path.replace`` it onto the
-    real target or unlink it. If writing fails, the temp file is removed here
-    and the original exception propagates.
-    """
-    target_dir = path.parent
-    target_dir.mkdir(parents=True, exist_ok=True)
-    handle = tempfile.NamedTemporaryFile(
-        mode="w",
-        dir=target_dir,
-        suffix=".tmp",
-        delete=False,
-        encoding="utf-8",
-    )
-    staged = Path(handle.name)
-    try:
-        handle.write(text)
-        handle.flush()
-        handle.close()
-    except BaseException:
-        # Do not leave a half-written temp file behind on any failure.
-        handle.close()
-        staged.unlink(missing_ok=True)
-        raise
-    return staged
-
-
-def _cleanup_temps(temps: list[Path]) -> None:
-    """Remove each staged temp file, ignoring ones that are already gone.
-
-    An ``OSError`` while unlinking is logged at debug level and does not stop
-    the remaining files from being processed.
-    """
-
-    def _discard(leftover: Path) -> None:
-        try:
-            leftover.unlink(missing_ok=True)
-        except OSError:
-            logger.debug("Failed to clean up temp file %s", leftover, exc_info=True)
-
-    for leftover in temps:
-        _discard(leftover)
-
-
-@tool(parse_docstring=True)
-def update_agent(
-    runtime: Runtime,
-    soul: str | None = None,
-    description: str | None = None,
-    skills: list[str] | None = None,
-    tool_groups: list[str] | None = None,
-    model: str | None = None,
-) -> Command:
-    """Persist updates to the current custom agent's SOUL.md and config.yaml.
-
-    Use this when the user asks to refine the agent's identity, description,
-    skill whitelist, tool-group whitelist, or default model. Only the fields
-    you explicitly pass are updated; omitted fields keep their existing values.
-
-    Pass ``soul`` as the FULL replacement SOUL.md content — there is no patch
-    semantics, so always start from the current SOUL and apply your edits.
-
-    Pass ``skills=[]`` to disable all skills for this agent. Omit ``skills``
-    entirely to keep the existing whitelist.
-
-    Args:
-        soul: Optional full replacement SOUL.md content.
-        description: Optional new one-line description.
-        skills: Optional skill whitelist. ``[]`` = no skills, omit = unchanged.
-        tool_groups: Optional tool-group whitelist. ``[]`` = empty, omit = unchanged.
-        model: Optional model override (must match a configured model name).
-
-    Returns:
-        Command with a ToolMessage describing the result. Changes take effect
-        on the next user turn (when the lead agent is rebuilt with the fresh
-        SOUL.md and config.yaml).
-    """
-    tool_call_id = runtime.tool_call_id
-    agent_name_raw: str | None = runtime.context.get("agent_name") if runtime.context else None
-
-    def _err(message: str) -> Command:
-        # Every failure is reported to the model as a ToolMessage rather than raised.
-        return Command(update={"messages": [ToolMessage(content=f"Error: {message}", tool_call_id=tool_call_id)]})
-
-    if soul is None and description is None and skills is None and tool_groups is None and model is None:
-        return _err("No fields provided. Pass at least one of: soul, description, skills, tool_groups, model.")
-
-    try:
-        agent_name = validate_agent_name(agent_name_raw)
-    except ValueError as e:
-        return _err(str(e))
-
-    if not agent_name:
-        return _err("update_agent is only available inside a custom agent's chat. There is no agent_name in the current runtime context, so there is nothing to update. If you are inside the bootstrap flow, use setup_agent instead.")
-
-    # Resolve the active user so that updates only affect this user's agent.
-    # ``resolve_runtime_user_id`` prefers ``runtime.context["user_id"]`` (set by
-    # the gateway from the auth-validated request) and falls back to the
-    # contextvar, then DEFAULT_USER_ID. This matches setup_agent so a user
-    # creating an agent and later refining it always touches the same files,
-    # even if the contextvar gets lost across an async/thread boundary
-    # (issue #2782 / #2862 class of bugs).
-    user_id = resolve_runtime_user_id(runtime)
-
-    # Reject an unknown ``model`` *before* touching the filesystem. Otherwise
-    # ``_resolve_model_name`` silently falls back to the default at runtime
-    # and the user sees confusing repeated warnings on every later turn.
-    if model is not None and get_app_config().get_model_config(model) is None:
-        return _err(f"Unknown model '{model}'. Pass a model name that exists in config.yaml's models section.")
-
-    paths = get_paths()
-    agent_dir = paths.user_agent_dir(user_id, agent_name)
-    if not agent_dir.exists() and paths.agent_dir(agent_name).exists():
-        return _err(f"Agent '{agent_name}' only exists in the legacy shared layout and is not scoped to a user. Run scripts/migrate_user_isolation.py to move legacy agents into the per-user layout before updating.")
-
-    try:
-        existing_cfg = load_agent_config(agent_name, user_id=user_id)
-    except FileNotFoundError:
-        return _err(f"Agent '{agent_name}' does not exist for the current user. Use setup_agent to create a new agent first.")
-    except ValueError as e:
-        return _err(f"Agent '{agent_name}' has an unreadable config: {e}")
-
-    if existing_cfg is None:
-        return _err(f"Agent '{agent_name}' could not be loaded.")
-
-    updated_fields: list[str] = []
-
-    # Force the on-disk ``name`` to match the directory we are writing into,
-    # even if ``existing_cfg.name`` had drifted (e.g. from manual yaml edits).
-    config_data: dict[str, Any] = {"name": agent_name}
-    new_description = description if description is not None else existing_cfg.description
-    config_data["description"] = new_description
-    if description is not None and description != existing_cfg.description:
-        updated_fields.append("description")
-
-    new_model = model if model is not None else existing_cfg.model
-    if new_model is not None:
-        config_data["model"] = new_model
-    if model is not None and model != existing_cfg.model:
-        updated_fields.append("model")
-
-    new_tool_groups = tool_groups if tool_groups is not None else existing_cfg.tool_groups
-    if new_tool_groups is not None:
-        config_data["tool_groups"] = new_tool_groups
-    if tool_groups is not None and tool_groups != existing_cfg.tool_groups:
-        updated_fields.append("tool_groups")
-
-    new_skills = skills if skills is not None else existing_cfg.skills
-    if new_skills is not None:
-        config_data["skills"] = new_skills
-    if skills is not None and skills != existing_cfg.skills:
-        updated_fields.append("skills")
-
-    config_changed = bool({"description", "model", "tool_groups", "skills"} & set(updated_fields))
-
-    # Stage every file we intend to rewrite into a temp sibling. Only after
-    # *all* temp files exist do we rename them into place — so a failure on
-    # SOUL.md cannot leave config.yaml already replaced.
-    pending: list[tuple[Path, Path]] = []
-    staged_temps: list[Path] = []
-
-    try:
-        agent_dir.mkdir(parents=True, exist_ok=True)
-
-        if config_changed:
-            yaml_text = yaml.dump(config_data, default_flow_style=False, allow_unicode=True, sort_keys=False)
-            config_target = agent_dir / "config.yaml"
-            config_tmp = _stage_temp(config_target, yaml_text)
-            staged_temps.append(config_tmp)
-            pending.append((config_tmp, config_target))
-
-        if soul is not None:
-            soul_target = agent_dir / "SOUL.md"
-            soul_tmp = _stage_temp(soul_target, soul)
-            staged_temps.append(soul_tmp)
-            pending.append((soul_tmp, soul_target))
-            updated_fields.append("soul")
-
-        # Commit phase. ``Path.replace`` is atomic per file on POSIX/NTFS and
-        # the staging step above means any earlier failure has already been
-        # reported. The remaining failure mode is a crash *between* two
-        # ``replace`` calls, which is reported via the partial-write error
-        # branch below so the caller knows which files are now on disk.
-        committed: list[Path] = []
-        try:
-            for tmp, target in pending:
-                tmp.replace(target)
-                committed.append(target)
-        except Exception as e:
-            # ``committed`` holds *target* paths, so comparing temp paths
-            # against it never matches. Renames happen in ``pending`` order,
-            # so everything after the committed prefix is still a temp file
-            # on disk and is the only thing that needs removing.
-            _cleanup_temps([tmp for tmp, _ in pending[len(committed) :]])
-            if committed:
-                logger.error(
-                    "[update_agent] Partial write for agent '%s' (user=%s): committed=%s, failed during rename: %s",
-                    agent_name,
-                    user_id,
-                    [p.name for p in committed],
-                    e,
-                    exc_info=True,
-                )
-                return _err(f"Partial update for agent '{agent_name}': {[p.name for p in committed]} were updated, but the rest failed ({e}). Re-run update_agent to retry the remaining fields.")
-            raise
-
-    except Exception as e:
-        # Staging failed, or the very first rename failed: nothing was committed.
-        _cleanup_temps(staged_temps)
-        logger.error("[update_agent] Failed to update agent '%s' (user=%s): %s", agent_name, user_id, e, exc_info=True)
-        return _err(f"Failed to update agent '{agent_name}': {e}")
-
-    if not updated_fields:
-        return Command(update={"messages": [ToolMessage(content=f"No changes applied to agent '{agent_name}'. The provided values matched the existing config.", tool_call_id=tool_call_id)]})
-
-    logger.info("[update_agent] Updated agent '%s' (user=%s) fields: %s", agent_name, user_id, updated_fields)
-    return Command(
-        update={
-            "messages": [
-                ToolMessage(
-                    content=(f"Agent '{agent_name}' updated successfully. Changed: {', '.join(updated_fields)}. The new configuration takes effect on the next user turn."),
-                    tool_call_id=tool_call_id,
-                )
-            ]
-        }
-    )
@@ -3,13 +3,13 @@ import mimetypes
 from pathlib import Path
 from typing import Annotated

-from langchain.tools import InjectedToolCallId, tool
+from langchain.tools import InjectedToolCallId, ToolRuntime, tool
 from langchain_core.messages import ToolMessage
 from langgraph.types import Command
+from langgraph.typing import ContextT

-from deerflow.agents.thread_state import ThreadDataState
+from deerflow.agents.thread_state import ThreadDataState, ThreadState
 from deerflow.config.paths import VIRTUAL_PATH_PREFIX
-from deerflow.tools.types import Runtime

 _ALLOWED_IMAGE_VIRTUAL_ROOTS = (
    f"{VIRTUAL_PATH_PREFIX}/workspace",
@@ -48,7 +48,7 @@ def _sanitize_image_error(error: Exception, thread_data: ThreadDataState | None)

@tool("view_image", parse_docstring=True)
 def view_image_tool(
-    runtime: Runtime,
+    runtime: ToolRuntime[ContextT, ThreadState],
    image_path: str,
    tool_call_id: Annotated[str, InjectedToolCallId],
 ) -> Command:
@@ -7,15 +7,16 @@ import logging
 from typing import Any
 from weakref import WeakValueDictionary

-from langchain.tools import tool
+from langchain.tools import ToolRuntime, tool
+from langgraph.typing import ContextT

 from deerflow.agents.lead_agent.prompt import refresh_skills_system_prompt_cache_async
+from deerflow.agents.thread_state import ThreadState
+from deerflow.mcp.tools import _make_sync_tool_wrapper
 from deerflow.skills.security_scanner import scan_skill_content
 from deerflow.skills.storage import get_or_new_skill_storage
 from deerflow.skills.storage.skill_storage import SkillStorage
 from deerflow.skills.types import SKILL_MD_FILE
-from deerflow.tools.sync import make_sync_tool_wrapper
-from deerflow.tools.types import Runtime

 logger = logging.getLogger(__name__)

@@ -30,7 +31,7 @@ def _get_lock(name: str) -> asyncio.Lock:
    return lock


-def _get_thread_id(runtime: Runtime | None) -> str | None:
+def _get_thread_id(runtime: ToolRuntime[ContextT, ThreadState] | None) -> str | None:
    if runtime is None:
        return None
    if runtime.context and runtime.context.get("thread_id"):
@@ -64,7 +65,7 @@ async def _to_thread(func, /, *args, **kwargs):


 async def _skill_manage_impl(
-    runtime: Runtime,
+    runtime: ToolRuntime[ContextT, ThreadState],
    action: str,
    name: str,
    content: str | None = None,
@@ -203,7 +204,7 @@ async def _skill_manage_impl(

@tool("skill_manage", parse_docstring=True)
 async def skill_manage_tool(
-    runtime: Runtime,
+    runtime: ToolRuntime[ContextT, ThreadState],
    action: str,
    name: str,
    content: str | None = None,
@@ -235,4 +236,4 @@ async def skill_manage_tool(
    )


-skill_manage_tool.func = make_sync_tool_wrapper(_skill_manage_impl, "skill_manage")
+skill_manage_tool.func = _make_sync_tool_wrapper(_skill_manage_impl, "skill_manage")
@@ -1,36 +0,0 @@
-"""Utilities for invoking async tools from synchronous agent paths."""
-
-import asyncio
-import atexit
-import concurrent.futures
-import logging
-from collections.abc import Callable
-from typing import Any
-
-logger = logging.getLogger(__name__)
-
-# Shared thread pool for sync tool invocation in async environments.
-_SYNC_TOOL_EXECUTOR = concurrent.futures.ThreadPoolExecutor(max_workers=10, thread_name_prefix="tool-sync")
-
-atexit.register(lambda: _SYNC_TOOL_EXECUTOR.shutdown(wait=False))
-
-
-def make_sync_tool_wrapper(coro: Callable[..., Any], tool_name: str) -> Callable[..., Any]:
-    """Return a blocking callable that drives the async tool ``coro`` to completion.
-
-    When called from a thread that already has a running event loop, the
-    coroutine is executed with ``asyncio.run`` on the shared
-    ``_SYNC_TOOL_EXECUTOR`` pool and the caller blocks on the result;
-    otherwise ``asyncio.run`` is used directly. Any exception is logged with
-    ``tool_name`` and re-raised.
-    """
-
-    def sync_wrapper(*args: Any, **kwargs: Any) -> Any:
-        try:
-            active_loop = asyncio.get_running_loop()
-        except RuntimeError:
-            active_loop = None
-        inside_running_loop = active_loop is not None and active_loop.is_running()
-
-        try:
-            if not inside_running_loop:
-                return asyncio.run(coro(*args, **kwargs))
-            # asyncio.run cannot be nested in a running loop; hand off to a worker thread.
-            pending = _SYNC_TOOL_EXECUTOR.submit(asyncio.run, coro(*args, **kwargs))
-            return pending.result()
-        except Exception as e:
-            logger.error("Error invoking tool %r via sync wrapper: %s", tool_name, e, exc_info=True)
-            raise
-
-    return sync_wrapper
@@ -8,7 +8,6 @@ from deerflow.reflection import resolve_variable
 from deerflow.sandbox.security import is_host_bash_allowed
 from deerflow.tools.builtins import ask_clarification_tool, present_file_tool, task_tool, view_image_tool
 from deerflow.tools.builtins.tool_search import reset_deferred_registry
-from deerflow.tools.sync import make_sync_tool_wrapper

 logger = logging.getLogger(__name__)

@@ -34,13 +33,6 @@ def _is_host_bash_tool(tool: object) -> bool:
    return False


-def _ensure_sync_invocable_tool(tool: BaseTool) -> BaseTool:
-    """Give an async-only tool a synchronous ``func`` so sync callers can invoke it.
-
-    Tools that already have a ``func``, or that have no ``coroutine``, are
-    returned untouched. The same tool object is returned in every case.
-    """
-    if getattr(tool, "func", None) is not None:
-        return tool
-    if getattr(tool, "coroutine", None) is None:
-        return tool
-    tool.func = make_sync_tool_wrapper(tool.coroutine, tool.name)
-    return tool
-
-
 def get_available_tools(
    groups: list[str] | None = None,
    include_mcp: bool = True,
@@ -85,7 +77,7 @@ def get_available_tools(
                cfg.use,
            )

-    loaded_tools = [_ensure_sync_invocable_tool(t) for _, t in loaded_tools_raw]
+    loaded_tools = [t for _, t in loaded_tools_raw]

    # Conditionally add tools based on config
    builtin_tools = BUILTIN_TOOLS.copy()
@@ -1,11 +0,0 @@
-from typing import Any
-
-from langchain.tools import ToolRuntime
-
-from deerflow.agents.thread_state import ThreadState
-
-# Concrete runtime type used by all DeerFlow tools.
-# Using dict[str, Any] for the context parameter instead of the unbound ContextT
-# TypeVar prevents PydanticSerializationUnexpectedValue warnings when LangChain
-# calls model_dump() on a tool's auto-generated args_schema.
-Runtime = ToolRuntime[dict[str, Any], ThreadState]
@@ -4,10 +4,8 @@ Pure business logic — no FastAPI/HTTP dependencies.
 Both Gateway and Client delegate to these functions.
 """

-import errno
 import os
 import re
-import stat
 from pathlib import Path
 from urllib.parse import quote

@@ -19,10 +17,6 @@ class PathTraversalError(ValueError):
    """Raised when a path escapes its allowed base directory."""


-class UnsafeUploadPathError(ValueError):
-    """Raised when an upload destination is not a safe regular file path."""
-
-
 # thread_id must be alphanumeric, hyphens, underscores, or dots only.
 _SAFE_THREAD_ID = re.compile(r"^[a-zA-Z0-9._-]+$")

@@ -115,108 +109,6 @@ def validate_path_traversal(path: Path, base: Path) -> None:
        raise PathTraversalError("Path traversal detected") from None


-def open_upload_file_no_symlink(base_dir: Path, filename: str) -> tuple[Path, object]:
-    """Open an upload destination for safe streaming writes.
-
-    Upload directories may be mounted into local sandboxes. A sandbox process can
-    therefore leave a symlink at a future upload filename. Normal ``Path.write_bytes``
-    follows that link and can overwrite files outside the uploads directory with
-    gateway privileges. This helper rejects symlink destinations using ``O_NOFOLLOW``
-    on POSIX. On Windows (which lacks ``O_NOFOLLOW``), it uses dual ``lstat`` checks
-    and ``fstat`` validation after ``open()`` to reduce the TOCTOU window; this does
-    not eliminate all races but makes exploitation significantly harder. Path-traversal
-    validation prevents escapes from *base_dir* in both cases.
-
-    Args:
-        base_dir: Directory the upload must stay inside.
-        filename: Client-supplied name; it is passed through ``normalize_filename``
-            before being joined onto *base_dir*.
-
-    Returns:
-        ``(dest, fh)`` where ``dest`` is the destination path and ``fh`` is a
-        binary write handle (``os.fdopen(fd, "wb")``) on the truncated file. The
-        caller is responsible for closing ``fh``.
-
-    Raises:
-        UnsafeUploadPathError: The destination is not a regular file, has more
-            than one hard link, or ``open()`` failed with one of the errno
-            values treated as unsafe below.
-        OSError: Any other ``open()`` failure is re-raised unchanged.
-        Exceptions raised by ``normalize_filename`` or ``validate_path_traversal``
-        propagate unchanged.
-    """
-    safe_name = normalize_filename(filename)
-    dest = base_dir / safe_name
-
-    # lstat does not follow symlinks, so a symlink, directory, FIFO, etc. at the
-    # destination shows up as a non-regular file and is rejected up front.
-    try:
-        st = os.lstat(dest)
-    except FileNotFoundError:
-        st = None
-
-    if st is not None and not stat.S_ISREG(st.st_mode):
-        raise UnsafeUploadPathError(f"Upload destination is not a regular file: {safe_name}")
-
-    validate_path_traversal(dest, base_dir)
-
-    has_nofollow = hasattr(os, "O_NOFOLLOW")
-
-    if has_nofollow:
-        # POSIX: O_NOFOLLOW makes open() fail with ELOOP if dest is a symlink.
-        flags = os.O_WRONLY | os.O_CREAT | os.O_NOFOLLOW
-        # NOTE(review): O_NONBLOCK presumably keeps open() from blocking on a
-        # FIFO swapped in after the lstat check (ENXIO/EAGAIN are mapped to
-        # UnsafeUploadPathError below) — confirm intent.
-        if hasattr(os, "O_NONBLOCK"):
-            flags |= os.O_NONBLOCK
-
-        try:
-            fd = os.open(dest, flags, 0o600)
-        except OSError as exc:
-            if exc.errno in {errno.ELOOP, errno.EISDIR, errno.ENOTDIR, errno.ENXIO, errno.EAGAIN}:
-                raise UnsafeUploadPathError(f"Unsafe upload destination: {safe_name}") from exc
-            raise
-
-        try:
-            # Re-validate the object that was actually opened: it must be a
-            # regular file with exactly one hard link.
-            opened_stat = os.fstat(fd)
-            if not stat.S_ISREG(opened_stat.st_mode) or opened_stat.st_nlink != 1:
-                raise UnsafeUploadPathError(f"Upload destination is not an exclusive regular file: {safe_name}")
-            # Truncation happens only after validation (O_TRUNC is not in
-            # ``flags``), so a rejected destination is never emptied.
-            os.ftruncate(fd, 0)
-            fh = os.fdopen(fd, "wb")
-            # Ownership of the descriptor has moved to ``fh``; the sentinel
-            # stops the ``finally`` below from closing it.
-            fd = -1
-        finally:
-            if fd >= 0:
-                os.close(fd)
-        return dest, fh
-
-    # Windows: no O_NOFOLLOW available. Uses a second lstat immediately before open()
-    # to narrow the TOCTOU window, then fstat after open() as a further defence.
-    # Note: a narrow race window remains between the pre-open lstat and open(); the
-    # path-traversal check mitigates escapes from base_dir but cannot prevent an
-    # attacker who can atomically replace dest with a symlink after the check.
-    if st is not None and st.st_nlink > 1:
-        raise UnsafeUploadPathError(f"Upload destination has multiple links: {safe_name}")
-
-    flags = os.O_WRONLY | os.O_CREAT
-    if hasattr(os, "O_BINARY"):
-        flags |= os.O_BINARY
-
-    # Second lstat, taken as late as possible before open().
-    try:
-        pre_open_st = os.lstat(dest)
-    except FileNotFoundError:
-        pre_open_st = None
-
-    if pre_open_st is not None and not stat.S_ISREG(pre_open_st.st_mode):
-        raise UnsafeUploadPathError(f"Upload destination is not a regular file: {safe_name}")
-    if pre_open_st is not None and pre_open_st.st_nlink > 1:
-        raise UnsafeUploadPathError(f"Upload destination has multiple links: {safe_name}")
-
-    try:
-        fd = os.open(dest, flags, 0o600)
-    except OSError as exc:
-        if exc.errno in {errno.EISDIR, errno.ENOTDIR, errno.ENXIO, errno.EAGAIN}:
-            raise UnsafeUploadPathError(f"Unsafe upload destination: {safe_name}") from exc
-        raise
-
-    try:
-        # Same post-open validation as the POSIX branch, except the link count
-        # test here is ``> 1`` rather than ``!= 1``.
-        opened_stat = os.fstat(fd)
-        if not stat.S_ISREG(opened_stat.st_mode) or opened_stat.st_nlink > 1:
-            raise UnsafeUploadPathError(f"Upload destination is not an exclusive regular file: {safe_name}")
-        os.ftruncate(fd, 0)
-        fh = os.fdopen(fd, "wb")
-        # ``fh`` now owns the descriptor; skip the close in ``finally``.
-        fd = -1
-    finally:
-        if fd >= 0:
-            os.close(fd)
-    return dest, fh
-
-
-def write_upload_file_no_symlink(base_dir: Path, filename: str, data: bytes) -> Path:
-    """Write ``data`` to the upload destination opened by ``open_upload_file_no_symlink``.
-
-    The handle is always closed, even if the write fails. Returns the
-    destination path reported by ``open_upload_file_no_symlink``.
-    """
-    written_path, stream = open_upload_file_no_symlink(base_dir, filename)
-    with stream as sink:
-        sink.write(data)
-    return written_path
-
-
 def list_files_in_dir(directory: Path) -> dict:
    """List files (not directories) in *directory*.

@@ -1,75 +0,0 @@
-"""ISO 8601 timestamp helpers for the Gateway and embedded runtime.
-
-DeerFlow stores and serializes thread/run timestamps as ISO 8601 UTC
-strings to match the LangGraph Platform schema (see
-``langgraph_sdk.schema.Thread``, where ``created_at`` / ``updated_at``
-are ``datetime`` and JSON-encode to ISO 8601). All timestamp generation
-should funnel through :func:`now_iso` so the wire format stays
-consistent across endpoints, the embedded ``RunManager``, and the
-checkpoint metadata written by the Gateway.
-
-:func:`coerce_iso` provides a forward-compatible read path for legacy
-records that historically stored ``str(time.time())`` floats.
-"""
-
-from __future__ import annotations
-
-import re
-from datetime import UTC, datetime
-
-__all__ = ["coerce_iso", "now_iso"]
-
-_UNIX_TIMESTAMP_PATTERN = re.compile(r"^\d{10}(?:\.\d+)?$")
-"""Matches the unix-timestamp string shape historically written by
-``str(time.time())`` (10-digit seconds with optional fractional part).
-The 10-digit anchor avoids accidentally rewriting ISO years like
-``"2026"`` and stays valid until the year 2286.
-"""
-
-
-def now_iso() -> str:
-    """Format the current moment, in UTC, as an ISO 8601 string.
-
-    The result carries an explicit ``+00:00`` offset, for example
-    ``"2026-04-27T03:19:46.511479+00:00"``.
-    """
-    current_utc = datetime.now(tz=UTC)
-    return current_utc.isoformat()
-
-
-def coerce_iso(value: object) -> str:
-    """Turn a stored timestamp of unknown vintage into an ISO 8601 string.
-
-    Handling by input:
-
-    * ``None`` or ``""`` -> ``""``.
-    * ``bool`` -> ``str(value)`` (never interpreted as the number 0/1).
-    * ``datetime`` -> converted to UTC (naive values are taken to be UTC
-      already) and rendered with ``isoformat()``.
-    * ``int`` / ``float`` -> read as unix seconds; falls back to
-      ``str(value)`` if the conversion fails.
-    * ``str`` shaped like a legacy unix timestamp (per
-      ``_UNIX_TIMESTAMP_PATTERN``) -> converted like a number; any other
-      string, or one whose conversion fails, is returned unchanged.
-    * anything else -> ``str(value)``.
-    """
-    if value is None or value == "":
-        return ""
-
-    # ``bool`` subclasses ``int``, so it has to be filtered out before the numeric branch.
-    if isinstance(value, bool):
-        return str(value)
-
-    # Checked before numbers; ``str(datetime)`` would use a space instead of ``T``.
-    if isinstance(value, datetime):
-        as_utc = value.replace(tzinfo=UTC) if value.tzinfo is None else value.astimezone(UTC)
-        return as_utc.isoformat()
-
-    is_number = isinstance(value, (int, float))
-    if not is_number and not isinstance(value, str):
-        return str(value)
-
-    if not is_number and not _UNIX_TIMESTAMP_PATTERN.match(value):
-        # Already ISO (or otherwise unrecognised) text: pass through untouched.
-        return value
-
-    # Numbers fall back to their string form, strings to themselves.
-    fallback = str(value) if is_number else value
-    try:
-        return datetime.fromtimestamp(float(value), UTC).isoformat()
-    except (ValueError, OverflowError, OSError):
-        return fallback
@@ -8,7 +8,7 @@ dependencies = [
    "deerflow-harness",
    "fastapi>=0.115.0",
    "httpx>=0.28.0",
-    "python-multipart>=0.0.27",
+    "python-multipart>=0.0.26",
    "sse-starlette>=2.1.0",
    "uvicorn[standard]>=0.34.0",
    "lark-oapi>=1.4.0",
@@ -47,3 +47,4 @@ members = ["packages/harness"]

 [tool.uv.sources]
 deerflow-harness = { workspace = true }
+
--- a/Show More
+++ b/Show More