fix(sandbox): cleanup dead containers and avoid lock-held liveness checks

Agent-Logs-Url: https://github.com/bytedance/deer-flow/sessions/96707445-0f8b-4901-8ef3-d8e5667f8a05 Co-authored-by: WillemJiang <219644+WillemJiang@users.noreply.github.com>
fix(sandbox): auto-restart crashed containers transparently (#2788)
2026-05-11 00:09:09 +00:00 · 2026-05-10 22:53:58 +08:00 · 2026-05-10 22:28:29 +08:00 · 2026-05-10 22:00:57 +08:00 · 2026-05-10 15:10:44 +08:00 · 2026-05-09 23:40:46 +08:00
377 changed files with 24606 additions and 9040 deletions
@@ -1,3 +1,6 @@
+# Serper API Key (Google Search) - https://serper.dev
+SERPER_API_KEY=your-serper-api-key
+
 # TAVILY API Key
 TAVILY_API_KEY=your-tavily-api-key

@@ -40,3 +43,19 @@ INFOQUEST_API_KEY=your-infoquest-api-key
 #
 # WECOM_BOT_ID=your-wecom-bot-id
 # WECOM_BOT_SECRET=your-wecom-bot-secret
+# DINGTALK_CLIENT_ID=your-dingtalk-client-id
+# DINGTALK_CLIENT_SECRET=your-dingtalk-client-secret
+
+# Set to "false" to disable Swagger UI, ReDoc, and OpenAPI schema in production
+# GATEWAY_ENABLE_DOCS=false
+
+# ── Frontend SSR → Gateway wiring ─────────────────────────────────────────────
+# The Next.js server uses these to reach the Gateway during SSR (auth checks,
+# /api/* rewrites). They default to localhost values that match `make dev` and
+# `make start`, so most local users do not need to set them.
+#
+# Override only when the Gateway is not on localhost:8001 (e.g. when the
+# frontend and gateway run on different hosts, in containers with a service
+# alias, or behind a different port). docker-compose already sets these.
+# DEER_FLOW_INTERNAL_GATEWAY_BASE_URL=http://localhost:8001
+# DEER_FLOW_TRUSTED_ORIGINS=http://localhost:3000,http://localhost:2026
@@ -0,0 +1,101 @@
+name: Publish Containers
+
+on:
+  push:
+    tags:
+      - "v*"
+
+jobs:
+
+  backend-container:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+      attestations: write
+      id-token: write
+    env:
+      REGISTRY: ghcr.io
+      IMAGE_NAME: ${{ github.repository }}-backend
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+      - name: Log in to the Container registry
+        uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 #v3.4.0
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 #v5.7.0
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          tags: |
+            type=ref,event=tag
+            type=ref,event=branch
+            type=sha
+            type=raw,value=latest,enable={{is_default_branch}}
+      - name: Build and push Docker image
+        id: push
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 #v6.18.0
+        with:
+          context: .
+          file: backend/Dockerfile
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+
+      - name: Generate artifact attestation
+        uses: actions/attest-build-provenance@v2
+        with:
+          subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}}
+          subject-digest: ${{ steps.push.outputs.digest }}
+          push-to-registry: true
+
+  frontend-container:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+      attestations: write
+      id-token: write
+    env:
+      REGISTRY: ghcr.io
+      IMAGE_NAME: ${{ github.repository }}-frontend
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v6
+      - name: Log in to the Container registry
+        uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 #v3.4.0
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+      - name: Extract metadata (tags, labels) for Docker
+        id: meta
+        uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 #v5.7.0
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          tags: |
+            type=ref,event=tag
+            type=ref,event=branch
+            type=sha
+            type=raw,value=latest,enable={{is_default_branch}}
+      - name: Build and push Docker image
+        id: push
+        uses: docker/build-push-action@263435318d21b8e681c14492fe198d362a7d2c83 #v6.18.0
+        with:
+          context: .
+          file: frontend/Dockerfile
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+
+      - name: Generate artifact attestation
+        uses: actions/attest-build-provenance@v2
+        with:
+          subject-name: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME}}
+          subject-digest: ${{ steps.push.outputs.digest }}
+          push-to-registry: true
@@ -251,7 +251,7 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for detailed Docker development guide.

 If you prefer running services locally:

-Prerequisite: complete the "Configuration" steps above first (`make setup`). `make dev` requires a valid `config.yaml` in the project root (can be overridden via `DEER_FLOW_CONFIG_PATH`). Run `make doctor` to verify your setup before starting.
+Prerequisite: complete the "Configuration" steps above first (`make setup`). `make dev` requires a valid `config.yaml` in the project root. Set `DEER_FLOW_PROJECT_ROOT` to define that root explicitly, or `DEER_FLOW_CONFIG_PATH` to point at a specific config file. Runtime state defaults to `.deer-flow` under the project root and can be moved with `DEER_FLOW_HOME`; skills default to `skills/` under the project root and can be moved with `DEER_FLOW_SKILLS_PATH`. Run `make doctor` to verify your setup before starting.
 On Windows, run the local development flow from Git Bash. Native `cmd.exe` and PowerShell shells are not supported for the bash-based service scripts, and WSL is not guaranteed because some scripts rely on Git for Windows utilities such as `cygpath`.

 1. **Check prerequisites**:
@@ -345,6 +345,7 @@ DeerFlow supports receiving tasks from messaging apps. Channels auto-start when
 | Feishu / Lark | WebSocket | Moderate |
 | WeChat | Tencent iLink (long-polling) | Moderate |
 | WeCom | WebSocket | Moderate |
+| DingTalk | Stream Push (WebSocket) | Moderate |

 **Configuration in `config.yaml`:**

@@ -414,6 +415,13 @@ channels:
          context:
            thinking_enabled: true
            subagent_enabled: true
+
+  dingtalk:
+    enabled: true
+    client_id: $DINGTALK_CLIENT_ID             # Client ID of your DingTalk application
+    client_secret: $DINGTALK_CLIENT_SECRET     # Client Secret of your DingTalk application
+    allowed_users: []                          # empty = allow all
+    card_template_id: ""                       # Optional: AI Card template ID for streaming typewriter effect
 ```

 Notes:
@@ -442,6 +450,10 @@ WECHAT_ILINK_BOT_ID=your_ilink_bot_id
 # WeCom
 WECOM_BOT_ID=your_bot_id
 WECOM_BOT_SECRET=your_bot_secret
+
+# DingTalk
+DINGTALK_CLIENT_ID=your_client_id
+DINGTALK_CLIENT_SECRET=your_client_secret
 ```

 **Telegram Setup**
@@ -480,6 +492,14 @@ WECOM_BOT_SECRET=your_bot_secret
 4. Make sure backend dependencies include `wecom-aibot-python-sdk`. The channel uses a WebSocket long connection and does not require a public callback URL.
 5. The current integration supports inbound text, image, and file messages. Final images/files generated by the agent are also sent back to the WeCom conversation.

+**DingTalk Setup**
+
+1. Create a DingTalk application in the [DingTalk Developer Console](https://open.dingtalk.com/) and enable **Robot** capability.
+2. Set the message receiving mode to **Stream Mode** in the robot configuration page.
+3. Copy the `Client ID` and `Client Secret`, set `DINGTALK_CLIENT_ID` and `DINGTALK_CLIENT_SECRET` in `.env`, and enable the channel in `config.yaml`.
+4. *(Optional)* To enable streaming AI Card replies (typewriter effect), create an **AI Card** template on the [DingTalk Card Platform](https://open.dingtalk.com/document/dingstart/typewriter-effect-streaming-ai-card), then set `card_template_id` in `config.yaml` to the template ID. You also need to apply for the `Card.Streaming.Write` and `Card.Instance.Write` permissions.
+
+
 When DeerFlow runs in Docker Compose, IM channels execute inside the `gateway` container. In that case, do not point `channels.langgraph_url` or `channels.gateway_url` at `localhost`; use container service names such as `http://gateway:8001/api` and `http://gateway:8001`, or set `DEER_FLOW_CHANNELS_LANGGRAPH_URL` and `DEER_FLOW_CHANNELS_GATEWAY_URL`.

 **Commands**
@@ -290,6 +290,7 @@ DeerFlow peut recevoir des tâches depuis des applications de messagerie. Les ca
 | Telegram | Bot API (long-polling) | Facile |
 | Slack | Socket Mode | Modérée |
 | Feishu / Lark | WebSocket | Modérée |
+| DingTalk | Stream Push (WebSocket) | Modérée |

 **Configuration dans `config.yaml` :**

@@ -341,6 +342,13 @@ channels:
          context:
            thinking_enabled: true
            subagent_enabled: true
+
+  dingtalk:
+    enabled: true
+    client_id: $DINGTALK_CLIENT_ID             # ClientId depuis DingTalk Open Platform
+    client_secret: $DINGTALK_CLIENT_SECRET     # ClientSecret depuis DingTalk Open Platform
+    allowed_users: []                          # vide = tout le monde autorisé
+    card_template_id: ""                       # Optionnel : ID de modèle AI Card pour l'effet machine à écrire en streaming
 ```

 Définissez les clés API correspondantes dans votre fichier `.env` :
@@ -356,6 +364,10 @@ SLACK_APP_TOKEN=xapp-...
 # Feishu / Lark
 FEISHU_APP_ID=cli_xxxx
 FEISHU_APP_SECRET=your_app_secret
+
+# DingTalk
+DINGTALK_CLIENT_ID=your_client_id
+DINGTALK_CLIENT_SECRET=your_client_secret
 ```

 **Configuration Telegram**
@@ -378,6 +390,13 @@ FEISHU_APP_SECRET=your_app_secret
 3. Dans **Events**, abonnez-vous à `im.message.receive_v1` et sélectionnez le mode **Long Connection**.
 4. Copiez l'App ID et l'App Secret. Définissez `FEISHU_APP_ID` et `FEISHU_APP_SECRET` dans `.env` et activez le canal dans `config.yaml`.

+**Configuration DingTalk**
+
+1. Créez une application sur [DingTalk Open Platform](https://open.dingtalk.com/) et activez la capacité **Robot**.
+2. Dans la page de configuration du robot, définissez le mode de réception des messages sur **Stream**.
+3. Copiez le `Client ID` et le `Client Secret`. Définissez `DINGTALK_CLIENT_ID` et `DINGTALK_CLIENT_SECRET` dans `.env` et activez le canal dans `config.yaml`.
+4. *(Optionnel)* Pour activer les réponses en streaming AI Card (effet machine à écrire), créez un modèle **AI Card** sur la [plateforme de cartes DingTalk](https://open.dingtalk.com/document/dingstart/typewriter-effect-streaming-ai-card), puis définissez `card_template_id` dans `config.yaml` avec l'ID du modèle. Vous devez également demander les permissions `Card.Streaming.Write` et `Card.Instance.Write`.
+
 **Commandes**

 Une fois un canal connecté, vous pouvez interagir avec DeerFlow directement depuis le chat :
@@ -243,6 +243,7 @@ DeerFlowはメッセージングアプリからのタスク受信をサポート
 | Telegram | Bot API（ロングポーリング） | 簡単 |
 | Slack | Socket Mode | 中程度 |
 | Feishu / Lark | WebSocket | 中程度 |
+| DingTalk | Stream Push（WebSocket） | 中程度 |

 **`config.yaml`での設定：**

@@ -294,6 +295,13 @@ channels:
          context:
            thinking_enabled: true
            subagent_enabled: true
+
+  dingtalk:
+    enabled: true
+    client_id: $DINGTALK_CLIENT_ID             # DingTalk Open PlatformのClientId
+    client_secret: $DINGTALK_CLIENT_SECRET     # DingTalk Open PlatformのClientSecret
+    allowed_users: []                          # 空 = 全員許可
+    card_template_id: ""                       # オプション：ストリーミングタイプライター効果用のAIカードテンプレートID
 ```

 対応するAPIキーを`.env`ファイルに設定します：
@@ -309,6 +317,10 @@ SLACK_APP_TOKEN=xapp-...
 # Feishu / Lark
 FEISHU_APP_ID=cli_xxxx
 FEISHU_APP_SECRET=your_app_secret
+
+# DingTalk
+DINGTALK_CLIENT_ID=your_client_id
+DINGTALK_CLIENT_SECRET=your_client_secret
 ```

 **Telegramのセットアップ**
@@ -331,6 +343,13 @@ FEISHU_APP_SECRET=your_app_secret
 3. **イベント**で`im.message.receive_v1`を購読し、**ロングコネクション**モードを選択。
 4. App IDとApp Secretをコピー。`.env`に`FEISHU_APP_ID`と`FEISHU_APP_SECRET`を設定し、`config.yaml`でチャネルを有効にします。

+**DingTalkのセットアップ**
+
+1. [DingTalk Open Platform](https://open.dingtalk.com/)でアプリを作成し、**ロボット**機能を有効化します。
+2. ロボット設定ページでメッセージ受信モードを**Streamモード**に設定します。
+3. `Client ID`と`Client Secret`をコピー。`.env`に`DINGTALK_CLIENT_ID`と`DINGTALK_CLIENT_SECRET`を設定し、`config.yaml`でチャネルを有効にします。
+4. *（オプション）* ストリーミングAIカード返信（タイプライター効果）を有効にするには、[DingTalkカードプラットフォーム](https://open.dingtalk.com/document/dingstart/typewriter-effect-streaming-ai-card)で**AIカード**テンプレートを作成し、`config.yaml`の`card_template_id`にテンプレートIDを設定します。`Card.Streaming.Write` および `Card.Instance.Write` 権限の申請も必要です。
+
 **コマンド**

 チャネル接続後、チャットから直接DeerFlowと対話できます：
@@ -256,6 +256,7 @@ DeerFlow принимает задачи прямо из мессенджеро
 | Telegram | Bot API (long-polling) | Просто |
 | Slack | Socket Mode | Средне |
 | Feishu / Lark | WebSocket | Средне |
+| DingTalk | Stream Push (WebSocket) | Средне |

 **Конфигурация в `config.yaml`:**

@@ -278,6 +279,13 @@ channels:
    enabled: true
    bot_token: $TELEGRAM_BOT_TOKEN
    allowed_users: []
+
+  dingtalk:
+    enabled: true
+    client_id: $DINGTALK_CLIENT_ID             # ClientId с DingTalk Open Platform
+    client_secret: $DINGTALK_CLIENT_SECRET     # ClientSecret с DingTalk Open Platform
+    allowed_users: []                          # пусто = разрешить всем
+    card_template_id: ""                       # Опционально: ID шаблона AI Card для потокового эффекта печатной машинки
 ```

 **Настройка Telegram**
@@ -285,6 +293,13 @@ channels:
 1. Напишите [@BotFather](https://t.me/BotFather), отправьте `/newbot` и скопируйте HTTP API-токен.
 2. Укажите `TELEGRAM_BOT_TOKEN` в `.env` и включите канал в `config.yaml`.

+**Настройка DingTalk**
+
+1. Создайте приложение на [DingTalk Open Platform](https://open.dingtalk.com/) и включите возможность **Робот**.
+2. На странице настроек робота установите режим приёма сообщений на **Stream**.
+3. Скопируйте `Client ID` и `Client Secret`. Укажите `DINGTALK_CLIENT_ID` и `DINGTALK_CLIENT_SECRET` в `.env` и включите канал в `config.yaml`.
+4. *(Опционально)* Для включения потоковых ответов AI Card (эффект печатной машинки) создайте шаблон **AI Card** на [платформе карточек DingTalk](https://open.dingtalk.com/document/dingstart/typewriter-effect-streaming-ai-card), затем укажите `card_template_id` в `config.yaml` с ID шаблона. Также необходимо запросить разрешения `Card.Streaming.Write` и `Card.Instance.Write`.
+
 **Доступные команды**

 | Команда | Описание |
@@ -194,7 +194,7 @@ make down   # 停止并移除容器

 如果你更希望直接在本地启动各个服务：

-前提：先完成上面的“配置”步骤（`make config` 和模型 API key 配置）。`make dev` 需要有效配置文件，默认读取项目根目录下的 `config.yaml`，也可以通过 `DEER_FLOW_CONFIG_PATH` 覆盖。
+前提：先完成上面的“配置”步骤（`make config` 和模型 API key 配置）。`make dev` 需要有效配置文件，默认读取项目根目录下的 `config.yaml`。可以用 `DEER_FLOW_PROJECT_ROOT` 显式指定项目根目录，也可以用 `DEER_FLOW_CONFIG_PATH` 指向某个具体配置文件。运行期状态默认写到项目根目录下的 `.deer-flow`，可用 `DEER_FLOW_HOME` 覆盖；skills 默认读取项目根目录下的 `skills/`，可用 `DEER_FLOW_SKILLS_PATH` 覆盖。
 在 Windows 上，请使用 Git Bash 运行本地开发流程。基于 bash 的服务脚本不支持直接在原生 `cmd.exe` 或 PowerShell 中执行，且 WSL 也不保证可用，因为部分脚本依赖 Git for Windows 的 `cygpath` 等工具。

 1. **检查依赖环境**：
@@ -248,6 +248,7 @@ DeerFlow 支持从即时通讯应用接收任务。只要配置完成，对应
 | Slack | Socket Mode | 中等 |
 | Feishu / Lark | WebSocket | 中等 |
 | 企业微信智能机器人 | WebSocket | 中等 |
+| 钉钉 | Stream Push（WebSocket） | 中等 |

 **`config.yaml` 中的配置示例：**

@@ -304,6 +305,13 @@ channels:
          context:
            thinking_enabled: true
            subagent_enabled: true
+
+  dingtalk:
+    enabled: true
+    client_id: $DINGTALK_CLIENT_ID             # 钉钉开放平台 ClientId
+    client_secret: $DINGTALK_CLIENT_SECRET     # 钉钉开放平台 ClientSecret
+    allowed_users: []                          # 留空表示允许所有人
+    card_template_id: ""                       # 可选：AI 卡片模板 ID，用于流式打字机效果
 ```

 说明：
@@ -327,6 +335,10 @@ FEISHU_APP_SECRET=your_app_secret
 # 企业微信智能机器人
 WECOM_BOT_ID=your_bot_id
 WECOM_BOT_SECRET=your_bot_secret
+
+# 钉钉
+DINGTALK_CLIENT_ID=your_client_id
+DINGTALK_CLIENT_SECRET=your_client_secret
 ```

 **Telegram 配置**
@@ -357,6 +369,13 @@ WECOM_BOT_SECRET=your_bot_secret
 4. 安装后端依赖时确保包含 `wecom-aibot-python-sdk`，渠道会通过 WebSocket 长连接接收消息，无需公网回调地址。
 5. 当前支持文本、图片和文件入站消息；agent 生成的最终图片/文件也会回传到企业微信会话中。

+**钉钉配置**
+
+1. 在 [钉钉开放平台](https://open.dingtalk.com/) 创建应用，并启用 **机器人** 能力。
+2. 在机器人配置页面设置消息接收模式为 **Stream模式**。
+3. 复制 `Client ID` 和 `Client Secret`，在 `.env` 中设置 `DINGTALK_CLIENT_ID` 和 `DINGTALK_CLIENT_SECRET`，并在 `config.yaml` 中启用该渠道。
+4. *（可选）* 如需开启流式 AI 卡片回复（打字机效果），请在[钉钉卡片平台](https://open.dingtalk.com/document/dingstart/typewriter-effect-streaming-ai-card)创建 **AI 卡片**模板，然后在 `config.yaml` 中将 `card_template_id` 设为该模板 ID。同时需要申请 `Card.Streaming.Write` 和 `Card.Instance.Write` 权限。
+
 **命令**

 渠道连接完成后，你可以直接在聊天窗口里和 DeerFlow 交互：
@@ -112,7 +112,7 @@ CI runs these regression tests for every pull request via [.github/workflows/bac
 The backend is split into two layers with a strict dependency direction:

 - **Harness** (`packages/harness/deerflow/`): Publishable agent framework package (`deerflow-harness`). Import prefix: `deerflow.*`. Contains agent orchestration, tools, sandbox, models, MCP, skills, config — everything needed to build and run agents.
- **App** (`app/`): Unpublished application code. Import prefix: `app.*`. Contains the FastAPI Gateway API and IM channel integrations (Feishu, Slack, Telegram).
+- **App** (`app/`): Unpublished application code. Import prefix: `app.*`. Contains the FastAPI Gateway API and IM channel integrations (Feishu, Slack, Telegram, DingTalk).

 **Dependency rule**: App imports deerflow, but deerflow never imports app. This boundary is enforced by `tests/test_harness_boundary.py` which runs in CI.

@@ -127,7 +127,7 @@ from app.gateway.app import app
 from app.channels.service import start_channel_service

 # App → Harness (allowed)
-from deerflow.config.app_config import AppConfig
+from deerflow.config import get_app_config

 # Harness → App (FORBIDDEN — enforced by test_harness_boundary.py)
 # from app.gateway.routers.uploads import ...  # ← will fail CI
@@ -182,16 +182,7 @@ Setup: Copy `config.example.yaml` to `config.yaml` in the **project root** direc

 **Config Versioning**: `config.example.yaml` has a `config_version` field. On startup, `AppConfig.from_file()` compares user version vs example version and emits a warning if outdated. Missing `config_version` = version 0. Run `make config-upgrade` to auto-merge missing fields. When changing the config schema, bump `config_version` in `config.example.yaml`.

-**Config Lifecycle**: All config models are `frozen=True` (immutable after construction). `AppConfig.from_file()` is a pure function — no side effects, no process-global state. The resolved `AppConfig` is passed as an explicit parameter down every consumer lane:
-
- **Gateway**: `app.state.config` populated in lifespan; routers receive it via `Depends(get_config)` from `app/gateway/deps.py`.
- **Client**: `DeerFlowClient._app_config` captured in the constructor; every method reads `self._app_config`.
- **Agent run**: wrapped in `DeerFlowContext(app_config=…)` and injected via LangGraph `Runtime[DeerFlowContext].context`. Middleware and tools read `runtime.context.app_config` directly or via `resolve_context(runtime)`.
- **LangGraph Server bootstrap**: `make_lead_agent` (registered in `langgraph.json`) calls `AppConfig.from_file()` itself — the only place in production that loads from disk at agent-build time.
-
-To update config at runtime (Gateway API mutations for MCP/Skills), write the new file and call `AppConfig.from_file()` to build a fresh snapshot, then swap `app.state.config`. No mtime detection, no auto-reload, no ambient ContextVar lookup (`AppConfig.current()` has been removed).
-
-**DeerFlowContext**: Per-invocation typed context for the agent execution path, injected via LangGraph `Runtime[DeerFlowContext]`. Holds `app_config: AppConfig`, `thread_id: str`, `agent_name: str | None`. Gateway runtime and `DeerFlowClient` construct full `DeerFlowContext` at invoke time; the LangGraph Server boundary builds one inside `make_lead_agent`. Middleware and tools access context through `resolve_context(runtime)` which returns the typed `DeerFlowContext` — legacy dict/None shapes are rejected. Mutable runtime state (`sandbox_id`) flows through `ThreadState.sandbox`, not context.
+**Config Caching**: `get_app_config()` caches the parsed config, but automatically reloads it when the resolved config path changes or the file's mtime increases. This keeps Gateway and LangGraph reads aligned with `config.yaml` edits without requiring a manual process restart.

 Configuration priority:
 1. Explicit `config_path` argument
@@ -214,7 +205,7 @@ Configuration priority:

 ### Gateway API (`app/gateway/`)

-FastAPI application on port 8001 with health check at `GET /health`.
+FastAPI application on port 8001 with health check at `GET /health`. Set `GATEWAY_ENABLE_DOCS=false` to disable `/docs`, `/redoc`, and `/openapi.json` in production (default: enabled).

 **Routers**:

@@ -272,8 +263,10 @@ Proxied through nginx: `/api/langgraph/*` → LangGraph, all other `/api/*` →
   - `present_files` - Make output files visible to user (only `/mnt/user-data/outputs`)
   - `ask_clarification` - Request clarification (intercepted by ClarificationMiddleware → interrupts)
   - `view_image` - Read image as base64 (added only if model supports vision)
+   - `setup_agent` - Bootstrap-only: persist a brand-new custom agent's `SOUL.md` and `config.yaml`. Bound only when `is_bootstrap=True`.
+   - `update_agent` - Custom-agent-only: persist self-updates to the current agent's `SOUL.md` / `config.yaml` from inside a normal chat (partial update + atomic write). Bound when `agent_name` is set and `is_bootstrap=False`.
 4. **Subagent tool** (if enabled):
-   - `task` - Delegate to subagent (description, prompt, subagent_type, max_turns)
+   - `task` - Delegate to subagent (description, prompt, subagent_type)

 **Community tools** (`packages/harness/deerflow/community/`):
 - `tavily/` - Web search (5 results default) and web fetch (4KB limit)
@@ -321,7 +314,8 @@ Proxied through nginx: `/api/langgraph/*` → LangGraph, all other `/api/*` →

 ### IM Channels System (`app/channels/`)

-Bridges external messaging platforms (Feishu, Slack, Telegram) to the DeerFlow agent via Gateway's LangGraph-compatible API.
+Bridges external messaging platforms (Feishu, Slack, Telegram, DingTalk) to the DeerFlow agent via Gateway's LangGraph-compatible API.
+

 **Architecture**: Channels communicate with Gateway through the `langgraph-sdk` HTTP client (same as the frontend), ensuring threads are created and managed server-side. The internal SDK client injects process-local internal auth plus a matching CSRF cookie/header pair so Gateway accepts state-changing thread/run requests from channel workers without relying on browser session cookies.

@@ -331,7 +325,7 @@ Bridges external messaging platforms (Feishu, Slack, Telegram) to the DeerFlow a
 - `manager.py` - Core dispatcher: creates threads via `client.threads.create()`, routes commands, keeps Slack/Telegram on `client.runs.wait()`, and uses `client.runs.stream(["messages-tuple", "values"])` for Feishu incremental outbound updates
 - `base.py` - Abstract `Channel` base class (start/stop/send lifecycle)
 - `service.py` - Manages lifecycle of all configured channels from `config.yaml`
- `slack.py` / `feishu.py` / `telegram.py` - Platform-specific implementations (`feishu.py` tracks the running card `message_id` in memory and patches the same card in place)
+- `slack.py` / `feishu.py` / `telegram.py` / `dingtalk.py` - Platform-specific implementations (`feishu.py` tracks the running card `message_id` in memory and patches the same card in place; `dingtalk.py` optionally uses AI Card streaming for in-place updates when `card_template_id` is configured)

 **Message Flow**:
 1. External platform -> Channel impl -> `MessageBus.publish_inbound()`
@@ -340,14 +334,16 @@ Bridges external messaging platforms (Feishu, Slack, Telegram) to the DeerFlow a
 4. Feishu chat: `runs.stream()` → accumulate AI text → publish multiple outbound updates (`is_final=False`) → publish final outbound (`is_final=True`)
 5. Slack/Telegram chat: `runs.wait()` → extract final response → publish outbound
 6. Feishu channel sends one running reply card up front, then patches the same card for each outbound update (card JSON sets `config.update_multi=true` for Feishu's patch API requirement)
-7. For commands (`/new`, `/status`, `/models`, `/memory`, `/help`): handle locally or query Gateway API
-8. Outbound → channel callbacks → platform reply
+7. DingTalk AI Card mode (when `card_template_id` is configured): `runs.stream()` → create card with initial text → stream updates via `PUT /v1.0/card/streaming` → finalize on `is_final=True`. Falls back to `sampleMarkdown` if card creation or streaming fails.
+8. For commands (`/new`, `/status`, `/models`, `/memory`, `/help`): handle locally or query Gateway API
+9. Outbound → channel callbacks → platform reply

 **Configuration** (`config.yaml` -> `channels`):
 - `langgraph_url` - LangGraph-compatible Gateway API base URL (default: `http://localhost:8001/api`)
 - `gateway_url` - Gateway API URL for auxiliary commands (default: `http://localhost:8001`)
 - In Docker Compose, IM channels run inside the `gateway` container, so `localhost` points back to that container. Use `http://gateway:8001/api` for `langgraph_url` and `http://gateway:8001` for `gateway_url`, or set `DEER_FLOW_CHANNELS_LANGGRAPH_URL` / `DEER_FLOW_CHANNELS_GATEWAY_URL`.
- Per-channel configs: `feishu` (app_id, app_secret), `slack` (bot_token, app_token), `telegram` (bot_token)
+- Per-channel configs: `feishu` (app_id, app_secret), `slack` (bot_token, app_token), `telegram` (bot_token), `dingtalk` (client_id, client_secret, optional `card_template_id` for AI Card streaming)
+

 ### Memory System (`packages/harness/deerflow/agents/memory/`)

@@ -360,10 +356,11 @@ Bridges external messaging platforms (Feishu, Slack, Telegram) to the DeerFlow a
 **Per-User Isolation**:
 - Memory is stored per-user at `{base_dir}/users/{user_id}/memory.json`
 - Per-agent per-user memory at `{base_dir}/users/{user_id}/agents/{agent_name}/memory.json`
+- Custom agent definitions (`SOUL.md` + `config.yaml`) are also per-user at `{base_dir}/users/{user_id}/agents/{agent_name}/`. The legacy shared layout `{base_dir}/agents/{agent_name}/` remains a read-only fallback for unmigrated installations
 - `user_id` is resolved via `get_effective_user_id()` from `deerflow.runtime.user_context`
 - In no-auth mode, `user_id` defaults to `"default"` (constant `DEFAULT_USER_ID`)
 - Absolute `storage_path` in config opts out of per-user isolation
- **Migration**: Run `PYTHONPATH=. python scripts/migrate_user_isolation.py` to move legacy `memory.json` and `threads/` into per-user layout; supports `--dry-run`
+- **Migration**: Run `PYTHONPATH=. python scripts/migrate_user_isolation.py` to move legacy `memory.json`, `threads/`, and `agents/` into per-user layout. Supports `--dry-run` (preview changes) and `--user-id USER_ID` (assign unowned legacy data to a user, defaults to `default`).

 **Data Structure** (stored in `{base_dir}/users/{user_id}/memory.json`):
 - **User Context**: `workContext`, `personalContext`, `topOfMind` (1-3 sentence summaries)
@@ -523,6 +520,7 @@ Multi-file upload with automatic document conversion:
 - Rejects directory inputs before copying so uploads stay all-or-nothing
 - Reuses one conversion worker per request when called from an active event loop
 - Files stored in thread-isolated directories
+- Duplicate filenames in a single upload request are auto-renamed with `_N` suffixes so later files do not truncate earlier files
 - Agent receives uploaded file list via `UploadsMiddleware`

 See [docs/FILE_UPLOAD.md](docs/FILE_UPLOAD.md) for details.
@@ -50,6 +50,12 @@ COPY backend ./backend
 RUN --mount=type=cache,target=/root/.cache/uv \
    sh -c "cd backend && UV_INDEX_URL=${UV_INDEX_URL:-https://pypi.org/simple} uv sync ${UV_EXTRAS:+--extra $UV_EXTRAS}"

+# UTF-8 locale prevents UnicodeEncodeError on Chinese/emoji content in minimal
+# containers where locale configuration may be missing and the default encoding is not UTF-8.
+ENV LANG=C.UTF-8
+ENV LC_ALL=C.UTF-8
+ENV PYTHONIOENCODING=utf-8
+
 # ── Stage 2: Dev ──────────────────────────────────────────────────────────────
 # Retains compiler toolchain from builder so startup-time `uv sync` can build
 # source distributions in development containers.
@@ -66,6 +72,10 @@ CMD ["sh", "-c", "cd backend && PYTHONPATH=. uv run uvicorn app.gateway.app:app
 # Clean image without build-essential — reduces size (~200 MB) and attack surface.
 FROM python:3.12-slim-bookworm

+ENV LANG=C.UTF-8
+ENV LC_ALL=C.UTF-8
+ENV PYTHONIOENCODING=utf-8
+
 # Copy Node.js runtime from builder (provides npx for MCP servers)
 COPY --from=builder /usr/bin/node /usr/bin/node
 COPY --from=builder /usr/lib/node_modules /usr/lib/node_modules
@@ -124,7 +124,7 @@ FastAPI application providing REST endpoints for frontend integration:
 | `POST /api/memory/reload` | Force memory reload |
 | `GET /api/memory/config` | Memory configuration |
 | `GET /api/memory/status` | Combined config + data |
-| `POST /api/threads/{id}/uploads` | Upload files (auto-converts PDF/PPT/Excel/Word to Markdown, rejects directory paths) |
+| `POST /api/threads/{id}/uploads` | Upload files (auto-converts PDF/PPT/Excel/Word to Markdown, rejects directory paths, auto-renames duplicate filenames in one request) |
 | `GET /api/threads/{id}/uploads/list` | List uploaded files |
 | `DELETE /api/threads/{id}` | Delete DeerFlow-managed local thread data after LangGraph thread deletion; unexpected failures are logged server-side and return a generic 500 detail |
 | `GET /api/threads/{id}/artifacts/{path}` | Serve generated artifacts |
@@ -31,6 +31,10 @@ class Channel(ABC):
    def is_running(self) -> bool:
        return self._running

+    @property
+    def supports_streaming(self) -> bool:
+        """Report whether this channel supports streaming; this implementation returns ``False``.
+
+        NOTE(review): presumably overridden by channel implementations that can
+        push incremental outbound updates -- confirm against the subclasses.
+        """
+        return False
+
    # -- lifecycle ---------------------------------------------------------

    @abstractmethod
@@ -0,0 +1,740 @@
+"""DingTalk channel implementation."""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+import re
+import threading
+import time
+from pathlib import Path
+from typing import Any
+
+import httpx
+
+from app.channels.base import Channel
+from app.channels.commands import KNOWN_CHANNEL_COMMANDS
+from app.channels.message_bus import InboundMessage, InboundMessageType, MessageBus, OutboundMessage, ResolvedAttachment
+
+logger = logging.getLogger(__name__)
+
+DINGTALK_API_BASE = "https://api.dingtalk.com"
+
+_TOKEN_REFRESH_MARGIN_SECONDS = 300
+
+_CONVERSATION_TYPE_P2P = "1"
+_CONVERSATION_TYPE_GROUP = "2"
+
+_MAX_UPLOAD_SIZE_BYTES = 20 * 1024 * 1024
+
+
+def _normalize_conversation_type(raw: Any) -> str:
+    """Coerce a raw ``conversationType`` value into the P2P or group constant.
+
+    The value is stringified and stripped before comparison, so ints and
+    padded strings are accepted. Only an exact group marker maps to the group
+    constant; ``None`` and every other value are treated as P2P.
+    """
+    is_group = raw is not None and str(raw).strip() == _CONVERSATION_TYPE_GROUP
+    return _CONVERSATION_TYPE_GROUP if is_group else _CONVERSATION_TYPE_P2P
+
+
+def _normalize_allowed_users(allowed_users: Any) -> set[str]:
+    """Turn the ``allowed_users`` config value into a set of non-empty ID strings.
+
+    ``None`` gives an empty set. Lists, tuples and sets are used element by
+    element. Any other value is wrapped as a single entry; a warning is logged
+    when that value is not a string. Entries whose string form is empty are
+    dropped.
+    """
+    if allowed_users is None:
+        return set()
+    if isinstance(allowed_users, (list, tuple, set)):
+        candidates = allowed_users
+    else:
+        if not isinstance(allowed_users, str):
+            logger.warning(
+                "DingTalk allowed_users should be a list of user IDs; treating %s as one string value",
+                type(allowed_users).__name__,
+            )
+        candidates = [allowed_users]
+    as_strings = (str(entry) for entry in candidates)
+    return {user_id for user_id in as_strings if user_id}
+
+
+def _is_dingtalk_command(text: str) -> bool:
+    """Return True when *text* starts with ``/`` and its first token is a known channel command.
+
+    The first whitespace-delimited token is lower-cased before the lookup in
+    ``KNOWN_CHANNEL_COMMANDS``.
+    """
+    if text.startswith("/"):
+        first_token = text.split(maxsplit=1)[0]
+        return first_token.lower() in KNOWN_CHANNEL_COMMANDS
+    return False
+
+
+def _extract_text_from_rich_text(rich_text_list: list) -> str:
+    """Join the ``text`` fields of rich-text items with single spaces.
+
+    Items that are not dicts, that lack a ``text`` key, or whose ``text`` value
+    is not a string are skipped. A missing (``None``) or empty list yields an
+    empty string instead of raising.
+    """
+    if not rich_text_list:
+        return ""
+    return " ".join(
+        item["text"]
+        for item in rich_text_list
+        # Non-string values would make str.join raise TypeError.
+        if isinstance(item, dict) and isinstance(item.get("text"), str)
+    )
+
+
+# Fenced code block: group 1 is the language tag (first token of the info
+# string, possibly empty) and group 2 is the block body. The tag accepts any
+# non-space, non-backtick characters so that tags such as ``c++`` or
+# ``objective-c`` open a block. With a ``\w*``-only tag those openers fail to
+# match and the closing fence gets paired with the next block's opening fence.
+_FENCED_CODE_BLOCK_RE = re.compile(r"```[ \t]*([^\s`]*)(?:[ \t][^\n`]*)?\n(.*?)```", re.DOTALL)
+# Single-line inline code span.
+_INLINE_CODE_RE = re.compile(r"`([^`\n]+)`")
+# A line made only of three or more dashes.
+_HORIZONTAL_RULE_RE = re.compile(r"^-{3,}$", re.MULTILINE)
+# Header/body separator row of a pipe-delimited table.
+_TABLE_SEPARATOR_RE = re.compile(r"^\|[-:| ]+\|$", re.MULTILINE)
+
+
+def _convert_markdown_table(text: str) -> str:
+    """Rewrite pipe-delimited tables as quoted ``**header**: cell`` lines.
+
+    DingTalk sampleMarkdown does not render pipe-delimited tables. A table is
+    recognised as a line starting with ``|`` that is immediately followed by a
+    separator row. Each following row that starts with ``|`` becomes one quoted
+    line per header/cell pair, followed by an empty line. All other lines are
+    passed through unchanged.
+    """
+
+    def _split_row(row: str) -> list[str]:
+        # Drop the outer pipes, then trim every cell.
+        return [cell.strip() for cell in row.strip().strip("|").split("|")]
+
+    source = text.split("\n")
+    total = len(source)
+    out: list[str] = []
+    pos = 0
+    while pos < total:
+        current = source[pos]
+        starts_table = (
+            pos + 1 < total
+            and current.strip().startswith("|")
+            and _TABLE_SEPARATOR_RE.match(source[pos + 1].strip())
+        )
+        if not starts_table:
+            out.append(current)
+            pos += 1
+            continue
+
+        column_names = _split_row(current)
+        pos += 2  # step over the header row and the separator row
+        while pos < total and source[pos].strip().startswith("|"):
+            out.extend(f"> **{name}**: {value}" for name, value in zip(column_names, _split_row(source[pos])))
+            out.append("")
+            pos += 1
+    return "\n".join(out)
+
+
+def _adapt_markdown_for_dingtalk(text: str) -> str:
+    """Rewrite markdown constructs for DingTalk's limited sampleMarkdown renderer.
+
+    Applied in order: fenced code blocks become block quotes (with a bold
+    language line when a language tag is present), inline code becomes bold,
+    pipe tables are flattened by ``_convert_markdown_table``, and dash-only
+    rules are replaced by a line of box-drawing characters.
+    """
+
+    def _quote_fenced_block(found: re.Match) -> str:
+        language = found.group(1)
+        body = found.group(2).rstrip("\n")
+        header = f"> **{language}**\n" if language else ""
+        quoted = "\n".join(f"> {code_line}" for code_line in body.split("\n"))
+        return f"{header}{quoted}\n"
+
+    without_fences = _FENCED_CODE_BLOCK_RE.sub(_quote_fenced_block, text)
+    without_inline_code = _INLINE_CODE_RE.sub(r"**\1**", without_fences)
+    without_tables = _convert_markdown_table(without_inline_code)
+    return _HORIZONTAL_RULE_RE.sub("───────────", without_tables)
+
+
+class DingTalkChannel(Channel):
+    """DingTalk IM channel using Stream Push (WebSocket, no public IP needed)."""
+
+    def __init__(self, bus: MessageBus, config: dict[str, Any]) -> None:
+        """Set up per-instance state; credentials are read from *config* later, in ``start``."""
+        super().__init__(name="dingtalk", bus=bus, config=config)
+        # Thread that runs the stream client; created in start().
+        self._thread: threading.Thread | None = None
+        # Event loop captured in start(); the stream thread schedules coroutines onto it.
+        self._main_loop: asyncio.AbstractEventLoop | None = None
+        self._client_id: str = ""
+        self._client_secret: str = ""
+        # An empty set disables the allow-list check in _on_chatbot_message.
+        self._allowed_users: set[str] = _normalize_allowed_users(config.get("allowed_users"))
+        # Access-token cache; _token_expires_at is a time.monotonic() deadline.
+        self._cached_token: str = ""
+        self._token_expires_at: float = 0.0
+        self._token_lock = asyncio.Lock()
+        # A non-empty template ID switches replies to AI-card mode.
+        self._card_template_id: str = config.get("card_template_id", "")
+        # Card source key -> card track ID returned when the card was created.
+        self._card_track_ids: dict[str, str] = {}
+        self._dingtalk_client: Any = None
+        self._stream_client: Any = None
+        # Card source key -> raw SDK message awaiting card creation; guarded by the lock below.
+        self._incoming_messages: dict[str, Any] = {}
+        self._incoming_messages_lock = threading.Lock()
+        # Card track ID -> replier object used for later card updates.
+        self._card_repliers: dict[str, Any] = {}
+
+    @property
+    def supports_streaming(self) -> bool:
+        """Report streaming support, which is on only when a ``card_template_id`` is configured."""
+        return bool(self._card_template_id)
+
+    async def start(self) -> None:
+        """Start the channel: check prerequisites, subscribe to outbound messages, launch the stream thread.
+
+        Returns without starting (after logging an error) when ``dingtalk_stream``
+        cannot be imported or when ``client_id``/``client_secret`` is missing from
+        the config. Calling it while already running is a no-op.
+        """
+        if self._running:
+            return
+
+        try:
+            import dingtalk_stream  # noqa: F401
+        except ImportError:
+            logger.error("dingtalk-stream is not installed. Install it with: uv add dingtalk-stream")
+            return
+
+        client_id = self.config.get("client_id", "")
+        client_secret = self.config.get("client_secret", "")
+
+        if not client_id or not client_secret:
+            logger.error("DingTalk channel requires client_id and client_secret")
+            return
+
+        self._client_id = client_id
+        self._client_secret = client_secret
+        # Captured so the stream thread can schedule coroutines back onto this loop.
+        self._main_loop = asyncio.get_running_loop()
+
+        if self._card_template_id:
+            logger.info("[DingTalk] AI Card mode enabled (template=%s)", self._card_template_id)
+
+        self._running = True
+        self.bus.subscribe_outbound(self._on_outbound)
+
+        # _run_stream calls the SDK's start_forever(), so it gets its own daemon thread.
+        self._thread = threading.Thread(
+            target=self._run_stream,
+            args=(client_id, client_secret),
+            daemon=True,
+        )
+        self._thread.start()
+        logger.info("DingTalk channel started")
+
+    async def stop(self) -> None:
+        """Stop the channel: unsubscribe, disconnect the stream client, and drop per-message state.
+
+        Errors raised while disconnecting are logged at debug level and ignored.
+        The stream thread is joined for at most 5 seconds.
+        """
+        self._running = False
+        self.bus.unsubscribe_outbound(self._on_outbound)
+
+        # Work on a local reference: _run_stream resets the attribute from its own thread.
+        stream_client = self._stream_client
+        if stream_client is not None:
+            try:
+                if hasattr(stream_client, "disconnect"):
+                    stream_client.disconnect()
+            except Exception:
+                logger.debug("[DingTalk] error disconnecting stream client", exc_info=True)
+
+        self._dingtalk_client = None
+        self._stream_client = None
+        with self._incoming_messages_lock:
+            self._incoming_messages.clear()
+        self._card_repliers.clear()
+        self._card_track_ids.clear()
+        if self._thread:
+            self._thread.join(timeout=5)
+            self._thread = None
+        logger.info("DingTalk channel stopped")
+
+    def _resolve_routing(self, msg: OutboundMessage) -> tuple[str, str, str]:
+        """Work out where *msg* goes: ``(conversation_type, sender_staff_id, conversation_id)``.
+
+        ``msg.chat_id`` is the preferred routing key. It replaces the
+        conversation ID for group chats and the staff ID for P2P chats; the
+        metadata value is used when ``chat_id`` is empty.
+        """
+        meta = msg.metadata
+        kind = _normalize_conversation_type(meta.get("conversation_type"))
+        staff_id = meta.get("sender_staff_id", "")
+        conv_id = meta.get("conversation_id", "")
+        if kind == _CONVERSATION_TYPE_GROUP:
+            return kind, staff_id, msg.chat_id or conv_id
+        return kind, msg.chat_id or staff_id, conv_id
+
+    async def send(self, msg: OutboundMessage, *, _max_retries: int = 3) -> None:
+        """Deliver *msg*, through its tracked AI card when there is one, otherwise as sampleMarkdown.
+
+        Card path: the tracked card is updated in place. If the update fails on
+        a final message, the tracking entries are dropped and the text is sent
+        once via ``_send_markdown_fallback``; a failed non-final update is only
+        logged.
+
+        Non-card path: up to ``_max_retries`` attempts, sleeping ``2**attempt``
+        seconds between them.
+
+        Raises:
+            Exception: the last send error once all attempts have failed.
+            RuntimeError: when no attempt was made at all (``_max_retries`` < 1).
+        """
+        conversation_type, sender_staff_id, conversation_id = self._resolve_routing(msg)
+        robot_code = self._client_id
+
+        # Card mode: stream update to existing AI card
+        source_key = self._make_card_source_key_from_outbound(msg)
+        out_track_id = self._card_track_ids.get(source_key)
+
+        # ``card_template_id`` enables ``runs.stream`` (non-final + final outbounds).
+        # If card creation failed, skip non-final chunks to avoid duplicate messages.
+        if self._card_template_id and not out_track_id and not msg.is_final:
+            return
+
+        if out_track_id:
+            try:
+                await self._stream_update_card(
+                    out_track_id,
+                    msg.text,
+                    is_finalize=msg.is_final,
+                )
+            except Exception:
+                logger.warning("[DingTalk] card stream failed, falling back to sampleMarkdown")
+                if msg.is_final:
+                    self._card_track_ids.pop(source_key, None)
+                    self._card_repliers.pop(out_track_id, None)
+                    await self._send_markdown_fallback(robot_code, conversation_type, sender_staff_id, conversation_id, msg.text)
+                    return
+            # A final message ends the card's lifetime, so forget its tracking entries.
+            if msg.is_final:
+                self._card_track_ids.pop(source_key, None)
+                self._card_repliers.pop(out_track_id, None)
+            return
+
+        # Non-card mode: send sampleMarkdown with retry
+        last_exc: Exception | None = None
+        for attempt in range(_max_retries):
+            try:
+                if conversation_type == _CONVERSATION_TYPE_GROUP:
+                    await self._send_group_message(robot_code, conversation_id, msg.text, at_user_ids=[sender_staff_id] if sender_staff_id else None)
+                else:
+                    await self._send_p2p_message(robot_code, sender_staff_id, msg.text)
+                return
+            except Exception as exc:
+                last_exc = exc
+                if attempt < _max_retries - 1:
+                    delay = 2**attempt
+                    logger.warning(
+                        "[DingTalk] send failed (attempt %d/%d), retrying in %ds: %s",
+                        attempt + 1,
+                        _max_retries,
+                        delay,
+                        exc,
+                    )
+                    await asyncio.sleep(delay)
+
+        logger.error("[DingTalk] send failed after %d attempts: %s", _max_retries, last_exc)
+        if last_exc is None:
+            raise RuntimeError("DingTalk send failed without an exception from any attempt")
+        raise last_exc
+
+    async def _send_markdown_fallback(
+        self,
+        robot_code: str,
+        conversation_type: str,
+        sender_staff_id: str,
+        conversation_id: str,
+        text: str,
+    ) -> None:
+        """Send *text* once as sampleMarkdown after a card update has failed.
+
+        Group conversations go to the conversation and everything else goes to
+        the individual user. A failure is logged with its traceback and then
+        re-raised.
+        """
+        to_group = conversation_type == _CONVERSATION_TYPE_GROUP
+        try:
+            if not to_group:
+                await self._send_p2p_message(robot_code, sender_staff_id, text)
+            else:
+                await self._send_group_message(robot_code, conversation_id, text)
+        except Exception:
+            logger.exception("[DingTalk] markdown fallback also failed")
+            raise
+
+    async def send_file(self, msg: OutboundMessage, attachment: ResolvedAttachment) -> bool:
+        """Upload *attachment* and send it to the chat that *msg* routes to.
+
+        Returns ``True`` once the send request has succeeded. Returns ``False``
+        when the attachment exceeds ``_MAX_UPLOAD_SIZE_BYTES``, when the upload
+        yields no media ID, or when one of the handled errors occurs (which is
+        logged with its traceback).
+        """
+        if attachment.size > _MAX_UPLOAD_SIZE_BYTES:
+            logger.warning("[DingTalk] file too large (%d bytes), skipping: %s", attachment.size, attachment.filename)
+            return False
+
+        conversation_type, sender_staff_id, conversation_id = self._resolve_routing(msg)
+        robot_code = self._client_id
+
+        try:
+            upload_kind = "image" if attachment.is_image else "file"
+            media_id = await self._upload_media(attachment.actual_path, upload_kind)
+            if not media_id:
+                return False
+
+            if attachment.is_image:
+                msg_key = "sampleImageMsg"
+                params: dict[str, Any] = {"photoURL": media_id}
+            else:
+                msg_key = "sampleFile"
+                params = {
+                    "fileUrl": media_id,
+                    "fileName": attachment.filename,
+                    "fileSize": str(attachment.size),
+                }
+
+            # Both endpoints share the first three fields; only the target field differs.
+            payload: dict[str, Any] = {
+                "msgKey": msg_key,
+                "msgParam": json.dumps(params),
+                "robotCode": robot_code,
+            }
+            if conversation_type == _CONVERSATION_TYPE_GROUP:
+                endpoint = f"{DINGTALK_API_BASE}/v1.0/robot/groupMessages/send"
+                payload["openConversationId"] = conversation_id
+            else:
+                endpoint = f"{DINGTALK_API_BASE}/v1.0/robot/oToMessages/batchSend"
+                payload["userIds"] = [sender_staff_id]
+
+            token = await self._get_access_token()
+            async with httpx.AsyncClient(timeout=httpx.Timeout(30.0)) as client:
+                response = await client.post(endpoint, headers=self._api_headers(token), json=payload)
+                response.raise_for_status()
+
+            logger.info("[DingTalk] file sent: %s", attachment.filename)
+            return True
+        except (httpx.HTTPError, OSError, ValueError, TypeError, AttributeError):
+            logger.exception("[DingTalk] failed to send file: %s", attachment.filename)
+            return False
+
+    # -- stream client (runs in dedicated thread) --------------------------
+
+    def _run_stream(self, client_id: str, client_secret: str) -> None:
+        """Thread target: build the stream client, register the message handler, run it.
+
+        Exceptions are logged only while the channel is still marked running.
+        ``_stream_client`` is reset to ``None`` when the method exits, whatever
+        the outcome.
+        """
+        try:
+            import dingtalk_stream
+
+            credential = dingtalk_stream.Credential(client_id, client_secret)
+            client = dingtalk_stream.DingTalkStreamClient(credential)
+            # Published so stop() can reach the client and disconnect it.
+            self._stream_client = client
+            client.register_callback_handler(
+                dingtalk_stream.chatbot.ChatbotMessage.TOPIC,
+                _DingTalkMessageHandler(self),
+            )
+            client.start_forever()
+        except Exception:
+            if self._running:
+                logger.exception("DingTalk Stream Push error")
+        finally:
+            self._stream_client = None
+
+    def _on_chatbot_message(self, message: Any) -> None:
+        """Turn one SDK chatbot message into an inbound message and schedule it on the main loop.
+
+        Called from the stream thread. The message is dropped when the channel
+        is not running, when the sender is not in a non-empty allow-list, when
+        no text can be extracted, or when the main loop is unavailable. Every
+        exception is logged here and never propagates to the SDK.
+        """
+        if not self._running:
+            return
+        try:
+            sender_staff_id = message.sender_staff_id or ""
+            conversation_type = _normalize_conversation_type(message.conversation_type)
+            conversation_id = message.conversation_id or ""
+            msg_id = message.message_id or ""
+            sender_nick = message.sender_nick or ""
+
+            if self._allowed_users and sender_staff_id not in self._allowed_users:
+                logger.debug("[DingTalk] ignoring message from non-allowed user: %s", sender_staff_id)
+                return
+
+            text = self._extract_text(message)
+            if not text:
+                logger.info("[DingTalk] empty text, ignoring message")
+                return
+
+            logger.info(
+                "[DingTalk] parsed message: conv_type=%s, msg_id=%s, sender=%s(%s), text=%r",
+                conversation_type,
+                msg_id,
+                sender_staff_id,
+                sender_nick,
+                text[:100],
+            )
+
+            if _is_dingtalk_command(text):
+                msg_type = InboundMessageType.COMMAND
+            else:
+                msg_type = InboundMessageType.CHAT
+
+            # P2P: topic_id=None (single thread per user, like Telegram private chat)
+            # Group: topic_id=msg_id (each new message starts a new topic, like Feishu)
+            topic_id: str | None = msg_id if conversation_type == _CONVERSATION_TYPE_GROUP else None
+
+            # chat_id uses conversation_id for groups, sender_staff_id for P2P
+            chat_id = conversation_id if conversation_type == _CONVERSATION_TYPE_GROUP else sender_staff_id
+
+            inbound = self._make_inbound(
+                chat_id=chat_id,
+                user_id=sender_staff_id,
+                text=text,
+                msg_type=msg_type,
+                thread_ts=msg_id,
+                metadata={
+                    "conversation_type": conversation_type,
+                    "conversation_id": conversation_id,
+                    "sender_staff_id": sender_staff_id,
+                    "sender_nick": sender_nick,
+                    "message_id": msg_id,
+                },
+            )
+            inbound.topic_id = topic_id
+
+            main_loop = self._main_loop
+            if not (main_loop and main_loop.is_running()):
+                logger.warning("[DingTalk] main loop not running, cannot publish inbound message")
+                return
+
+            # Stash the raw SDK message only once it is known that it will be
+            # handed to the loop: _send_running_reply is what pops the entry, so
+            # stashing it for an undeliverable message would leave it behind
+            # until stop() clears the mapping.
+            if self._card_template_id:
+                source_key = self._make_card_source_key(inbound)
+                with self._incoming_messages_lock:
+                    self._incoming_messages[source_key] = message
+
+            logger.info("[DingTalk] publishing inbound message to bus (type=%s, msg_id=%s)", msg_type.value, msg_id)
+            fut = asyncio.run_coroutine_threadsafe(
+                self._prepare_inbound(chat_id, inbound),
+                main_loop,
+            )
+            fut.add_done_callback(lambda f, mid=msg_id: self._log_future_error(f, "prepare_inbound", mid))
+        except Exception:
+            logger.exception("[DingTalk] error processing chatbot message")
+
+    @staticmethod
+    def _extract_text(message: Any) -> str:
+        """Return the stripped text of a ``text`` or ``richText`` message, else ``""``.
+
+        A ``text`` message whose content is missing or not a string yields an
+        empty string rather than raising, so callers can treat it like any
+        other empty message.
+        """
+        msg_type = message.message_type
+        if msg_type == "text" and message.text:
+            content = getattr(message.text, "content", None)
+            return content.strip() if isinstance(content, str) else ""
+        if msg_type == "richText" and message.rich_text_content:
+            return _extract_text_from_rich_text(message.rich_text_content.rich_text_list).strip()
+        return ""
+
+    async def _prepare_inbound(self, chat_id: str, inbound: InboundMessage) -> None:
+        """Send the running reply for *inbound*, then publish it on the bus."""
+        # Running reply must finish before publish_inbound so AI card tracks are
+        # registered before the manager emits streaming outbounds.
+        await self._send_running_reply(chat_id, inbound)
+        await self.bus.publish_inbound(inbound)
+
+    async def _send_running_reply(self, chat_id: str, inbound: InboundMessage) -> None:
+        """Acknowledge *inbound* with a "Working on it..." reply.
+
+        In card mode an AI card is created and its track ID is recorded under
+        the message's source key. When no card is produced, or card mode is
+        off, a plain text message is sent instead. Failures are logged and
+        swallowed so that the inbound message is still published.
+        """
+        conversation_type = inbound.metadata.get("conversation_type", _CONVERSATION_TYPE_P2P)
+        sender_staff_id = inbound.metadata.get("sender_staff_id", "")
+        conversation_id = inbound.metadata.get("conversation_id", "")
+        text = "\u23f3 Working on it..."
+
+        try:
+            if self._card_template_id:
+                source_key = self._make_card_source_key(inbound)
+                # Take the raw SDK message stashed by _on_chatbot_message; pop so it is not retained.
+                with self._incoming_messages_lock:
+                    chatbot_message = self._incoming_messages.pop(source_key, None)
+                out_track_id = await self._create_and_deliver_card(
+                    text,
+                    chatbot_message=chatbot_message,
+                )
+                if out_track_id:
+                    self._card_track_ids[source_key] = out_track_id
+                    logger.info("[DingTalk] AI card running reply sent for chat=%s", chat_id)
+                    return
+
+            # Plain-text path: card mode is off or no card could be created.
+            robot_code = self._client_id
+            if conversation_type == _CONVERSATION_TYPE_GROUP:
+                await self._send_text_message_to_group(robot_code, conversation_id, text)
+            else:
+                await self._send_text_message_to_user(robot_code, sender_staff_id, text)
+            logger.info("[DingTalk] 'Working on it...' reply sent for chat=%s", chat_id)
+        except Exception:
+            logger.exception("[DingTalk] failed to send running reply for chat=%s", chat_id)
+
+    # -- DingTalk API helpers ----------------------------------------------
+
+    async def _get_access_token(self) -> str:
+        """Return a cached access token, fetching a new one when the cache has expired.
+
+        The cache is checked again after the lock is acquired, so callers that
+        waited on the lock reuse the token fetched by the caller ahead of them.
+
+        Raises:
+            httpx.HTTPStatusError: via ``raise_for_status`` on an error response.
+            ValueError: when the response is not a JSON object or carries no
+                usable ``accessToken``.
+        """
+        if self._cached_token and time.monotonic() < self._token_expires_at:
+            return self._cached_token
+        async with self._token_lock:
+            if self._cached_token and time.monotonic() < self._token_expires_at:
+                return self._cached_token
+            async with httpx.AsyncClient(timeout=httpx.Timeout(10.0)) as client:
+                response = await client.post(
+                    f"{DINGTALK_API_BASE}/v1.0/oauth2/accessToken",
+                    json={"appKey": self._client_id, "appSecret": self._client_secret},  # DingTalk API field names
+                )
+                response.raise_for_status()
+                data = response.json()
+
+                if not isinstance(data, dict):
+                    raise ValueError(f"DingTalk access token response must be a JSON object, got {type(data).__name__}")
+
+                access_token = data.get("accessToken")
+                if not isinstance(access_token, str) or not access_token.strip():
+                    raise ValueError("DingTalk access token response did not contain a usable accessToken")
+
+                raw_expires_in = data.get("expireIn", 7200)
+                try:
+                    expires_in = int(raw_expires_in)
+                except (TypeError, ValueError):
+                    logger.warning("[DingTalk] invalid expireIn value %r, using default 7200s", raw_expires_in)
+                    expires_in = 7200
+
+                self._cached_token = access_token.strip()
+                # Expire the cache early by the refresh margin so a token is not used right at its deadline.
+                self._token_expires_at = time.monotonic() + expires_in - _TOKEN_REFRESH_MARGIN_SECONDS
+                return self._cached_token
+
+    @staticmethod
+    def _api_headers(token: str) -> dict[str, str]:
+        """Build the JSON request headers that carry the access *token*."""
+        headers = {"x-acs-dingtalk-access-token": token}
+        headers["Content-Type"] = "application/json"
+        return headers
+
+    async def _send_text_message_to_user(self, robot_code: str, user_id: str, text: str) -> None:
+        """Send plain *text* (``sampleText``) to a single user; HTTP errors propagate."""
+        endpoint = f"{DINGTALK_API_BASE}/v1.0/robot/oToMessages/batchSend"
+        access_token = await self._get_access_token()
+        async with httpx.AsyncClient(timeout=httpx.Timeout(30.0)) as http:
+            body = {
+                "msgKey": "sampleText",
+                "msgParam": json.dumps({"content": text}),
+                "robotCode": robot_code,
+                "userIds": [user_id],
+            }
+            reply = await http.post(endpoint, headers=self._api_headers(access_token), json=body)
+            reply.raise_for_status()
+
+    async def _send_text_message_to_group(self, robot_code: str, conversation_id: str, text: str) -> None:
+        """Send plain *text* (``sampleText``) to a group conversation; HTTP errors propagate."""
+        endpoint = f"{DINGTALK_API_BASE}/v1.0/robot/groupMessages/send"
+        access_token = await self._get_access_token()
+        async with httpx.AsyncClient(timeout=httpx.Timeout(30.0)) as http:
+            body = {
+                "msgKey": "sampleText",
+                "msgParam": json.dumps({"content": text}),
+                "robotCode": robot_code,
+                "openConversationId": conversation_id,
+            }
+            reply = await http.post(endpoint, headers=self._api_headers(access_token), json=body)
+            reply.raise_for_status()
+
+    async def _send_p2p_message(self, robot_code: str, user_id: str, text: str) -> None:
+        """Send *text* to one user as a ``sampleMarkdown`` message.
+
+        The text is first passed through ``_adapt_markdown_for_dingtalk``.
+        Token and HTTP errors propagate to the caller. Problems reading the
+        body of a successful response are only logged.
+        """
+        text = _adapt_markdown_for_dingtalk(text)
+        token = await self._get_access_token()
+        async with httpx.AsyncClient(timeout=httpx.Timeout(30.0)) as client:
+            response = await client.post(
+                f"{DINGTALK_API_BASE}/v1.0/robot/oToMessages/batchSend",
+                headers=self._api_headers(token),
+                json={
+                    "msgKey": "sampleMarkdown",
+                    "msgParam": json.dumps({"title": "DeerFlow", "text": text}),
+                    "robotCode": robot_code,
+                    "userIds": [user_id],
+                },
+            )
+            response.raise_for_status()
+            # The request has already succeeded here. Inspecting the body must
+            # not raise, otherwise the retry loop in send() would post the same
+            # message again.
+            try:
+                data = response.json()
+            except ValueError:
+                data = None
+            if isinstance(data, dict) and data.get("processQueryKey"):
+                logger.info("[DingTalk] P2P message sent to user=%s", user_id)
+            else:
+                logger.warning("[DingTalk] P2P send response: %s", response.text if data is None else data)
+
+    async def _send_group_message(
+        self,
+        robot_code: str,
+        conversation_id: str,
+        text: str,
+        *,
+        at_user_ids: list[str] | None = None,  # noqa: ARG002
+    ) -> None:
+        """Send *text* to a group conversation as a ``sampleMarkdown`` message.
+
+        The text is first passed through ``_adapt_markdown_for_dingtalk``.
+        Token and HTTP errors propagate to the caller. Problems reading the
+        body of a successful response are only logged.
+        """
+        # at_user_ids accepted for call-site compatibility but not passed to the API
+        # (sampleMarkdown does not support @mentions).
+        text = _adapt_markdown_for_dingtalk(text)
+        token = await self._get_access_token()
+
+        async with httpx.AsyncClient(timeout=httpx.Timeout(30.0)) as client:
+            response = await client.post(
+                f"{DINGTALK_API_BASE}/v1.0/robot/groupMessages/send",
+                headers=self._api_headers(token),
+                json={
+                    "msgKey": "sampleMarkdown",
+                    "msgParam": json.dumps({"title": "DeerFlow", "text": text}),
+                    "robotCode": robot_code,
+                    "openConversationId": conversation_id,
+                },
+            )
+            response.raise_for_status()
+            # The request has already succeeded here. Inspecting the body must
+            # not raise, otherwise the retry loop in send() would post the same
+            # message again.
+            try:
+                data = response.json()
+            except ValueError:
+                data = None
+            if isinstance(data, dict) and data.get("processQueryKey"):
+                logger.info("[DingTalk] group message sent to conversation=%s", conversation_id)
+            else:
+                logger.warning("[DingTalk] group send response: %s", response.text if data is None else data)
+
+    # -- AI Card streaming helpers -------------------------------------------
+
+    def _make_card_source_key(self, inbound: InboundMessage) -> str:
+        """Build the colon-joined key that identifies *inbound* for card tracking.
+
+        Fields, in order: conversation type, sender staff ID, conversation ID,
+        message ID. A missing field contributes an empty string.
+        """
+        meta = inbound.metadata
+        field_names = ("conversation_type", "sender_staff_id", "conversation_id", "message_id")
+        return ":".join(f"{meta.get(field, '')}" for field in field_names)
+
+    def _make_card_source_key_from_outbound(self, msg: OutboundMessage) -> str:
+        """Build the card-tracking key for an outbound message.
+
+        The layout is conversation type, sender staff ID, conversation ID and a
+        correlation ID, joined by colons. The correlation ID is the metadata
+        ``message_id``, then ``msg.thread_ts``, then an empty string.
+        """
+        meta = msg.metadata
+        leading = [f"{meta.get(field, '')}" for field in ("conversation_type", "sender_staff_id", "conversation_id")]
+        correlation_id = meta.get("message_id") or msg.thread_ts or ""
+        return ":".join([*leading, f"{correlation_id}"])
+
+    async def _create_and_deliver_card(
+        self,
+        initial_text: str,
+        *,
+        chatbot_message: Any = None,
+    ) -> str | None:
+        """Create an AI card showing *initial_text* and return its ID, or ``None`` on any failure.
+
+        ``None`` is returned when the SDK client or *chatbot_message* is
+        missing, when ``AICardReplier`` cannot be imported, when the SDK
+        returns no card instance ID, or when card creation raises (which is
+        logged). On success the replier is stored under the returned ID for
+        later updates.
+        """
+        if self._dingtalk_client is None or chatbot_message is None:
+            logger.warning("[DingTalk] SDK client or chatbot_message unavailable, skipping AI card")
+            return None
+
+        try:
+            from dingtalk_stream.card_replier import AICardReplier
+        except ImportError:
+            logger.warning("[DingTalk] dingtalk-stream card_replier not available")
+            return None
+
+        try:
+            replier = AICardReplier(self._dingtalk_client, chatbot_message)
+            card_instance_id = await replier.async_create_and_deliver_card(
+                card_template_id=self._card_template_id,
+                card_data={"content": initial_text},
+            )
+            if not card_instance_id:
+                return None
+
+            # Keep the replier: _stream_update_card looks it up by this ID.
+            self._card_repliers[card_instance_id] = replier
+            logger.info("[DingTalk] AI card created: outTrackId=%s", card_instance_id)
+            return card_instance_id
+        except Exception:
+            logger.exception("[DingTalk] failed to create AI card")
+            return None
+
+    async def _stream_update_card(
+        self,
+        out_track_id: str,
+        content: str,
+        *,
+        is_finalize: bool = False,
+        is_error: bool = False,
+    ) -> None:
+        """Replace the ``content`` field of the tracked card *out_track_id*.
+
+        The stored replier is asked to overwrite the content (``append=False``);
+        ``is_finalize`` and ``is_error`` are forwarded as its ``finished`` and
+        ``failed`` flags.
+
+        Raises:
+            RuntimeError: when no replier is stored for *out_track_id*.
+        """
+        card_replier = self._card_repliers.get(out_track_id)
+        if card_replier:
+            await card_replier.async_streaming(
+                card_instance_id=out_track_id,
+                content_key="content",
+                content_value=content,
+                append=False,
+                finished=is_finalize,
+                failed=is_error,
+            )
+            return
+        raise RuntimeError(f"No AICardReplier found for track ID {out_track_id}")
+
+    # -- media upload --------------------------------------------------------
+
+    async def _upload_media(self, file_path: str | Path, media_type: str) -> str | None:
+        """Upload the file at *file_path* and return the ``mediaId`` from the response.
+
+        Returns ``None`` when reading the file or the HTTP request fails, when
+        the response body is not valid JSON, or when it is not a JSON object.
+        Each of those cases is logged. The result is also ``None`` when the
+        response object has no ``mediaId`` key.
+        """
+        try:
+            # Read the file in a worker thread so the event loop is not blocked by disk I/O.
+            file_bytes = await asyncio.to_thread(Path(file_path).read_bytes)
+            token = await self._get_access_token()
+            async with httpx.AsyncClient(timeout=httpx.Timeout(60.0)) as client:
+                response = await client.post(
+                    f"{DINGTALK_API_BASE}/v1.0/files/upload",
+                    headers={"x-acs-dingtalk-access-token": token},
+                    # NOTE(review): the multipart filename is the fixed string "upload", not the real name — confirm this is intended.
+                    files={"file": ("upload", file_bytes)},
+                    data={"type": media_type},
+                )
+                response.raise_for_status()
+                try:
+                    payload = response.json()
+                except json.JSONDecodeError:
+                    logger.exception("[DingTalk] failed to decode upload response JSON: %s", file_path)
+                    return None
+                if not isinstance(payload, dict):
+                    logger.warning("[DingTalk] unexpected upload response type %s for %s", type(payload).__name__, file_path)
+                    return None
+                return payload.get("mediaId")
+        except (httpx.HTTPError, OSError):
+            logger.exception("[DingTalk] failed to upload media: %s", file_path)
+            return None
+
+    @staticmethod
+    def _log_future_error(fut: Any, name: str, msg_id: str) -> None:
+        """Done-callback helper: log the exception of a finished future, if it has one.
+
+        Cancelled futures are ignored. The explicit ``cancelled()`` check is
+        needed because futures from ``asyncio.run_coroutine_threadsafe`` are
+        ``concurrent.futures`` futures, whose ``exception()`` raises
+        ``concurrent.futures.CancelledError``. That class is not related to
+        ``asyncio.CancelledError``, so the ``except`` clause below would miss it.
+        """
+        try:
+            if fut.cancelled():
+                return
+            exc = fut.exception()
+        except (asyncio.CancelledError, asyncio.InvalidStateError):
+            return
+        if exc:
+            logger.error("[DingTalk] %s failed for msg_id=%s: %s", name, msg_id, exc)
+
+
+class _DingTalkMessageHandler:
+    """Callback handler registered with dingtalk-stream."""
+
+    def __init__(self, channel: DingTalkChannel) -> None:
+        """Remember the owning *channel* so received messages can be forwarded to it."""
+        self._channel = channel
+
+    def pre_start(self) -> None:
+        """Copy this handler's ``dingtalk_client`` attribute, when set, onto the channel."""
+        # The attribute is not assigned in this class; presumably the SDK sets it on registration — TODO confirm.
+        if hasattr(self, "dingtalk_client") and self.dingtalk_client is not None:
+            self._channel._dingtalk_client = self.dingtalk_client
+
+    async def raw_process(self, callback_message: Any) -> Any:
+        """Run :meth:`process` and wrap its ``(code, message)`` result in an ack message."""
+        import dingtalk_stream
+        from dingtalk_stream.frames import Headers
+
+        code, message = await self.process(callback_message)
+        ack_message = dingtalk_stream.AckMessage()
+        ack_message.code = code
+        # Echo the incoming message ID in the ack headers.
+        ack_message.headers.message_id = callback_message.headers.message_id
+        ack_message.headers.content_type = Headers.CONTENT_TYPE_APPLICATION_JSON
+        ack_message.data = {"response": message}
+        return ack_message
+
+    async def process(self, callback: Any) -> tuple[int, str]:
+        """Parse the callback payload into a chatbot message, pass it to the channel, and return OK."""
+        import dingtalk_stream
+
+        incoming_message = dingtalk_stream.ChatbotMessage.from_dict(callback.data)
+        self._channel._on_chatbot_message(incoming_message)
+        return dingtalk_stream.AckMessage.STATUS_OK, "OK"
@@ -63,6 +63,10 @@ class FeishuChannel(Channel):
        self._GetMessageResourceRequest = None
        self._thread_lock = threading.Lock()

+    @property
+    def supports_streaming(self) -> bool:
+        """Report streaming support; this channel always returns ``True``."""
+        return True
+
    async def start(self) -> None:
        if self._running:
            return
@@ -375,9 +379,7 @@ class FeishuChannel(Channel):
        virtual_path = f"{VIRTUAL_PATH_PREFIX}/uploads/{resolved_target.name}"

        try:
-            from deerflow.config.app_config import AppConfig
-
-            sandbox_provider = get_sandbox_provider(AppConfig.from_file())
+            sandbox_provider = get_sandbox_provider()
            sandbox_id = sandbox_provider.acquire(thread_id)
            if sandbox_id != "local":
                sandbox = sandbox_provider.get(sandbox_id)
@@ -17,6 +17,8 @@ from langgraph_sdk.errors import ConflictError
 from app.channels.commands import KNOWN_CHANNEL_COMMANDS
 from app.channels.message_bus import InboundMessage, InboundMessageType, MessageBus, OutboundMessage, ResolvedAttachment
 from app.channels.store import ChannelStore
+from app.gateway.csrf_middleware import CSRF_COOKIE_NAME, CSRF_HEADER_NAME, generate_csrf_token
+from app.gateway.internal_auth import create_internal_auth_headers
 from deerflow.runtime.user_context import get_effective_user_id

 logger = logging.getLogger(__name__)
@@ -36,6 +38,7 @@ STREAM_UPDATE_MIN_INTERVAL_SECONDS = 0.35
 THREAD_BUSY_MESSAGE = "This conversation is already processing another request. Please wait for it to finish and try again."

 CHANNEL_CAPABILITIES = {
+    "dingtalk": {"supports_streaming": False},
    "discord": {"supports_streaming": False},
    "feishu": {"supports_streaming": True},
    "slack": {"supports_streaming": False},
@@ -46,6 +49,13 @@ CHANNEL_CAPABILITIES = {

 InboundFileReader = Callable[[dict[str, Any], httpx.AsyncClient], Awaitable[bytes | None]]

+_METADATA_DROP_KEYS = frozenset({"raw_message", "ref_msg"})
+
+
+def _slim_metadata(meta: dict[str, Any]) -> dict[str, Any]:
+    """Shallow-copy *meta*, leaving out the known-large keys listed in ``_METADATA_DROP_KEYS``."""
+    return {key: meta[key] for key in meta if key not in _METADATA_DROP_KEYS}
+

 INBOUND_FILE_READERS: dict[str, InboundFileReader] = {}

@@ -136,6 +146,13 @@ def _normalize_custom_agent_name(raw_value: str) -> str:
    return normalized


+def _strip_loop_warning_text(text: str) -> str:
+    """Drop every line carrying the middleware's ``[LOOP DETECTED]`` marker; other text is returned untouched."""
+    if "[LOOP DETECTED]" in text:
+        return "\n".join(ln for ln in text.splitlines() if "[LOOP DETECTED]" not in ln).strip()
+    return text
+
+
 def _extract_response_text(result: dict | list) -> str:
    """Extract the last AI message text from a LangGraph runs.wait result.

@@ -145,7 +162,7 @@ def _extract_response_text(result: dict | list) -> str:
    Handles special cases:
    - Regular AI text responses
    - Clarification interrupts (``ask_clarification`` tool messages)
-    - AI messages with tool_calls but no text content
+    - Strips loop-detection warnings attached to tool-call AI messages
    """
    if isinstance(result, list):
        messages = result
@@ -175,7 +192,12 @@ def _extract_response_text(result: dict | list) -> str:
        # Regular AI message with text content
        if msg_type == "ai":
            content = msg.get("content", "")
+            has_tool_calls = bool(msg.get("tool_calls"))
            if isinstance(content, str) and content:
+                if has_tool_calls:
+                    content = _strip_loop_warning_text(content)
+                    if not content:
+                        continue
                return content
            # content can be a list of content blocks
            if isinstance(content, list):
@@ -186,6 +208,8 @@ def _extract_response_text(result: dict | list) -> str:
                    elif isinstance(block, str):
                        parts.append(block)
                text = "".join(parts)
+                if has_tool_calls:
+                    text = _strip_loop_warning_text(text)
                if text:
                    return text
    return ""
@@ -410,7 +434,13 @@ async def _ingest_inbound_files(thread_id: str, msg: InboundMessage) -> list[dic
    if not msg.files:
        return []

-    from deerflow.uploads.manager import claim_unique_filename, ensure_uploads_dir, normalize_filename
+    from deerflow.uploads.manager import (
+        UnsafeUploadPathError,
+        claim_unique_filename,
+        ensure_uploads_dir,
+        normalize_filename,
+        write_upload_file_no_symlink,
+    )

    uploads_dir = ensure_uploads_dir(thread_id)
    seen_names = {entry.name for entry in uploads_dir.iterdir() if entry.is_file()}
@@ -461,7 +491,10 @@ async def _ingest_inbound_files(thread_id: str, msg: InboundMessage) -> list[dic

            dest = uploads_dir / safe_name
            try:
-                dest.write_bytes(data)
+                dest = write_upload_file_no_symlink(uploads_dir, safe_name, data)
+            except UnsafeUploadPathError:
+                logger.warning("[Manager] skipping inbound file with unsafe destination: %s", safe_name)
+                continue
            except Exception:
                logger.exception("[Manager] failed to write inbound file: %s", dest)
                continue
@@ -541,6 +574,13 @@ class ChannelManager:

    @staticmethod
    def _channel_supports_streaming(channel_name: str) -> bool:
+        from .service import get_channel_service
+
+        service = get_channel_service()
+        if service:
+            channel = service.get_channel(channel_name)
+            if channel is not None:
+                return channel.supports_streaming
        return CHANNEL_CAPABILITIES.get(channel_name, {}).get("supports_streaming", False)

    def _resolve_session_layer(self, msg: InboundMessage) -> tuple[dict[str, Any], dict[str, Any]]:
@@ -563,6 +603,17 @@ class ChannelManager:
            user_layer.get("config"),
        )

+        configurable = run_config.get("configurable")
+        if isinstance(configurable, Mapping):
+            configurable = dict(configurable)
+        else:
+            configurable = {}
+        run_config["configurable"] = configurable
+        # Pin channel-triggered runs to the root graph namespace so follow-up
+        # turns continue from the same conversation checkpoint.
+        configurable["checkpoint_ns"] = ""
+        configurable["thread_id"] = thread_id
+
        run_context = _merge_dicts(
            DEFAULT_RUN_CONTEXT,
            self._default_session.get("context"),
@@ -770,6 +821,7 @@ class ChannelManager:
            artifacts=artifacts,
            attachments=attachments,
            thread_ts=msg.thread_ts,
+            metadata=_slim_metadata(msg.metadata),
        )
        logger.info("[Manager] publishing outbound message to bus: channel=%s, chat_id=%s", msg.channel_name, msg.chat_id)
        await self.bus.publish_outbound(outbound)
@@ -831,6 +883,7 @@ class ChannelManager:
                        text=latest_text,
                        is_final=False,
                        thread_ts=msg.thread_ts,
+                        metadata=_slim_metadata(msg.metadata),
                    )
                )
                last_published_text = latest_text
@@ -875,6 +928,7 @@ class ChannelManager:
                    attachments=attachments,
                    is_final=True,
                    thread_ts=msg.thread_ts,
+                    metadata=_slim_metadata(msg.metadata),
                )
            )

@@ -933,6 +987,7 @@ class ChannelManager:
            thread_id=self.store.get_thread_id(msg.channel_name, msg.chat_id) or "",
            text=reply,
            thread_ts=msg.thread_ts,
+            metadata=_slim_metadata(msg.metadata),
        )
        await self.bus.publish_outbound(outbound)

@@ -942,7 +997,11 @@ class ChannelManager:

        try:
            async with httpx.AsyncClient() as http:
-                resp = await http.get(f"{self._gateway_url}{path}", timeout=10)
+                resp = await http.get(
+                    f"{self._gateway_url}{path}",
+                    timeout=10,
+                    headers=create_internal_auth_headers(),
+                )
                resp.raise_for_status()
                data = resp.json()
        except Exception:
@@ -966,5 +1025,6 @@ class ChannelManager:
            thread_id=self.store.get_thread_id(msg.channel_name, msg.chat_id) or "",
            text=error_text,
            thread_ts=msg.thread_ts,
+            metadata=_slim_metadata(msg.metadata),
        )
        await self.bus.publish_outbound(outbound)
@@ -11,13 +11,14 @@ from app.channels.manager import DEFAULT_GATEWAY_URL, DEFAULT_LANGGRAPH_URL, Cha
 from app.channels.message_bus import MessageBus
 from app.channels.store import ChannelStore

+logger = logging.getLogger(__name__)
+
 if TYPE_CHECKING:
    from deerflow.config.app_config import AppConfig

-logger = logging.getLogger(__name__)
-
 # Channel name → import path for lazy loading
 _CHANNEL_REGISTRY: dict[str, str] = {
+    "dingtalk": "app.channels.dingtalk:DingTalkChannel",
    "discord": "app.channels.discord:DiscordChannel",
    "feishu": "app.channels.feishu:FeishuChannel",
    "slack": "app.channels.slack:SlackChannel",
@@ -28,6 +29,7 @@ _CHANNEL_REGISTRY: dict[str, str] = {

 # Keys that indicate a user has configured credentials for a channel.
 _CHANNEL_CREDENTIAL_KEYS: dict[str, list[str]] = {
+    "dingtalk": ["client_id", "client_secret"],
    "discord": ["bot_token"],
    "feishu": ["app_id", "app_secret"],
    "slack": ["bot_token", "app_token"],
@@ -78,8 +80,12 @@ class ChannelService:
        self._running = False

    @classmethod
-    def from_app_config(cls, app_config: AppConfig) -> ChannelService:
-        """Create a ChannelService from an explicit application config."""
+    def from_app_config(cls, app_config: AppConfig | None = None) -> ChannelService:
+        """Create a ChannelService from the application config."""
+        if app_config is None:
+            from deerflow.config.app_config import get_app_config
+
+            app_config = get_app_config()
        channels_config = {}
        # extra fields are allowed by AppConfig (extra="allow")
        extra = app_config.model_extra or {}
@@ -162,11 +168,16 @@ class ChannelService:

        try:
            channel = channel_cls(bus=self.bus, config=config)
-            await channel.start()
            self._channels[name] = channel
+            await channel.start()
+            if not channel.is_running:
+                self._channels.pop(name, None)
+                logger.error("Channel %s did not enter a running state after start()", name)
+                return False
            logger.info("Channel %s started", name)
            return True
        except Exception:
+            self._channels.pop(name, None)
            logger.exception("Failed to start channel %s", name)
            return False

@@ -201,7 +212,7 @@ def get_channel_service() -> ChannelService | None:
    return _channel_service


-async def start_channel_service(app_config: AppConfig) -> ChannelService:
+async def start_channel_service(app_config: AppConfig | None = None) -> ChannelService:
    """Create and start the global ChannelService from app config."""
    global _channel_service
    if _channel_service is not None:
@@ -29,6 +29,10 @@ class WeComChannel(Channel):
        self._ws_stream_ids: dict[str, str] = {}
        self._working_message = "Working on it..."

+    @property
+    def supports_streaming(self) -> bool:
+        return True  # read as channel.supports_streaming by ChannelManager._channel_supports_streaming
+
    def _clear_ws_context(self, thread_ts: str | None) -> None:
        if not thread_ts:
            return
@@ -28,9 +28,13 @@ from app.gateway.routers import (
    threads,
    uploads,
 )
-from deerflow.config.app_config import AppConfig
+from deerflow.config import app_config as deerflow_app_config
+from deerflow.config.app_config import apply_logging_level

-# Configure logging
+AppConfig = deerflow_app_config.AppConfig
+get_app_config = deerflow_app_config.get_app_config
+
+# Default logging; lifespan overrides from config.yaml log_level.
 logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
@@ -72,7 +76,18 @@ async def _ensure_admin_user(app: FastAPI) -> None:
    from deerflow.persistence.engine import get_session_factory
    from deerflow.persistence.user.model import UserRow

-    provider = get_local_provider()
+    try:
+        provider = get_local_provider()
+    except RuntimeError:
+        # Auth persistence may not be initialized in some test/boot paths.
+        # Skip admin migration work rather than failing gateway startup.
+        logger.warning("Auth persistence not ready; skipping admin bootstrap check")
+        return
+
+    sf = get_session_factory()
+    if sf is None:
+        return
+
    admin_count = await provider.count_admin_users()

    if admin_count == 0:
@@ -84,10 +99,6 @@ async def _ensure_admin_user(app: FastAPI) -> None:

    # Admin already exists — run orphan thread migration for any
    # LangGraph thread metadata that pre-dates the auth module.
-    sf = get_session_factory()
-    if sf is None:
-        return
-
    async with sf() as session:
        stmt = select(UserRow).where(UserRow.system_role == "admin").limit(1)
        row = (await session.execute(stmt)).scalar_one_or_none()
@@ -151,11 +162,10 @@ async def _migrate_orphaned_threads(store, admin_user_id: str) -> int:
 async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
    """Application lifespan handler."""

+    # Load config and check necessary environment variables at startup
    try:
-        # ``app.state.config`` is the sole source of truth for
-        # ``Depends(get_config)``. Consumers that want AppConfig must receive
-        # it as an explicit parameter; there is no ambient singleton.
-        app.state.config = AppConfig.from_file()
+        app.state.config = get_app_config()
+        apply_logging_level(app.state.config.log_level)
        logger.info("Configuration loaded successfully")
    except Exception as e:
        error_msg = f"Failed to load configuration during gateway startup: {e}"
@@ -208,6 +218,8 @@ def create_app() -> FastAPI:
    Returns:
        Configured FastAPI application instance.
    """
+    config = get_gateway_config()
+    docs_kwargs = {"docs_url": "/docs", "redoc_url": "/redoc", "openapi_url": "/openapi.json"} if config.enable_docs else {"docs_url": None, "redoc_url": None, "openapi_url": None}

    app = FastAPI(
        title="DeerFlow API Gateway",
@@ -232,9 +244,7 @@ This gateway provides custom endpoints for models, MCP configuration, skills, an
        """,
        version="0.1.0",
        lifespan=lifespan,
-        docs_url="/docs",
-        redoc_url="/redoc",
-        openapi_url="/openapi.json",
+        **docs_kwargs,
        openapi_tags=[
            {
                "name": "models",
@@ -4,11 +4,8 @@ import logging
 import os
 import secrets

-from dotenv import load_dotenv
 from pydantic import BaseModel, Field

-load_dotenv()
-
 logger = logging.getLogger(__name__)


@@ -37,6 +34,9 @@ def get_auth_config() -> AuthConfig:
    """Get the global AuthConfig instance. Parses from env on first call."""
    global _auth_config
    if _auth_config is None:
+        from dotenv import load_dotenv
+
+        load_dotenv()
        jwt_secret = os.environ.get("AUTH_JWT_SECRET")
        if not jwt_secret:
            jwt_secret = secrets.token_urlsafe(32)
@@ -1,10 +1,14 @@
 """Local email/password authentication provider."""

+import logging
+
 from app.gateway.auth.models import User
-from app.gateway.auth.password import hash_password_async, verify_password_async
+from app.gateway.auth.password import hash_password_async, needs_rehash, verify_password_async
 from app.gateway.auth.providers import AuthProvider
 from app.gateway.auth.repositories.base import UserRepository

+logger = logging.getLogger(__name__)
+

 class LocalAuthProvider(AuthProvider):
    """Email/password authentication provider using local database."""
@@ -43,6 +47,15 @@ class LocalAuthProvider(AuthProvider):
        if not await verify_password_async(password, user.password_hash):
            return None

+        if needs_rehash(user.password_hash):
+            try:
+                user.password_hash = await hash_password_async(password)
+                await self._repo.update_user(user)
+            except Exception:
+                # Rehash is an opportunistic upgrade; a transient DB error must not
+                # prevent an otherwise-valid login from succeeding.
+                logger.warning("Failed to rehash password for user %s; login will still succeed", user.email, exc_info=True)
+
        return user

    async def get_user(self, user_id: str) -> User | None:
@@ -1,18 +1,66 @@
-"""Password hashing utilities using bcrypt directly."""
+"""Password hashing utilities with versioned hash format.
+
+Hash format: ``$dfv<N>$<bcrypt_hash>`` where ``<N>`` is the version.
+
+- **v1** (legacy): ``bcrypt(password)`` — plain bcrypt, susceptible to
+  72-byte silent truncation.
+- **v2** (current): ``bcrypt(b64(sha256(password)))`` — SHA-256 pre-hash
+  avoids the 72-byte truncation limit so the full password contributes
+  to the hash.
+
+Verification auto-detects the version and falls back to v1 for hashes
+without a prefix, so existing deployments upgrade transparently on next
+login.
+"""

 import asyncio
+import base64
+import hashlib

 import bcrypt

+_CURRENT_VERSION = 2
+_PREFIX_V2 = "$dfv2$"
+_PREFIX_V1 = "$dfv1$"
+
+
+def _pre_hash_v2(password: str) -> bytes:
+    """Return base64(SHA-256(UTF-8 password)): a fixed 44-byte input that fits under bcrypt's 72-byte limit."""
+    return base64.standard_b64encode(hashlib.sha256(password.encode()).digest())
+

 def hash_password(password: str) -> str:
-    """Hash a password using bcrypt."""
-    return bcrypt.hashpw(password.encode("utf-8"), bcrypt.gensalt()).decode("utf-8")
+    """Hash a password (current version: v2 — SHA-256 + bcrypt)."""
+    raw = bcrypt.hashpw(_pre_hash_v2(password), bcrypt.gensalt()).decode("utf-8")
+    return f"{_PREFIX_V2}{raw}"


 def verify_password(plain_password: str, hashed_password: str) -> bool:
-    """Verify a password against its hash."""
-    return bcrypt.checkpw(plain_password.encode("utf-8"), hashed_password.encode("utf-8"))
+    """Verify a password, auto-detecting the hash version.
+
+    Accepts v2 (``$dfv2$…``), v1 (``$dfv1$…``), and bare bcrypt hashes
+    (treated as v1 for backward compatibility with pre-versioning data).
+    """
+    try:
+        if hashed_password.startswith(_PREFIX_V2):
+            bcrypt_hash = hashed_password[len(_PREFIX_V2) :]
+            return bcrypt.checkpw(_pre_hash_v2(plain_password), bcrypt_hash.encode("utf-8"))
+
+        if hashed_password.startswith(_PREFIX_V1):
+            bcrypt_hash = hashed_password[len(_PREFIX_V1) :]
+        else:
+            bcrypt_hash = hashed_password
+
+        return bcrypt.checkpw(plain_password.encode("utf-8"), bcrypt_hash.encode("utf-8"))
+    except ValueError:
+        # bcrypt raises ValueError for malformed or corrupt hashes (e.g., invalid salt).
+        # Fail closed rather than crashing the request.
+        return False
+
+
+def needs_rehash(hashed_password: str) -> bool:
+    """Return True if the stored hash does not start with the current ``$dfv2$`` prefix and should be rehashed."""
+    return not hashed_password.startswith(_PREFIX_V2)


 async def hash_password_async(password: str) -> str:
@@ -12,12 +12,12 @@ class AuthProvider(ABC):

        Returns User if authentication succeeds, None otherwise.
        """
-        ...
+        raise NotImplementedError

    @abstractmethod
    async def get_user(self, user_id: str) -> "User | None":
        """Retrieve user by ID."""
-        ...
+        raise NotImplementedError


 # Import User at runtime to avoid circular imports
@@ -35,7 +35,7 @@ class UserRepository(ABC):
        Raises:
            ValueError: If email already exists
        """
-        ...
+        raise NotImplementedError

    @abstractmethod
    async def get_user_by_id(self, user_id: str) -> User | None:
@@ -47,7 +47,7 @@ class UserRepository(ABC):
        Returns:
            User if found, None otherwise
        """
-        ...
+        raise NotImplementedError

    @abstractmethod
    async def get_user_by_email(self, email: str) -> User | None:
@@ -59,7 +59,7 @@ class UserRepository(ABC):
        Returns:
            User if found, None otherwise
        """
-        ...
+        raise NotImplementedError

    @abstractmethod
    async def update_user(self, user: User) -> User:
@@ -76,17 +76,17 @@ class UserRepository(ABC):
                a hard failure (not a no-op) so callers cannot mistake a
                concurrent-delete race for a successful update.
        """
-        ...
+        raise NotImplementedError

    @abstractmethod
    async def count_users(self) -> int:
        """Return total number of registered users."""
-        ...
+        raise NotImplementedError

    @abstractmethod
    async def count_admin_users(self) -> int:
        """Return number of users with system_role == 'admin'."""
-        ...
+        raise NotImplementedError

    @abstractmethod
    async def get_user_by_oauth(self, provider: str, oauth_id: str) -> User | None:
@@ -99,4 +99,4 @@ class UserRepository(ABC):
        Returns:
            User if found, None otherwise
        """
-        ...
+        raise NotImplementedError
@@ -25,15 +25,14 @@ from deerflow.persistence.user.model import UserRow


 async def _run(email: str | None) -> int:
-    from deerflow.config import AppConfig
+    from deerflow.config import get_app_config
    from deerflow.persistence.engine import (
        close_engine,
        get_session_factory,
        init_engine_from_config,
    )

-    # CLI entry: load config explicitly at the top, pass down through the closure.
-    config = AppConfig.from_file()
+    config = get_app_config()
    await init_engine_from_config(config.database)
    try:
        sf = get_session_factory()
@@ -18,6 +18,7 @@ from starlette.types import ASGIApp

 from app.gateway.auth.errors import AuthErrorCode, AuthErrorResponse
 from app.gateway.authz import _ALL_PERMISSIONS, AuthContext
+from app.gateway.internal_auth import INTERNAL_AUTH_HEADER_NAME, get_internal_user, is_valid_internal_auth_token
 from deerflow.runtime.user_context import reset_current_user, set_current_user

 # Paths that never require authentication.
@@ -75,8 +76,12 @@ class AuthMiddleware(BaseHTTPMiddleware):
        if _is_public(request.url.path):
            return await call_next(request)

+        internal_user = None
+        if is_valid_internal_auth_token(request.headers.get(INTERNAL_AUTH_HEADER_NAME)):
+            internal_user = get_internal_user()
+
        # Non-public path: require session cookie
-        if not request.cookies.get("access_token"):
+        if internal_user is None and not request.cookies.get("access_token"):
            return JSONResponse(
                status_code=401,
                content={
@@ -100,10 +105,13 @@ class AuthMiddleware(BaseHTTPMiddleware):
        # bubble up, so we catch and render it as JSONResponse here.
        from app.gateway.deps import get_current_user_from_request

-        try:
-            user = await get_current_user_from_request(request)
-        except HTTPException as exc:
-            return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail})
+        if internal_user is not None:
+            user = internal_user
+        else:
+            try:
+                user = await get_current_user_from_request(request)
+            except HTTPException as exc:
+                return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail})

        # Stamp both request.state.user (for the contextvar pattern)
        # and request.state.auth (so @require_permission's "auth is
@@ -30,7 +30,9 @@ Inspired by LangGraph Auth system: https://github.com/langchain-ai/langgraph/blo
 from __future__ import annotations

 import functools
+import inspect
 from collections.abc import Callable
+from types import SimpleNamespace
 from typing import TYPE_CHECKING, Any, ParamSpec, TypeVar

 from fastapi import HTTPException, Request
@@ -117,6 +119,15 @@ _ALL_PERMISSIONS: list[str] = [
 ]


+def _make_test_request_stub() -> Any:
+    """Build the bare-bones stand-in request used when a decorated handler is called directly.
+
+    It carries an empty ``state`` namespace, empty ``cookies``, and the ``_deerflow_test_bypass_auth`` flag.
+    """
+    stub = SimpleNamespace(state=SimpleNamespace(), cookies={}, _deerflow_test_bypass_auth=True)
+    return stub
+
+
 async def _authenticate(request: Request) -> AuthContext:
    """Authenticate request and return AuthContext.

@@ -134,7 +145,11 @@ async def _authenticate(request: Request) -> AuthContext:


 def require_auth[**P, T](func: Callable[P, T]) -> Callable[P, T]:
-    """Decorator that authenticates the request and sets AuthContext.
+    """Decorator that authenticates the request and enforces authentication.
+
+    Independently raises HTTP 401 for unauthenticated requests, regardless of
+    whether ``AuthMiddleware`` is present in the ASGI stack. Sets the resolved
+    ``AuthContext`` on ``request.state.auth`` for downstream handlers.

    Must be placed ABOVE other decorators (executes after them).

@@ -147,19 +162,33 @@ def require_auth[**P, T](func: Callable[P, T]) -> Callable[P, T]:
            ...

    Raises:
-        ValueError: If 'request' parameter is missing
+        HTTPException: 401 if the request is unauthenticated.
+        ValueError: If 'request' parameter is missing.
    """

    @functools.wraps(func)
    async def wrapper(*args: Any, **kwargs: Any) -> Any:
        request = kwargs.get("request")
        if request is None:
-            raise ValueError("require_auth decorator requires 'request' parameter")
+            # Unit tests may call decorated handlers directly without a
+            # FastAPI Request object. Inject a minimal request stub when
+            # the wrapped function declares `request`.
+            if "request" in inspect.signature(func).parameters:
+                kwargs["request"] = _make_test_request_stub()
+            else:
+                raise ValueError("require_auth decorator requires 'request' parameter")
+            request = kwargs["request"]
+
+        if getattr(request, "_deerflow_test_bypass_auth", False):
+            return await func(*args, **kwargs)

        # Authenticate and set context
        auth_context = await _authenticate(request)
        request.state.auth = auth_context

+        if not auth_context.is_authenticated:
+            raise HTTPException(status_code=401, detail="Authentication required")
+
        return await func(*args, **kwargs)

    return wrapper
@@ -210,7 +239,17 @@ def require_permission(
        async def wrapper(*args: Any, **kwargs: Any) -> Any:
            request = kwargs.get("request")
            if request is None:
-                raise ValueError("require_permission decorator requires 'request' parameter")
+                # Unit tests may call decorated route handlers directly without
+                # constructing a FastAPI Request object. Inject a minimal stub
+                # when the wrapped function declares `request`.
+                if "request" in inspect.signature(func).parameters:
+                    kwargs["request"] = _make_test_request_stub()
+                else:
+                    return await func(*args, **kwargs)
+                request = kwargs["request"]
+
+            if getattr(request, "_deerflow_test_bypass_auth", False):
+                return await func(*args, **kwargs)

            auth: AuthContext = getattr(request.state, "auth", None)
            if auth is None:
@@ -9,6 +9,7 @@ class GatewayConfig(BaseModel):
    host: str = Field(default="0.0.0.0", description="Host to bind the gateway server")
    port: int = Field(default=8001, description="Port to bind the gateway server")
    cors_origins: list[str] = Field(default_factory=lambda: ["http://localhost:3000"], description="Allowed CORS origins")
+    enable_docs: bool = Field(default=True, description="Enable Swagger/ReDoc/OpenAPI endpoints")


 _gateway_config: GatewayConfig | None = None
@@ -23,5 +24,6 @@ def get_gateway_config() -> GatewayConfig:
            host=os.getenv("GATEWAY_HOST", "0.0.0.0"),
            port=int(os.getenv("GATEWAY_PORT", "8001")),
            cors_origins=cors_origins_str.split(","),
+            enable_docs=os.getenv("GATEWAY_ENABLE_DOCS", "true").lower() == "true",
        )
    return _gateway_config
@@ -4,8 +4,10 @@ Per RFC-001:
 State-changing operations require CSRF protection.
 """

+import os
 import secrets
 from collections.abc import Callable
+from urllib.parse import urlsplit

 from fastapi import Request, Response
 from starlette.middleware.base import BaseHTTPMiddleware
@@ -19,7 +21,7 @@ CSRF_TOKEN_LENGTH = 64  # bytes

 def is_secure_request(request: Request) -> bool:
    """Detect whether the original client request was made over HTTPS."""
-    return request.headers.get("x-forwarded-proto", request.url.scheme) == "https"
+    return _request_scheme(request) == "https"


 def generate_csrf_token() -> str:
@@ -61,6 +63,109 @@ def is_auth_endpoint(request: Request) -> bool:
    return request.url.path.rstrip("/") in _AUTH_EXEMPT_PATHS


+def _host_with_optional_port(hostname: str, port: int | None, scheme: str) -> str:
+    """Lower-case *hostname*, bracket it if it contains ":" unbracketed, and append *port* unless it is the scheme default."""
+    lowered = hostname.lower()
+    needs_brackets = ":" in lowered and not lowered.startswith("[")
+    authority = f"[{lowered}]" if needs_brackets else lowered
+
+    is_default_port = (scheme, port) in {("http", 80), ("https", 443)}
+    show_port = port is not None and not is_default_port
+    return f"{authority}:{port}" if show_port else authority
+
+
+def _normalize_origin(origin: str) -> str | None:
+    """Canonicalise *origin* to ``scheme://host[:port]``; return None when it is not a bare http(s) origin."""
+    try:
+        parts = urlsplit(origin.strip())
+        explicit_port = parts.port  # read inside the try so a ValueError from a bad port is caught
+    except ValueError:
+        return None
+
+    scheme = parts.scheme.lower()
+    is_web_scheme = scheme == "http" or scheme == "https"
+    # A browser Origin is only scheme/host/port: refuse credentials, paths, queries and fragments.
+    has_extras = any((parts.username, parts.password, parts.path, parts.query, parts.fragment))
+    if not (is_web_scheme and parts.hostname) or has_extras:
+        return None
+
+    authority = _host_with_optional_port(parts.hostname, explicit_port, scheme)
+    return f"{scheme}://{authority}"
+
+
+def _configured_cors_origins() -> set[str]:
+    """Collect the normalized, non-wildcard browser origins listed in ``GATEWAY_CORS_ORIGINS``."""
+    configured = os.environ.get("GATEWAY_CORS_ORIGINS", "")
+    allowed = set()
+    for entry in (piece.strip() for piece in configured.split(",")):
+        if entry in ("", "*"):
+            continue  # blank entries and the wildcard never count as explicit origins
+        canonical = _normalize_origin(entry)
+        if canonical:
+            allowed.add(canonical)
+    return allowed
+
+
+def _first_header_value(value: str | None) -> str | None:
+    """Pick the leading element of a comma-separated header value; None when missing or blank."""
+    if value:
+        head, _, _ = value.partition(",")
+        return head.strip() or None
+    return None
+
+
+def _forwarded_param(request: Request, name: str) -> str | None:
+    """Look up *name* among the ``key=value`` pairs of the first ``Forwarded`` (RFC 7239) header entry."""
+    first_element = _first_header_value(request.headers.get("forwarded"))
+    pairs = first_element.split(";") if first_element else []
+    for pair in pairs:
+        token, equals, raw = pair.strip().partition("=")
+        if not equals or token.lower() != name:
+            continue
+        # First matching pair wins; an empty (or empty-quoted) value counts as absent.
+        return raw.strip().strip('"') or None
+    return None
+
+
+def _request_scheme(request: Request) -> str:
+    """Return the lower-cased scheme: ``Forwarded`` proto, else ``X-Forwarded-Proto``, else the request URL's own."""
+    from_proxy = _forwarded_param(request, "proto") or _first_header_value(request.headers.get("x-forwarded-proto"))
+    return (from_proxy or request.url.scheme).lower()
+
+
+def _request_origin(request: Request) -> str | None:
+    """Reconstruct the normalized origin this request targets, preferring forwarding headers over ``Host``."""
+    headers = request.headers
+    authority = _forwarded_param(request, "host") or _first_header_value(headers.get("x-forwarded-host")) or headers.get("host") or request.url.netloc
+    port_hint = _first_header_value(headers.get("x-forwarded-port"))
+    # Append X-Forwarded-Port only when the text after the last "]" has no ":" (no port present yet).
+    _, _, after_bracket = authority.rpartition("]")
+    if port_hint and ":" not in after_bracket:
+        authority = f"{authority}:{port_hint}"
+    return _normalize_origin(f"{_request_scheme(request)}://{authority}")
+
+
+def is_allowed_auth_origin(request: Request) -> bool:
+    """Decide whether an auth request's ``Origin`` header is acceptable.
+
+    Login/register/initialize skip the double-submit token check because a
+    first-time browser client has no CSRF token yet, but they still create a
+    session cookie. A browser request carrying a hostile Origin is therefore
+    refused to prevent login CSRF / session fixation. Requests with no Origin
+    header (curl, mobile integrations, other non-browser clients) are allowed.
+    """
+    claimed = request.headers.get("origin")
+    if not claimed:
+        return True
+    candidate = _normalize_origin(claimed)
+    if candidate is None:
+        return False
+    if candidate in _configured_cors_origins():
+        return True
+    # Same-origin fallback; ``candidate`` is never None here, so an unresolvable target cannot match.
+    return candidate == _request_origin(request)
+
+
 class CSRFMiddleware(BaseHTTPMiddleware):
    """Middleware that implements CSRF protection using Double Submit Cookie pattern."""

@@ -70,6 +175,12 @@ class CSRFMiddleware(BaseHTTPMiddleware):
    async def dispatch(self, request: Request, call_next: Callable) -> Response:
        _is_auth = is_auth_endpoint(request)

+        if should_check_csrf(request) and _is_auth and not is_allowed_auth_origin(request):
+            return JSONResponse(
+                status_code=403,
+                content={"detail": "Cross-site auth request denied."},
+            )
+
        if should_check_csrf(request) and not _is_auth:
            cookie_token = request.cookies.get(CSRF_COOKIE_NAME)
            header_token = request.headers.get(CSRF_HEADER_NAME)
@@ -10,13 +10,16 @@ from __future__ import annotations

 from collections.abc import AsyncGenerator, Callable
 from contextlib import AsyncExitStack, asynccontextmanager
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, TypeVar, cast

 from fastapi import FastAPI, HTTPException, Request
 from langgraph.types import Checkpointer

 from deerflow.config.app_config import AppConfig
-from deerflow.runtime import RunContext, RunManager
+from deerflow.persistence.feedback import FeedbackRepository
+from deerflow.runtime import RunContext, RunManager, StreamBridge
+from deerflow.runtime.events.store.base import RunEventStore
+from deerflow.runtime.runs.store.base import RunStore

 if TYPE_CHECKING:
    from app.gateway.auth.local_provider import LocalAuthProvider
@@ -24,17 +27,15 @@ if TYPE_CHECKING:
    from deerflow.persistence.thread_meta.base import ThreadMetaStore


-def get_config(request: Request) -> AppConfig:
-    """FastAPI dependency returning the app-scoped ``AppConfig``.
+T = TypeVar("T")

-    Reads from ``request.app.state.config`` which is set at startup
-    (``app.py`` lifespan) and swapped on config reload (``routers/mcp.py``,
-    ``routers/skills.py``).
-    """
-    cfg = getattr(request.app.state, "config", None)
-    if cfg is None:
+
+def get_config(request: Request) -> AppConfig:
+    """Return the app-scoped ``AppConfig`` stored on ``app.state``."""
+    config = getattr(request.app.state, "config", None)
+    if config is None:
        raise HTTPException(status_code=503, detail="Configuration not available")
-    return cfg
+    return config


@asynccontextmanager
@@ -52,9 +53,9 @@ async def langgraph_runtime(app: FastAPI) -> AsyncGenerator[None, None]:
    from deerflow.runtime.events.store import make_run_event_store

    async with AsyncExitStack() as stack:
-        # app.state.config is populated earlier in lifespan(); thread it
-        # explicitly into every provider below.
-        config = app.state.config
+        config = getattr(app.state, "config", None)
+        if config is None:
+            raise RuntimeError("langgraph_runtime() requires app.state.config to be initialized")

        app.state.stream_bridge = await stack.enter_async_context(make_stream_bridge(config))

@@ -101,25 +102,25 @@ async def langgraph_runtime(app: FastAPI) -> AsyncGenerator[None, None]:
 # ---------------------------------------------------------------------------


-def _require(attr: str, label: str):
+def _require(attr: str, label: str) -> Callable[[Request], T]:
    """Create a FastAPI dependency that returns ``app.state.<attr>`` or 503."""

-    def dep(request: Request):
+    def dep(request: Request) -> T:
        val = getattr(request.app.state, attr, None)
        if val is None:
            raise HTTPException(status_code=503, detail=f"{label} not available")
-        return val
+        return cast(T, val)

    dep.__name__ = dep.__qualname__ = f"get_{attr}"
    return dep


-get_stream_bridge = _require("stream_bridge", "Stream bridge")
-get_run_manager = _require("run_manager", "Run manager")
-get_checkpointer = _require("checkpointer", "Checkpointer")
-get_run_event_store = _require("run_event_store", "Run event store")
-get_feedback_repo = _require("feedback_repo", "Feedback")
-get_run_store = _require("run_store", "Run store")
+get_stream_bridge: Callable[[Request], StreamBridge] = _require("stream_bridge", "Stream bridge")
+get_run_manager: Callable[[Request], RunManager] = _require("run_manager", "Run manager")
+get_checkpointer: Callable[[Request], Checkpointer] = _require("checkpointer", "Checkpointer")
+get_run_event_store: Callable[[Request], RunEventStore] = _require("run_event_store", "Run event store")
+get_feedback_repo: Callable[[Request], FeedbackRepository] = _require("feedback_repo", "Feedback")
+get_run_store: Callable[[Request], RunStore] = _require("run_store", "Run store")


 def get_store(request: Request):
@@ -138,10 +139,7 @@ def get_thread_store(request: Request) -> ThreadMetaStore:
 def get_run_context(request: Request) -> RunContext:
    """Build a :class:`RunContext` from ``app.state`` singletons.

-    Returns a *base* context with infrastructure dependencies.  Callers that
-    need per-run fields (e.g. ``follow_up_to_run_id``) should use
-    ``dataclasses.replace(ctx, follow_up_to_run_id=...)`` before passing it
-    to :func:`run_agent`.
+    Returns a *base* context with infrastructure dependencies.
    """
    config = get_config(request)
    return RunContext(
@@ -154,7 +152,6 @@ def get_run_context(request: Request) -> RunContext:
    )


-
 # ---------------------------------------------------------------------------
 # Auth helpers (used by authz.py and auth middleware)
 # ---------------------------------------------------------------------------
@@ -73,7 +73,7 @@ async def authenticate(request):
    if isinstance(payload, TokenError):
        raise Auth.exceptions.HTTPException(
            status_code=401,
-            detail=f"Token error: {payload.value}",
+            detail="Invalid token",
        )

    user = await get_local_provider().get_user(payload.sub)
@@ -5,13 +5,13 @@ import re
 import shutil

 import yaml
-from fastapi import APIRouter, Depends, HTTPException
+from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel, Field

-from app.gateway.deps import get_config
+from deerflow.config.agents_api_config import get_agents_api_config
 from deerflow.config.agents_config import AgentConfig, list_custom_agents, load_agent_config, load_agent_soul
-from deerflow.config.app_config import AppConfig
 from deerflow.config.paths import get_paths
+from deerflow.runtime.user_context import get_effective_user_id

 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/api", tags=["agents"])
@@ -78,20 +78,20 @@ def _normalize_agent_name(name: str) -> str:
    return name.lower()


-def _require_agents_api_enabled(app_config: AppConfig) -> None:
+def _require_agents_api_enabled() -> None:
    """Reject access unless the custom-agent management API is explicitly enabled."""
-    if not app_config.agents_api.enabled:
+    if not get_agents_api_config().enabled:
        raise HTTPException(
            status_code=403,
            detail=("Custom-agent management API is disabled. Set agents_api.enabled=true to expose agent and user-profile routes over HTTP."),
        )


-def _agent_config_to_response(agent_cfg: AgentConfig, include_soul: bool = False) -> AgentResponse:
+def _agent_config_to_response(agent_cfg: AgentConfig, include_soul: bool = False, *, user_id: str | None = None) -> AgentResponse:
    """Convert AgentConfig to AgentResponse."""
    soul: str | None = None
    if include_soul:
-        soul = load_agent_soul(agent_cfg.name) or ""
+        soul = load_agent_soul(agent_cfg.name, user_id=user_id) or ""

    return AgentResponse(
        name=agent_cfg.name,
@@ -109,17 +109,18 @@ def _agent_config_to_response(agent_cfg: AgentConfig, include_soul: bool = False
    summary="List Custom Agents",
    description="List all custom agents available in the agents directory, including their soul content.",
 )
-async def list_agents(app_config: AppConfig = Depends(get_config)) -> AgentsListResponse:
+async def list_agents() -> AgentsListResponse:
    """List all custom agents.

    Returns:
        List of all custom agents with their metadata and soul content.
    """
-    _require_agents_api_enabled(app_config)
+    _require_agents_api_enabled()

+    user_id = get_effective_user_id()
    try:
-        agents = list_custom_agents()
-        return AgentsListResponse(agents=[_agent_config_to_response(a, include_soul=True) for a in agents])
+        agents = list_custom_agents(user_id=user_id)
+        return AgentsListResponse(agents=[_agent_config_to_response(a, include_soul=True, user_id=user_id) for a in agents])
    except Exception as e:
        logger.error(f"Failed to list agents: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Failed to list agents: {str(e)}")
@@ -142,10 +143,15 @@ async def check_agent_name(name: str) -> dict:
    Raises:
        HTTPException: 422 if the name is invalid.
    """
-    _require_agents_api_enabled(app_config)
+    _require_agents_api_enabled()
    _validate_agent_name(name)
    normalized = _normalize_agent_name(name)
-    available = not get_paths().agent_dir(normalized).exists()
+    user_id = get_effective_user_id()
+    paths = get_paths()
+    # Treat the name as taken if either the per-user path or the legacy shared
+    # path holds an agent — picking a name that collides with an unmigrated
+    # legacy agent would shadow the legacy entry once migration runs.
+    available = not paths.user_agent_dir(user_id, normalized).exists() and not paths.agent_dir(normalized).exists()
    return {"available": available, "name": normalized}


@@ -155,7 +161,7 @@ async def check_agent_name(name: str) -> dict:
    summary="Get Custom Agent",
    description="Retrieve details and SOUL.md content for a specific custom agent.",
 )
-async def get_agent(name: str, app_config: AppConfig = Depends(get_config)) -> AgentResponse:
+async def get_agent(name: str) -> AgentResponse:
    """Get a specific custom agent by name.

    Args:
@@ -167,13 +173,14 @@ async def get_agent(name: str, app_config: AppConfig = Depends(get_config)) -> A
    Raises:
        HTTPException: 404 if agent not found.
    """
-    _require_agents_api_enabled(app_config)
+    _require_agents_api_enabled()
    _validate_agent_name(name)
    name = _normalize_agent_name(name)
+    user_id = get_effective_user_id()

    try:
-        agent_cfg = load_agent_config(name)
-        return _agent_config_to_response(agent_cfg, include_soul=True)
+        agent_cfg = load_agent_config(name, user_id=user_id)
+        return _agent_config_to_response(agent_cfg, include_soul=True, user_id=user_id)
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail=f"Agent '{name}' not found")
    except Exception as e:
@@ -188,7 +195,7 @@ async def get_agent(name: str, app_config: AppConfig = Depends(get_config)) -> A
    summary="Create Custom Agent",
    description="Create a new custom agent with its config and SOUL.md.",
 )
-async def create_agent_endpoint(request: AgentCreateRequest, app_config: AppConfig = Depends(get_config)) -> AgentResponse:
+async def create_agent_endpoint(request: AgentCreateRequest) -> AgentResponse:
    """Create a new custom agent.

    Args:
@@ -200,13 +207,16 @@ async def create_agent_endpoint(request: AgentCreateRequest, app_config: AppConf
    Raises:
        HTTPException: 409 if agent already exists, 422 if name is invalid.
    """
-    _require_agents_api_enabled(app_config)
+    _require_agents_api_enabled()
    _validate_agent_name(request.name)
    normalized_name = _normalize_agent_name(request.name)
+    user_id = get_effective_user_id()
+    paths = get_paths()

-    agent_dir = get_paths().agent_dir(normalized_name)
+    agent_dir = paths.user_agent_dir(user_id, normalized_name)
+    legacy_dir = paths.agent_dir(normalized_name)

-    if agent_dir.exists():
+    if agent_dir.exists() or legacy_dir.exists():
        raise HTTPException(status_code=409, detail=f"Agent '{normalized_name}' already exists")

    try:
@@ -233,8 +243,8 @@ async def create_agent_endpoint(request: AgentCreateRequest, app_config: AppConf

        logger.info(f"Created agent '{normalized_name}' at {agent_dir}")

-        agent_cfg = load_agent_config(normalized_name)
-        return _agent_config_to_response(agent_cfg, include_soul=True)
+        agent_cfg = load_agent_config(normalized_name, user_id=user_id)
+        return _agent_config_to_response(agent_cfg, include_soul=True, user_id=user_id)

    except HTTPException:
        raise
@@ -252,7 +262,7 @@ async def create_agent_endpoint(request: AgentCreateRequest, app_config: AppConf
    summary="Update Custom Agent",
    description="Update an existing custom agent's config and/or SOUL.md.",
 )
-async def update_agent(name: str, request: AgentUpdateRequest, app_config: AppConfig = Depends(get_config)) -> AgentResponse:
+async def update_agent(name: str, request: AgentUpdateRequest) -> AgentResponse:
    """Update an existing custom agent.

    Args:
@@ -265,16 +275,23 @@ async def update_agent(name: str, request: AgentUpdateRequest, app_config: AppCo
    Raises:
        HTTPException: 404 if agent not found.
    """
-    _require_agents_api_enabled(app_config)
+    _require_agents_api_enabled()
    _validate_agent_name(name)
    name = _normalize_agent_name(name)
+    user_id = get_effective_user_id()

    try:
-        agent_cfg = load_agent_config(name)
+        agent_cfg = load_agent_config(name, user_id=user_id)
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail=f"Agent '{name}' not found")

-    agent_dir = get_paths().agent_dir(name)
+    paths = get_paths()
+    agent_dir = paths.user_agent_dir(user_id, name)
+    if not agent_dir.exists() and paths.agent_dir(name).exists():
+        raise HTTPException(
+            status_code=409,
+            detail=(f"Agent '{name}' only exists in the legacy shared layout and is not scoped to a user. Run scripts/migrate_user_isolation.py to move legacy agents into the per-user layout before updating."),
+        )

    try:
        # Update config if any config fields changed
@@ -315,8 +332,8 @@ async def update_agent(name: str, request: AgentUpdateRequest, app_config: AppCo

        logger.info(f"Updated agent '{name}'")

-        refreshed_cfg = load_agent_config(name)
-        return _agent_config_to_response(refreshed_cfg, include_soul=True)
+        refreshed_cfg = load_agent_config(name, user_id=user_id)
+        return _agent_config_to_response(refreshed_cfg, include_soul=True, user_id=user_id)

    except HTTPException:
        raise
@@ -343,13 +360,13 @@ class UserProfileUpdateRequest(BaseModel):
    summary="Get User Profile",
    description="Read the global USER.md file that is injected into all custom agents.",
 )
-async def get_user_profile(app_config: AppConfig = Depends(get_config)) -> UserProfileResponse:
+async def get_user_profile() -> UserProfileResponse:
    """Return the current USER.md content.

    Returns:
        UserProfileResponse with content=None if USER.md does not exist yet.
    """
-    _require_agents_api_enabled(app_config)
+    _require_agents_api_enabled()

    try:
        user_md_path = get_paths().user_md_file
@@ -368,7 +385,7 @@ async def get_user_profile(app_config: AppConfig = Depends(get_config)) -> UserP
    summary="Update User Profile",
    description="Write the global USER.md file that is injected into all custom agents.",
 )
-async def update_user_profile(request: UserProfileUpdateRequest, app_config: AppConfig = Depends(get_config)) -> UserProfileResponse:
+async def update_user_profile(request: UserProfileUpdateRequest) -> UserProfileResponse:
    """Create or overwrite the global USER.md.

    Args:
@@ -377,7 +394,7 @@ async def update_user_profile(request: UserProfileUpdateRequest, app_config: App
    Returns:
        UserProfileResponse with the saved content.
    """
-    _require_agents_api_enabled(app_config)
+    _require_agents_api_enabled()

    try:
        paths = get_paths()
@@ -396,22 +413,29 @@ async def update_user_profile(request: UserProfileUpdateRequest, app_config: App
    summary="Delete Custom Agent",
    description="Delete a custom agent and all its files (config, SOUL.md, memory).",
 )
-async def delete_agent(name: str, app_config: AppConfig = Depends(get_config)) -> None:
+async def delete_agent(name: str) -> None:
    """Delete a custom agent.

    Args:
        name: The agent name.

    Raises:
-        HTTPException: 404 if agent not found.
+        HTTPException: 404 if no per-user copy exists; 409 if only a legacy
+            shared copy exists (suggesting the migration script).
    """
-    _require_agents_api_enabled(app_config)
+    _require_agents_api_enabled()
    _validate_agent_name(name)
    name = _normalize_agent_name(name)
-
-    agent_dir = get_paths().agent_dir(name)
+    user_id = get_effective_user_id()
+    paths = get_paths()
+    agent_dir = paths.user_agent_dir(user_id, name)

    if not agent_dir.exists():
+        if paths.agent_dir(name).exists():
+            raise HTTPException(
+                status_code=409,
+                detail=(f"Agent '{name}' only exists in the legacy shared layout and is not scoped to a user. Run scripts/migrate_user_isolation.py to move legacy agents into the per-user layout before deleting."),
+            )
        raise HTTPException(status_code=404, detail=f"Agent '{name}' not found")

    try:
@@ -146,7 +146,13 @@ def _set_session_cookie(response: Response, token: str, request: Request) -> Non


 # ── Rate Limiting ────────────────────────────────────────────────────────
-# In-process dict — not shared across workers. Sufficient for single-worker deployments.
+# In-process dict — not shared across workers.
+#
+# **Limitation**: with multi-worker deployments (e.g., gunicorn -w N), each
+# worker maintains its own lockout table, so an attacker effectively gets
+# N × _MAX_LOGIN_ATTEMPTS guesses before being locked out everywhere. For
+# production multi-worker setups, replace this with a shared store (Redis,
+# database-backed counter) to enforce a true per-IP limit.

 _MAX_LOGIN_ATTEMPTS = 5
 _LOCKOUT_SECONDS = 300  # 5 minutes
@@ -376,9 +382,37 @@ async def get_me(request: Request):
    return UserResponse(id=str(user.id), email=user.email, system_role=user.system_role, needs_setup=user.needs_setup)


+_SETUP_STATUS_COOLDOWN: dict[str, float] = {}
+_SETUP_STATUS_COOLDOWN_SECONDS = 60
+_MAX_TRACKED_SETUP_STATUS_IPS = 10000
+
+
@router.get("/setup-status")
-async def setup_status():
+async def setup_status(request: Request):
    """Check if an admin account exists. Returns needs_setup=True when no admin exists."""
+    client_ip = _get_client_ip(request)
+    now = time.time()
+    last_check = _SETUP_STATUS_COOLDOWN.get(client_ip, 0)
+    elapsed = now - last_check
+    if elapsed < _SETUP_STATUS_COOLDOWN_SECONDS:
+        retry_after = max(1, int(_SETUP_STATUS_COOLDOWN_SECONDS - elapsed))
+        raise HTTPException(
+            status_code=status.HTTP_429_TOO_MANY_REQUESTS,
+            detail="Setup status check is rate limited",
+            headers={"Retry-After": str(retry_after)},
+        )
+    # Evict stale entries when dict grows too large to bound memory usage.
+    if len(_SETUP_STATUS_COOLDOWN) >= _MAX_TRACKED_SETUP_STATUS_IPS:
+        cutoff = now - _SETUP_STATUS_COOLDOWN_SECONDS
+        stale = [k for k, t in _SETUP_STATUS_COOLDOWN.items() if t < cutoff]
+        for k in stale:
+            del _SETUP_STATUS_COOLDOWN[k]
+        # If still too large after evicting expired entries, remove oldest half.
+        if len(_SETUP_STATUS_COOLDOWN) >= _MAX_TRACKED_SETUP_STATUS_IPS:
+            by_time = sorted(_SETUP_STATUS_COOLDOWN.items(), key=lambda kv: kv[1])
+            for k, _ in by_time[: len(by_time) // 2]:
+                del _SETUP_STATUS_COOLDOWN[k]
+    _SETUP_STATUS_COOLDOWN[client_ip] = now
    admin_count = await get_local_provider().count_admin_users()
    return {"needs_setup": admin_count == 0}

@@ -3,12 +3,10 @@ import logging
 from pathlib import Path
 from typing import Literal

-from fastapi import APIRouter, Depends, HTTPException, Request
+from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel, Field

-from app.gateway.deps import get_config
-from deerflow.config.app_config import AppConfig
-from deerflow.config.extensions_config import ExtensionsConfig
+from deerflow.config.extensions_config import ExtensionsConfig, get_extensions_config, reload_extensions_config

 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/api", tags=["mcp"])
@@ -71,7 +69,7 @@ class McpConfigUpdateRequest(BaseModel):
    summary="Get MCP Configuration",
    description="Retrieve the current Model Context Protocol (MCP) server configurations.",
 )
-async def get_mcp_configuration(config: AppConfig = Depends(get_config)) -> McpConfigResponse:
+async def get_mcp_configuration() -> McpConfigResponse:
    """Get the current MCP configuration.

    Returns:
@@ -92,9 +90,9 @@ async def get_mcp_configuration(config: AppConfig = Depends(get_config)) -> McpC
        }
        ```
    """
-    ext = config.extensions
+    config = get_extensions_config()

-    return McpConfigResponse(mcp_servers={name: McpServerConfigResponse(**server.model_dump()) for name, server in ext.mcp_servers.items()})
+    return McpConfigResponse(mcp_servers={name: McpServerConfigResponse(**server.model_dump()) for name, server in config.mcp_servers.items()})


@router.put(
@@ -103,11 +101,7 @@ async def get_mcp_configuration(config: AppConfig = Depends(get_config)) -> McpC
    summary="Update MCP Configuration",
    description="Update Model Context Protocol (MCP) server configurations and save to file.",
 )
-async def update_mcp_configuration(
-    request: McpConfigUpdateRequest,
-    http_request: Request,
-    config: AppConfig = Depends(get_config),
-) -> McpConfigResponse:
+async def update_mcp_configuration(request: McpConfigUpdateRequest) -> McpConfigResponse:
    """Update the MCP configuration.

    This will:
@@ -148,13 +142,13 @@ async def update_mcp_configuration(
            config_path = Path.cwd().parent / "extensions_config.json"
            logger.info(f"No existing extensions config found. Creating new config at: {config_path}")

-        # Use injected config to preserve skills configuration
-        current_ext = config.extensions
+        # Load current config to preserve skills configuration
+        current_config = get_extensions_config()

        # Convert request to dict format for JSON serialization
        config_data = {
            "mcpServers": {name: server.model_dump() for name, server in request.mcp_servers.items()},
-            "skills": {name: {"enabled": skill.enabled} for name, skill in current_ext.skills.items()},
+            "skills": {name: {"enabled": skill.enabled} for name, skill in current_config.skills.items()},
        }

        # Write the configuration to file
@@ -166,11 +160,9 @@ async def update_mcp_configuration(
        # NOTE: No need to reload/reset cache here - LangGraph Server (separate process)
        # will detect config file changes via mtime and reinitialize MCP tools automatically

-        # Reload the configuration and swap ``app.state.config`` so subsequent
-        # ``Depends(get_config)`` calls see the refreshed value.
-        reloaded = AppConfig.from_file()
-        http_request.app.state.config = reloaded
-        return McpConfigResponse(mcp_servers={name: McpServerConfigResponse(**server.model_dump()) for name, server in reloaded.extensions.mcp_servers.items()})
+        # Reload the configuration and update the global cache
+        reloaded_config = reload_extensions_config()
+        return McpConfigResponse(mcp_servers={name: McpServerConfigResponse(**server.model_dump()) for name, server in reloaded_config.mcp_servers.items()})

    except Exception as e:
        logger.error(f"Failed to update MCP configuration: {e}", exc_info=True)
@@ -1,9 +1,8 @@
 """Memory API router for retrieving and managing global memory data."""

-from fastapi import APIRouter, Depends, HTTPException
+from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel, Field

-from app.gateway.deps import get_config
 from deerflow.agents.memory.updater import (
    clear_memory_data,
    create_memory_fact,
@@ -13,7 +12,7 @@ from deerflow.agents.memory.updater import (
    reload_memory_data,
    update_memory_fact,
 )
-from deerflow.config.app_config import AppConfig
+from deerflow.config.memory_config import get_memory_config
 from deerflow.runtime.user_context import get_effective_user_id

 router = APIRouter(prefix="/api", tags=["memory"])
@@ -115,7 +114,7 @@ class MemoryStatusResponse(BaseModel):
    summary="Get Memory Data",
    description="Retrieve the current global memory data including user context, history, and facts.",
 )
-async def get_memory(app_config: AppConfig = Depends(get_config)) -> MemoryResponse:
+async def get_memory() -> MemoryResponse:
    """Get the current global memory data.

    Returns:
@@ -149,7 +148,7 @@ async def get_memory(app_config: AppConfig = Depends(get_config)) -> MemoryRespo
        }
        ```
    """
-    memory_data = get_memory_data(app_config.memory, user_id=get_effective_user_id())
+    memory_data = get_memory_data(user_id=get_effective_user_id())
    return MemoryResponse(**memory_data)


@@ -160,7 +159,7 @@ async def get_memory(app_config: AppConfig = Depends(get_config)) -> MemoryRespo
    summary="Reload Memory Data",
    description="Reload memory data from the storage file, refreshing the in-memory cache.",
 )
-async def reload_memory(app_config: AppConfig = Depends(get_config)) -> MemoryResponse:
+async def reload_memory() -> MemoryResponse:
    """Reload memory data from file.

    This forces a reload of the memory data from the storage file,
@@ -169,7 +168,7 @@ async def reload_memory(app_config: AppConfig = Depends(get_config)) -> MemoryRe
    Returns:
        The reloaded memory data.
    """
-    memory_data = reload_memory_data(app_config.memory, user_id=get_effective_user_id())
+    memory_data = reload_memory_data(user_id=get_effective_user_id())
    return MemoryResponse(**memory_data)


@@ -180,10 +179,10 @@ async def reload_memory(app_config: AppConfig = Depends(get_config)) -> MemoryRe
    summary="Clear All Memory Data",
    description="Delete all saved memory data and reset the memory structure to an empty state.",
 )
-async def clear_memory(app_config: AppConfig = Depends(get_config)) -> MemoryResponse:
+async def clear_memory() -> MemoryResponse:
    """Clear all persisted memory data."""
    try:
-        memory_data = clear_memory_data(app_config.memory, user_id=get_effective_user_id())
+        memory_data = clear_memory_data(user_id=get_effective_user_id())
    except OSError as exc:
        raise HTTPException(status_code=500, detail="Failed to clear memory data.") from exc

@@ -197,11 +196,10 @@ async def clear_memory(app_config: AppConfig = Depends(get_config)) -> MemoryRes
    summary="Create Memory Fact",
    description="Create a single saved memory fact manually.",
 )
-async def create_memory_fact_endpoint(request: FactCreateRequest, app_config: AppConfig = Depends(get_config)) -> MemoryResponse:
+async def create_memory_fact_endpoint(request: FactCreateRequest) -> MemoryResponse:
    """Create a single fact manually."""
    try:
        memory_data = create_memory_fact(
-            app_config.memory,
            content=request.content,
            category=request.category,
            confidence=request.confidence,
@@ -222,10 +220,10 @@ async def create_memory_fact_endpoint(request: FactCreateRequest, app_config: Ap
    summary="Delete Memory Fact",
    description="Delete a single saved memory fact by its fact id.",
 )
-async def delete_memory_fact_endpoint(fact_id: str, app_config: AppConfig = Depends(get_config)) -> MemoryResponse:
+async def delete_memory_fact_endpoint(fact_id: str) -> MemoryResponse:
    """Delete a single fact from memory by fact id."""
    try:
-        memory_data = delete_memory_fact(app_config.memory, fact_id, user_id=get_effective_user_id())
+        memory_data = delete_memory_fact(fact_id, user_id=get_effective_user_id())
    except KeyError as exc:
        raise HTTPException(status_code=404, detail=f"Memory fact '{fact_id}' not found.") from exc
    except OSError as exc:
@@ -241,11 +239,10 @@ async def delete_memory_fact_endpoint(fact_id: str, app_config: AppConfig = Depe
    summary="Patch Memory Fact",
    description="Partially update a single saved memory fact by its fact id while preserving omitted fields.",
 )
-async def update_memory_fact_endpoint(fact_id: str, request: FactPatchRequest, app_config: AppConfig = Depends(get_config)) -> MemoryResponse:
+async def update_memory_fact_endpoint(fact_id: str, request: FactPatchRequest) -> MemoryResponse:
    """Partially update a single fact manually."""
    try:
        memory_data = update_memory_fact(
-            app_config.memory,
            fact_id=fact_id,
            content=request.content,
            category=request.category,
@@ -269,9 +266,9 @@ async def update_memory_fact_endpoint(fact_id: str, request: FactPatchRequest, a
    summary="Export Memory Data",
    description="Export the current global memory data as JSON for backup or transfer.",
 )
-async def export_memory(app_config: AppConfig = Depends(get_config)) -> MemoryResponse:
+async def export_memory() -> MemoryResponse:
    """Export the current memory data."""
-    memory_data = get_memory_data(app_config.memory, user_id=get_effective_user_id())
+    memory_data = get_memory_data(user_id=get_effective_user_id())
    return MemoryResponse(**memory_data)


@@ -282,10 +279,10 @@ async def export_memory(app_config: AppConfig = Depends(get_config)) -> MemoryRe
    summary="Import Memory Data",
    description="Import and overwrite the current global memory data from a JSON payload.",
 )
-async def import_memory(request: MemoryResponse, app_config: AppConfig = Depends(get_config)) -> MemoryResponse:
+async def import_memory(request: MemoryResponse) -> MemoryResponse:
    """Import and persist memory data."""
    try:
-        memory_data = import_memory_data(app_config.memory, request.model_dump(), user_id=get_effective_user_id())
+        memory_data = import_memory_data(request.model_dump(), user_id=get_effective_user_id())
    except OSError as exc:
        raise HTTPException(status_code=500, detail="Failed to import memory data.") from exc

@@ -298,9 +295,7 @@ async def import_memory(request: MemoryResponse, app_config: AppConfig = Depends
    summary="Get Memory Configuration",
    description="Retrieve the current memory system configuration.",
 )
-async def get_memory_config_endpoint(
-    app_config: AppConfig = Depends(get_config),
-) -> MemoryConfigResponse:
+async def get_memory_config_endpoint() -> MemoryConfigResponse:
    """Get the memory system configuration.

    Returns:
@@ -319,7 +314,7 @@ async def get_memory_config_endpoint(
        }
        ```
    """
-    config = app_config.memory
+    config = get_memory_config()
    return MemoryConfigResponse(
        enabled=config.enabled,
        storage_path=config.storage_path,
@@ -338,16 +333,14 @@ async def get_memory_config_endpoint(
    summary="Get Memory Status",
    description="Retrieve both memory configuration and current data in a single request.",
 )
-async def get_memory_status(
-    app_config: AppConfig = Depends(get_config),
-) -> MemoryStatusResponse:
+async def get_memory_status() -> MemoryStatusResponse:
    """Get the memory system status including configuration and data.

    Returns:
        Combined memory configuration and current data.
    """
-    config = app_config.memory
-    memory_data = get_memory_data(config, user_id=get_effective_user_id())
+    config = get_memory_config()
+    memory_data = get_memory_data(user_id=get_effective_user_id())

    return MemoryStatusResponse(
        config=MemoryConfigResponse(
@@ -123,7 +123,8 @@ async def run_messages(
    run = await _resolve_run(run_id, request)
    event_store = get_run_event_store(request)
    rows = await event_store.list_messages_by_run(
-        run["thread_id"], run_id,
+        run["thread_id"],
+        run_id,
        limit=limit + 1,
        before_seq=before_seq,
        after_seq=after_seq,
@@ -1,32 +1,20 @@
-import errno
 import json
 import logging
-import shutil
 from pathlib import Path

-from fastapi import APIRouter, Depends, HTTPException, Request
+from fastapi import APIRouter, Depends, HTTPException
 from pydantic import BaseModel, Field

 from app.gateway.deps import get_config
 from app.gateway.path_utils import resolve_thread_virtual_path
 from deerflow.agents.lead_agent.prompt import refresh_skills_system_prompt_cache_async
 from deerflow.config.app_config import AppConfig
-from deerflow.config.extensions_config import ExtensionsConfig
-from deerflow.skills import Skill, load_skills
-from deerflow.skills.installer import SkillAlreadyExistsError, install_skill_from_archive
-from deerflow.skills.manager import (
-    append_history,
-    atomic_write,
-    custom_skill_exists,
-    ensure_custom_skill_is_editable,
-    get_custom_skill_dir,
-    get_custom_skill_file,
-    get_skill_history_file,
-    read_custom_skill_content,
-    read_history,
-    validate_skill_markdown_content,
-)
+from deerflow.config.extensions_config import ExtensionsConfig, SkillStateConfig, get_extensions_config, reload_extensions_config
+from deerflow.skills import Skill
+from deerflow.skills.installer import SkillAlreadyExistsError
 from deerflow.skills.security_scanner import scan_skill_content
+from deerflow.skills.storage import get_or_new_skill_storage
+from deerflow.skills.types import SKILL_MD_FILE, SkillCategory

 logger = logging.getLogger(__name__)

@@ -39,7 +27,7 @@ class SkillResponse(BaseModel):
    name: str = Field(..., description="Name of the skill")
    description: str = Field(..., description="Description of what the skill does")
    license: str | None = Field(None, description="License information")
-    category: str = Field(..., description="Category of the skill (public or custom)")
+    category: SkillCategory = Field(..., description="Category of the skill (public or custom)")
    enabled: bool = Field(default=True, description="Whether this skill is enabled")


@@ -103,9 +91,9 @@ def _skill_to_response(skill: Skill) -> SkillResponse:
    summary="List All Skills",
    description="Retrieve a list of all available skills from both public and custom directories.",
 )
-async def list_skills(app_config: AppConfig = Depends(get_config)) -> SkillsListResponse:
+async def list_skills(config: AppConfig = Depends(get_config)) -> SkillsListResponse:
    try:
-        skills = load_skills(app_config, enabled_only=False)
+        skills = get_or_new_skill_storage(app_config=config).load_skills(enabled_only=False)
        return SkillsListResponse(skills=[_skill_to_response(skill) for skill in skills])
    except Exception as e:
        logger.error(f"Failed to load skills: {e}", exc_info=True)
@@ -118,11 +106,11 @@ async def list_skills(app_config: AppConfig = Depends(get_config)) -> SkillsList
    summary="Install Skill",
    description="Install a skill from a .skill file (ZIP archive) located in the thread's user-data directory.",
 )
-async def install_skill(request: SkillInstallRequest, app_config: AppConfig = Depends(get_config)) -> SkillInstallResponse:
+async def install_skill(request: SkillInstallRequest, config: AppConfig = Depends(get_config)) -> SkillInstallResponse:
    try:
        skill_file_path = resolve_thread_virtual_path(request.thread_id, request.path)
-        result = install_skill_from_archive(skill_file_path)
-        await refresh_skills_system_prompt_cache_async(app_config)
+        result = await get_or_new_skill_storage(app_config=config).ainstall_skill_from_archive(skill_file_path)
+        await refresh_skills_system_prompt_cache_async()
        return SkillInstallResponse(**result)
    except FileNotFoundError as e:
        raise HTTPException(status_code=404, detail=str(e))
@@ -138,9 +126,9 @@ async def install_skill(request: SkillInstallRequest, app_config: AppConfig = De


@router.get("/skills/custom", response_model=SkillsListResponse, summary="List Custom Skills")
-async def list_custom_skills(app_config: AppConfig = Depends(get_config)) -> SkillsListResponse:
+async def list_custom_skills(config: AppConfig = Depends(get_config)) -> SkillsListResponse:
    try:
-        skills = [skill for skill in load_skills(app_config, enabled_only=False) if skill.category == "custom"]
+        skills = [skill for skill in get_or_new_skill_storage(app_config=config).load_skills(enabled_only=False) if skill.category == SkillCategory.CUSTOM]
        return SkillsListResponse(skills=[_skill_to_response(skill) for skill in skills])
    except Exception as e:
        logger.error("Failed to list custom skills: %s", e, exc_info=True)
@@ -148,13 +136,14 @@ async def list_custom_skills(app_config: AppConfig = Depends(get_config)) -> Ski


@router.get("/skills/custom/{skill_name}", response_model=CustomSkillContentResponse, summary="Get Custom Skill Content")
-async def get_custom_skill(skill_name: str, app_config: AppConfig = Depends(get_config)) -> CustomSkillContentResponse:
+async def get_custom_skill(skill_name: str, config: AppConfig = Depends(get_config)) -> CustomSkillContentResponse:
    try:
-        skills = load_skills(app_config, enabled_only=False)
-        skill = next((s for s in skills if s.name == skill_name and s.category == "custom"), None)
+        skill_name = skill_name.replace("\r\n", "").replace("\n", "")
+        skills = get_or_new_skill_storage(app_config=config).load_skills(enabled_only=False)
+        skill = next((s for s in skills if s.name == skill_name and s.category == SkillCategory.CUSTOM), None)
        if skill is None:
            raise HTTPException(status_code=404, detail=f"Custom skill '{skill_name}' not found")
-        return CustomSkillContentResponse(**_skill_to_response(skill).model_dump(), content=read_custom_skill_content(skill_name, app_config))
+        return CustomSkillContentResponse(**_skill_to_response(skill).model_dump(), content=get_or_new_skill_storage(app_config=config).read_custom_skill(skill_name))
    except HTTPException:
        raise
    except Exception as e:
@@ -163,35 +152,31 @@ async def get_custom_skill(skill_name: str, app_config: AppConfig = Depends(get_


@router.put("/skills/custom/{skill_name}", response_model=CustomSkillContentResponse, summary="Edit Custom Skill")
-async def update_custom_skill(
-    skill_name: str,
-    request: CustomSkillUpdateRequest,
-    app_config: AppConfig = Depends(get_config),
-) -> CustomSkillContentResponse:
+async def update_custom_skill(skill_name: str, request: CustomSkillUpdateRequest, config: AppConfig = Depends(get_config)) -> CustomSkillContentResponse:
    try:
-        ensure_custom_skill_is_editable(skill_name, app_config)
-        validate_skill_markdown_content(skill_name, request.content)
-        scan = await scan_skill_content(app_config, request.content, executable=False, location=f"{skill_name}/SKILL.md")
+        skill_name = skill_name.replace("\r\n", "").replace("\n", "")
+        storage = get_or_new_skill_storage(app_config=config)
+        storage.ensure_custom_skill_is_editable(skill_name)
+        storage.validate_skill_markdown_content(skill_name, request.content)
+        scan = await scan_skill_content(request.content, executable=False, location=f"{skill_name}/{SKILL_MD_FILE}", app_config=config)
        if scan.decision == "block":
            raise HTTPException(status_code=400, detail=f"Security scan blocked the edit: {scan.reason}")
-        skill_file = get_custom_skill_dir(skill_name, app_config) / "SKILL.md"
-        prev_content = skill_file.read_text(encoding="utf-8")
-        atomic_write(skill_file, request.content)
-        append_history(
+        prev_content = storage.read_custom_skill(skill_name)
+        storage.write_custom_skill(skill_name, SKILL_MD_FILE, request.content)
+        storage.append_history(
            skill_name,
            {
                "action": "human_edit",
                "author": "human",
                "thread_id": None,
-                "file_path": "SKILL.md",
+                "file_path": SKILL_MD_FILE,
                "prev_content": prev_content,
                "new_content": request.content,
                "scanner": {"decision": scan.decision, "reason": scan.reason},
            },
-            app_config,
        )
-        await refresh_skills_system_prompt_cache_async(app_config)
-        return await get_custom_skill(skill_name, app_config)
+        await refresh_skills_system_prompt_cache_async()
+        return await get_custom_skill(skill_name, config)
    except HTTPException:
        raise
    except FileNotFoundError as e:
@@ -204,31 +189,23 @@ async def update_custom_skill(


@router.delete("/skills/custom/{skill_name}", summary="Delete Custom Skill")
-async def delete_custom_skill(skill_name: str, app_config: AppConfig = Depends(get_config)) -> dict[str, bool]:
+async def delete_custom_skill(skill_name: str, config: AppConfig = Depends(get_config)) -> dict[str, bool]:
    try:
-        ensure_custom_skill_is_editable(skill_name, app_config)
-        skill_dir = get_custom_skill_dir(skill_name, app_config)
-        prev_content = read_custom_skill_content(skill_name, app_config)
-        try:
-            append_history(
-                skill_name,
-                {
-                    "action": "human_delete",
-                    "author": "human",
-                    "thread_id": None,
-                    "file_path": "SKILL.md",
-                    "prev_content": prev_content,
-                    "new_content": None,
-                    "scanner": {"decision": "allow", "reason": "Deletion requested."},
-                },
-                app_config,
-            )
-        except OSError as e:
-            if not isinstance(e, PermissionError) and e.errno not in {errno.EACCES, errno.EPERM, errno.EROFS}:
-                raise
-            logger.warning("Skipping delete history write for custom skill %s due to readonly/permission failure; continuing with skill directory removal: %s", skill_name, e)
-        shutil.rmtree(skill_dir)
-        await refresh_skills_system_prompt_cache_async(app_config)
+        skill_name = skill_name.replace("\r\n", "").replace("\n", "")
+        storage = get_or_new_skill_storage(app_config=config)
+        storage.delete_custom_skill(
+            skill_name,
+            history_meta={
+                "action": "human_delete",
+                "author": "human",
+                "thread_id": None,
+                "file_path": SKILL_MD_FILE,
+                "prev_content": None,
+                "new_content": None,
+                "scanner": {"decision": "allow", "reason": "Deletion requested."},
+            },
+        )
+        await refresh_skills_system_prompt_cache_async()
        return {"success": True}
    except FileNotFoundError as e:
        raise HTTPException(status_code=404, detail=str(e))
@@ -240,11 +217,13 @@ async def delete_custom_skill(skill_name: str, app_config: AppConfig = Depends(g


@router.get("/skills/custom/{skill_name}/history", response_model=CustomSkillHistoryResponse, summary="Get Custom Skill History")
-async def get_custom_skill_history(skill_name: str, app_config: AppConfig = Depends(get_config)) -> CustomSkillHistoryResponse:
+async def get_custom_skill_history(skill_name: str, config: AppConfig = Depends(get_config)) -> CustomSkillHistoryResponse:
    try:
-        if not custom_skill_exists(skill_name, app_config) and not get_skill_history_file(skill_name, app_config).exists():
+        skill_name = skill_name.replace("\r\n", "").replace("\n", "")
+        storage = get_or_new_skill_storage(app_config=config)
+        if not storage.custom_skill_exists(skill_name) and not storage.get_skill_history_file(skill_name).exists():
            raise HTTPException(status_code=404, detail=f"Custom skill '{skill_name}' not found")
-        return CustomSkillHistoryResponse(history=read_history(skill_name, app_config))
+        return CustomSkillHistoryResponse(history=storage.read_history(skill_name))
    except HTTPException:
        raise
    except Exception as e:
@@ -253,42 +232,39 @@ async def get_custom_skill_history(skill_name: str, app_config: AppConfig = Depe


@router.post("/skills/custom/{skill_name}/rollback", response_model=CustomSkillContentResponse, summary="Rollback Custom Skill")
-async def rollback_custom_skill(
-    skill_name: str,
-    request: SkillRollbackRequest,
-    app_config: AppConfig = Depends(get_config),
-) -> CustomSkillContentResponse:
+async def rollback_custom_skill(skill_name: str, request: SkillRollbackRequest, config: AppConfig = Depends(get_config)) -> CustomSkillContentResponse:
    try:
-        if not custom_skill_exists(skill_name, app_config) and not get_skill_history_file(skill_name, app_config).exists():
+        storage = get_or_new_skill_storage(app_config=config)
+        if not storage.custom_skill_exists(skill_name) and not storage.get_skill_history_file(skill_name).exists():
            raise HTTPException(status_code=404, detail=f"Custom skill '{skill_name}' not found")
-        history = read_history(skill_name, app_config)
+        history = storage.read_history(skill_name)
        if not history:
            raise HTTPException(status_code=400, detail=f"Custom skill '{skill_name}' has no history")
        record = history[request.history_index]
        target_content = record.get("prev_content")
        if target_content is None:
            raise HTTPException(status_code=400, detail="Selected history entry has no previous content to roll back to")
-        validate_skill_markdown_content(skill_name, target_content)
-        scan = await scan_skill_content(app_config, target_content, executable=False, location=f"{skill_name}/SKILL.md")
-        skill_file = get_custom_skill_file(skill_name, app_config)
+        storage.validate_skill_markdown_content(skill_name, target_content)
+        scan = await scan_skill_content(target_content, executable=False, location=f"{skill_name}/{SKILL_MD_FILE}", app_config=config)
+        skill_file = storage.get_custom_skill_file(skill_name)
        current_content = skill_file.read_text(encoding="utf-8") if skill_file.exists() else None
        history_entry = {
            "action": "rollback",
            "author": "human",
            "thread_id": None,
-            "file_path": "SKILL.md",
+            "file_path": SKILL_MD_FILE,
            "prev_content": current_content,
            "new_content": target_content,
            "rollback_from_ts": record.get("ts"),
            "scanner": {"decision": scan.decision, "reason": scan.reason},
        }
        if scan.decision == "block":
-            append_history(skill_name, history_entry, app_config)
+            storage.append_history(skill_name, history_entry)
            raise HTTPException(status_code=400, detail=f"Rollback blocked by security scanner: {scan.reason}")
-        atomic_write(skill_file, target_content)
-        append_history(skill_name, history_entry, app_config)
-        await refresh_skills_system_prompt_cache_async(app_config)
-        return await get_custom_skill(skill_name, app_config)
+        storage.write_custom_skill(skill_name, SKILL_MD_FILE, target_content)
+        storage.append_history(skill_name, history_entry)
+        await refresh_skills_system_prompt_cache_async()
+        return await get_custom_skill(skill_name, config)
    except HTTPException:
        raise
    except IndexError:
@@ -308,9 +284,10 @@ async def rollback_custom_skill(
    summary="Get Skill Details",
    description="Retrieve detailed information about a specific skill by its name.",
 )
-async def get_skill(skill_name: str, app_config: AppConfig = Depends(get_config)) -> SkillResponse:
+async def get_skill(skill_name: str, config: AppConfig = Depends(get_config)) -> SkillResponse:
    try:
-        skills = load_skills(app_config, enabled_only=False)
+        skill_name = skill_name.replace("\r\n", "").replace("\n", "")
+        skills = get_or_new_skill_storage(app_config=config).load_skills(enabled_only=False)
        skill = next((s for s in skills if s.name == skill_name), None)

        if skill is None:
@@ -330,14 +307,10 @@ async def get_skill(skill_name: str, app_config: AppConfig = Depends(get_config)
    summary="Update Skill",
    description="Update a skill's enabled status by modifying the extensions_config.json file.",
 )
-async def update_skill(
-    skill_name: str,
-    request: SkillUpdateRequest,
-    http_request: Request,
-    app_config: AppConfig = Depends(get_config),
-) -> SkillResponse:
+async def update_skill(skill_name: str, request: SkillUpdateRequest, config: AppConfig = Depends(get_config)) -> SkillResponse:
    try:
-        skills = load_skills(app_config, enabled_only=False)
+        skill_name = skill_name.replace("\r\n", "").replace("\n", "")
+        skills = get_or_new_skill_storage(app_config=config).load_skills(enabled_only=False)
        skill = next((s for s in skills if s.name == skill_name), None)

        if skill is None:
@@ -348,29 +321,22 @@ async def update_skill(
            config_path = Path.cwd().parent / "extensions_config.json"
            logger.info(f"No existing extensions config found. Creating new config at: {config_path}")

-        # Do not mutate the frozen AppConfig in place. Compose the new skills
-        # state in a fresh dict, write to disk, and reload AppConfig below so
-        # every subsequent Depends(get_config) sees the refreshed snapshot.
-        ext = app_config.extensions
-        updated_skills = {name: {"enabled": skill_config.enabled} for name, skill_config in ext.skills.items()}
-        updated_skills[skill_name] = {"enabled": request.enabled}
+        extensions_config = get_extensions_config()
+        extensions_config.skills[skill_name] = SkillStateConfig(enabled=request.enabled)

        config_data = {
-            "mcpServers": {name: server.model_dump() for name, server in ext.mcp_servers.items()},
-            "skills": updated_skills,
+            "mcpServers": {name: server.model_dump() for name, server in extensions_config.mcp_servers.items()},
+            "skills": {name: {"enabled": skill_config.enabled} for name, skill_config in extensions_config.skills.items()},
        }

        with open(config_path, "w", encoding="utf-8") as f:
            json.dump(config_data, f, indent=2)

        logger.info(f"Skills configuration updated and saved to: {config_path}")
-        # Reload AppConfig and swap ``app.state.config`` so subsequent
-        # ``Depends(get_config)`` sees the refreshed value.
-        reloaded = AppConfig.from_file()
-        http_request.app.state.config = reloaded
-        await refresh_skills_system_prompt_cache_async(reloaded)
+        reload_extensions_config()
+        await refresh_skills_system_prompt_cache_async()

-        skills = load_skills(reloaded, enabled_only=False)
+        skills = get_or_new_skill_storage(app_config=config).load_skills(enabled_only=False)
        updated_skill = next((s for s in skills if s.name == skill_name), None)

        if updated_skill is None:
@@ -102,7 +102,12 @@ def _format_conversation(messages: list[SuggestionMessage]) -> str:
    description="Generate short follow-up questions a user might ask next, based on recent conversation context.",
 )
@require_permission("threads", "read", owner_check=True)
-async def generate_suggestions(thread_id: str, body: SuggestionsRequest, request: Request, app_config: AppConfig = Depends(get_config)) -> SuggestionsResponse:
+async def generate_suggestions(
+    thread_id: str,
+    body: SuggestionsRequest,
+    request: Request,
+    config: AppConfig = Depends(get_config),
+) -> SuggestionsResponse:
    if not body.messages:
        return SuggestionsResponse(suggestions=[])

@@ -124,7 +129,7 @@ async def generate_suggestions(thread_id: str, body: SuggestionsRequest, request
    user_content = f"Conversation Context:\n{conversation}\n\nGenerate {n} follow-up questions"

    try:
-        model = create_chat_model(name=body.model_name, thinking_enabled=False, app_config=app_config)
+        model = create_chat_model(name=body.model_name, thinking_enabled=False, app_config=config)
        response = await model.ainvoke([SystemMessage(content=system_instruction), HumanMessage(content=user_content)], config={"run_name": "suggest_agent"})
        raw = _extract_response_text(response.content)
        suggestions = _parse_json_string_list(raw) or []
@@ -54,7 +54,6 @@ class RunCreateRequest(BaseModel):
    after_seconds: float | None = Field(default=None, description="Delayed execution")
    if_not_exists: Literal["reject", "create"] = Field(default="create", description="Thread creation policy")
    feedback_keys: list[str] | None = Field(default=None, description="LangSmith feedback keys")
-    follow_up_to_run_id: str | None = Field(default=None, description="Run ID this message follows up on. Auto-detected from latest successful run if not provided.")


 class RunResponse(BaseModel):
@@ -69,6 +68,27 @@ class RunResponse(BaseModel):
    updated_at: str = ""


+class ThreadTokenUsageModelBreakdown(BaseModel):
+    tokens: int = 0
+    runs: int = 0
+
+
+class ThreadTokenUsageCallerBreakdown(BaseModel):
+    lead_agent: int = 0
+    subagent: int = 0
+    middleware: int = 0
+
+
+class ThreadTokenUsageResponse(BaseModel):
+    thread_id: str
+    total_tokens: int = 0
+    total_input_tokens: int = 0
+    total_output_tokens: int = 0
+    total_runs: int = 0
+    by_model: dict[str, ThreadTokenUsageModelBreakdown] = Field(default_factory=dict)
+    by_caller: ThreadTokenUsageCallerBreakdown = Field(default_factory=ThreadTokenUsageCallerBreakdown)
+
+
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
@@ -312,11 +332,15 @@ async def list_thread_messages(
        if i in last_ai_indices:
            run_id = msg["run_id"]
            fb = feedback_map.get(run_id)
-            msg["feedback"] = {
-                "feedback_id": fb["feedback_id"],
-                "rating": fb["rating"],
-                "comment": fb.get("comment"),
-            } if fb else None
+            msg["feedback"] = (
+                {
+                    "feedback_id": fb["feedback_id"],
+                    "rating": fb["rating"],
+                    "comment": fb.get("comment"),
+                }
+                if fb
+                else None
+            )
        else:
            msg["feedback"] = None

@@ -339,7 +363,8 @@ async def list_run_messages(
    """
    event_store = get_run_event_store(request)
    rows = await event_store.list_messages_by_run(
-        thread_id, run_id,
+        thread_id,
+        run_id,
        limit=limit + 1,
        before_seq=before_seq,
        after_seq=after_seq,
@@ -364,10 +389,10 @@ async def list_run_events(
    return await event_store.list_events(thread_id, run_id, event_types=types, limit=limit)


-@router.get("/{thread_id}/token-usage")
+@router.get("/{thread_id}/token-usage", response_model=ThreadTokenUsageResponse)
@require_permission("threads", "read", owner_check=True)
-async def thread_token_usage(thread_id: str, request: Request) -> dict:
+async def thread_token_usage(thread_id: str, request: Request) -> ThreadTokenUsageResponse:
    """Thread-level token usage aggregation."""
    run_store = get_run_store(request)
    agg = await run_store.aggregate_tokens_by_thread(thread_id)
-    return {"thread_id": thread_id, **agg}
+    return ThreadTokenUsageResponse(thread_id=thread_id, **agg)
@@ -13,12 +13,11 @@ matching the LangGraph Platform wire format expected by the
 from __future__ import annotations

 import logging
-import re
-import time
 import uuid
 from typing import Any

 from fastapi import APIRouter, HTTPException, Request
+from langgraph.checkpoint.base import empty_checkpoint
 from pydantic import BaseModel, Field, field_validator

 from app.gateway.authz import require_permission
@@ -27,6 +26,7 @@ from app.gateway.utils import sanitize_log_param
 from deerflow.config.paths import Paths, get_paths
 from deerflow.runtime import serialize_channel_values
 from deerflow.runtime.user_context import get_effective_user_id
+from deerflow.utils.time import coerce_iso, now_iso

 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/api/threads", tags=["threads"])
@@ -234,7 +234,7 @@ async def create_thread(body: ThreadCreateRequest, request: Request) -> ThreadRe
    checkpointer = get_checkpointer(request)
    thread_store = get_thread_store(request)
    thread_id = body.thread_id or str(uuid.uuid4())
-    now = time.time()
+    now = now_iso()
    # ``body.metadata`` is already stripped of server-reserved keys by
    # ``ThreadCreateRequest._strip_reserved`` — see the model definition.

@@ -244,8 +244,8 @@ async def create_thread(body: ThreadCreateRequest, request: Request) -> ThreadRe
        return ThreadResponse(
            thread_id=thread_id,
            status=existing_record.get("status", "idle"),
-            created_at=str(existing_record.get("created_at", "")),
-            updated_at=str(existing_record.get("updated_at", "")),
+            created_at=coerce_iso(existing_record.get("created_at", "")),
+            updated_at=coerce_iso(existing_record.get("updated_at", "")),
            metadata=existing_record.get("metadata", {}),
        )

@@ -263,8 +263,6 @@ async def create_thread(body: ThreadCreateRequest, request: Request) -> ThreadRe
    # Write an empty checkpoint so state endpoints work immediately
    config = {"configurable": {"thread_id": thread_id, "checkpoint_ns": ""}}
    try:
-        from langgraph.checkpoint.base import empty_checkpoint
-
        ckpt_metadata = {
            "step": -1,
            "source": "input",
@@ -282,8 +280,8 @@ async def create_thread(body: ThreadCreateRequest, request: Request) -> ThreadRe
    return ThreadResponse(
        thread_id=thread_id,
        status="idle",
-        created_at=str(now),
-        updated_at=str(now),
+        created_at=now,
+        updated_at=now,
        metadata=body.metadata,
    )

@@ -308,8 +306,11 @@ async def search_threads(body: ThreadSearchRequest, request: Request) -> list[Th
        ThreadResponse(
            thread_id=r["thread_id"],
            status=r.get("status", "idle"),
-            created_at=r.get("created_at", ""),
-            updated_at=r.get("updated_at", ""),
+            # ``coerce_iso`` heals legacy unix-second values that
+            # ``MemoryThreadMetaStore`` historically wrote with ``time.time()``;
+            # SQL-backed rows already arrive as ISO strings and pass through.
+            created_at=coerce_iso(r.get("created_at", "")),
+            updated_at=coerce_iso(r.get("updated_at", "")),
            metadata=r.get("metadata", {}),
            values={"title": r["display_name"]} if r.get("display_name") else {},
            interrupts={},
@@ -341,8 +342,8 @@ async def patch_thread(thread_id: str, body: ThreadPatchRequest, request: Reques
    return ThreadResponse(
        thread_id=thread_id,
        status=record.get("status", "idle"),
-        created_at=str(record.get("created_at", "")),
-        updated_at=str(record.get("updated_at", "")),
+        created_at=coerce_iso(record.get("created_at", "")),
+        updated_at=coerce_iso(record.get("updated_at", "")),
        metadata=record.get("metadata", {}),
    )

@@ -382,8 +383,8 @@ async def get_thread(thread_id: str, request: Request) -> ThreadResponse:
        record = {
            "thread_id": thread_id,
            "status": "idle",
-            "created_at": ckpt_meta.get("created_at", ""),
-            "updated_at": ckpt_meta.get("updated_at", ckpt_meta.get("created_at", "")),
+            "created_at": coerce_iso(ckpt_meta.get("created_at", "")),
+            "updated_at": coerce_iso(ckpt_meta.get("updated_at", ckpt_meta.get("created_at", ""))),
            "metadata": {k: v for k, v in ckpt_meta.items() if k not in ("created_at", "updated_at", "step", "source", "writes", "parents")},
        }

@@ -397,8 +398,8 @@ async def get_thread(thread_id: str, request: Request) -> ThreadResponse:
    return ThreadResponse(
        thread_id=thread_id,
        status=status,
-        created_at=str(record.get("created_at", "")),
-        updated_at=str(record.get("updated_at", "")),
+        created_at=coerce_iso(record.get("created_at", "")),
+        updated_at=coerce_iso(record.get("updated_at", "")),
        metadata=record.get("metadata", {}),
        values=serialize_channel_values(channel_values),
    )
@@ -449,10 +450,10 @@ async def get_thread_state(thread_id: str, request: Request) -> ThreadStateRespo
        values=values,
        next=next_tasks,
        metadata=metadata,
-        checkpoint={"id": checkpoint_id, "ts": str(metadata.get("created_at", ""))},
+        checkpoint={"id": checkpoint_id, "ts": coerce_iso(metadata.get("created_at", ""))},
        checkpoint_id=checkpoint_id,
        parent_checkpoint_id=parent_checkpoint_id,
-        created_at=str(metadata.get("created_at", "")),
+        created_at=coerce_iso(metadata.get("created_at", "")),
        tasks=tasks,
    )

@@ -502,7 +503,7 @@ async def update_thread_state(thread_id: str, body: ThreadStateUpdateRequest, re
        channel_values.update(body.values)

    checkpoint["channel_values"] = channel_values
-    metadata["updated_at"] = time.time()
+    metadata["updated_at"] = now_iso()

    if body.as_node:
        metadata["source"] = "update"
@@ -543,7 +544,7 @@ async def update_thread_state(thread_id: str, body: ThreadStateUpdateRequest, re
        next=[],
        metadata=metadata,
        checkpoint_id=new_checkpoint_id,
-        created_at=str(metadata.get("created_at", "")),
+        created_at=coerce_iso(metadata.get("created_at", "")),
    )


@@ -610,7 +611,7 @@ async def get_thread_history(thread_id: str, body: ThreadHistoryRequest, request
                    parent_checkpoint_id=parent_id,
                    metadata=user_meta,
                    values=values,
-                    created_at=str(metadata.get("created_at", "")),
+                    created_at=coerce_iso(metadata.get("created_at", "")),
                    next=next_tasks,
                )
            )
@@ -5,7 +5,7 @@ import os
 import stat

 from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile
-from pydantic import BaseModel
+from pydantic import BaseModel, Field

 from app.gateway.authz import require_permission
 from app.gateway.deps import get_config
@@ -15,12 +15,15 @@ from deerflow.runtime.user_context import get_effective_user_id
 from deerflow.sandbox.sandbox_provider import SandboxProvider, get_sandbox_provider
 from deerflow.uploads.manager import (
    PathTraversalError,
+    UnsafeUploadPathError,
+    claim_unique_filename,
    delete_file_safe,
    enrich_file_listing,
    ensure_uploads_dir,
    get_uploads_dir,
    list_files_in_dir,
    normalize_filename,
+    open_upload_file_no_symlink,
    upload_artifact_url,
    upload_virtual_path,
 )
@@ -30,6 +33,11 @@ logger = logging.getLogger(__name__)

 router = APIRouter(prefix="/api/threads/{thread_id}/uploads", tags=["uploads"])

+UPLOAD_CHUNK_SIZE = 8192
+DEFAULT_MAX_FILES = 10
+DEFAULT_MAX_FILE_SIZE = 50 * 1024 * 1024
+DEFAULT_MAX_TOTAL_SIZE = 100 * 1024 * 1024
+

 class UploadResponse(BaseModel):
    """Response model for file upload."""
@@ -37,6 +45,15 @@ class UploadResponse(BaseModel):
    success: bool
    files: list[dict[str, str]]
    message: str
+    skipped_files: list[str] = Field(default_factory=list)
+
+
+class UploadLimits(BaseModel):
+    """Application-level upload limits exposed to clients."""
+
+    max_files: int
+    max_file_size: int
+    max_total_size: int


 def _make_file_sandbox_writable(file_path: os.PathLike[str] | str) -> None:
@@ -69,6 +86,72 @@ def _get_uploads_config_value(app_config: AppConfig, key: str, default: object)
    return getattr(uploads_cfg, key, default)


+def _get_upload_limit(app_config: AppConfig, key: str, default: int, *, legacy_key: str | None = None) -> int:
+    try:
+        value = _get_uploads_config_value(app_config, key, None)
+        if value is None and legacy_key is not None:
+            value = _get_uploads_config_value(app_config, legacy_key, None)
+        if value is None:
+            value = default
+        limit = int(value)
+        if limit <= 0:
+            raise ValueError
+        return limit
+    except Exception:
+        logger.warning("Invalid uploads.%s value; falling back to %d", key, default)
+        return default
+
+
+def _get_upload_limits(app_config: AppConfig) -> UploadLimits:
+    return UploadLimits(
+        max_files=_get_upload_limit(app_config, "max_files", DEFAULT_MAX_FILES, legacy_key="max_file_count"),
+        max_file_size=_get_upload_limit(app_config, "max_file_size", DEFAULT_MAX_FILE_SIZE, legacy_key="max_single_file_size"),
+        max_total_size=_get_upload_limit(app_config, "max_total_size", DEFAULT_MAX_TOTAL_SIZE),
+    )
+
+
+def _cleanup_uploaded_paths(paths: list[os.PathLike[str] | str]) -> None:
+    for path in reversed(paths):
+        try:
+            os.unlink(path)
+        except FileNotFoundError:
+            pass
+        except Exception:
+            logger.warning("Failed to clean up upload path after rejected request: %s", path, exc_info=True)
+
+
+async def _write_upload_file_with_limits(
+    file: UploadFile,
+    *,
+    uploads_dir: os.PathLike[str] | str,
+    display_filename: str,
+    max_single_file_size: int,
+    max_total_size: int,
+    total_size: int,
+) -> tuple[os.PathLike[str] | str, int, int]:
+    file_size = 0
+    file_path, fh = open_upload_file_no_symlink(uploads_dir, display_filename)
+    try:
+        while chunk := await file.read(UPLOAD_CHUNK_SIZE):
+            file_size += len(chunk)
+            total_size += len(chunk)
+            if file_size > max_single_file_size:
+                raise HTTPException(status_code=413, detail=f"File too large: {display_filename}")
+            if total_size > max_total_size:
+                raise HTTPException(status_code=413, detail="Total upload size too large")
+            fh.write(chunk)
+    except Exception:
+        fh.close()
+        try:
+            os.unlink(file_path)
+        except FileNotFoundError:
+            pass
+        raise
+    else:
+        fh.close()
+    return file_path, file_size, total_size
+
+
 def _auto_convert_documents_enabled(app_config: AppConfig) -> bool:
    """Return whether automatic host-side document conversion is enabled.

@@ -85,72 +168,94 @@ def _auto_convert_documents_enabled(app_config: AppConfig) -> bool:


@router.post("", response_model=UploadResponse)
-@require_permission("threads", "write", owner_check=True, require_existing=True)
+@require_permission("threads", "write", owner_check=True, require_existing=False)
 async def upload_files(
    thread_id: str,
    request: Request,
    files: list[UploadFile] = File(...),
-    app_config: AppConfig = Depends(get_config),
+    config: AppConfig = Depends(get_config),
 ) -> UploadResponse:
    """Upload multiple files to a thread's uploads directory."""
    if not files:
        raise HTTPException(status_code=400, detail="No files provided")

+    limits = _get_upload_limits(config)
+    if len(files) > limits.max_files:
+        raise HTTPException(status_code=413, detail=f"Too many files: maximum is {limits.max_files}")
+
    try:
        uploads_dir = ensure_uploads_dir(thread_id)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    sandbox_uploads = get_paths().sandbox_uploads_dir(thread_id, user_id=get_effective_user_id())
    uploaded_files = []
+    written_paths = []
+    sandbox_sync_targets = []
+    skipped_files = []
+    total_size = 0
+    # Track filenames within this request so duplicate form parts do not
+    # silently truncate each other. Existing uploads keep the historical
+    # overwrite behavior for a single replacement upload.
+    seen_filenames: set[str] = set()

-    sandbox_provider = get_sandbox_provider(app_config)
+    sandbox_provider = get_sandbox_provider()
    sync_to_sandbox = not _uses_thread_data_mounts(sandbox_provider)
    sandbox = None
    if sync_to_sandbox:
        sandbox_id = sandbox_provider.acquire(thread_id)
        sandbox = sandbox_provider.get(sandbox_id)
-    auto_convert_documents = _auto_convert_documents_enabled(app_config)
+        if sandbox is None:
+            raise HTTPException(status_code=500, detail="Failed to acquire sandbox")
+    auto_convert_documents = _auto_convert_documents_enabled(config)

    for file in files:
        if not file.filename:
            continue

        try:
-            safe_filename = normalize_filename(file.filename)
+            original_filename = normalize_filename(file.filename)
+            safe_filename = claim_unique_filename(original_filename, seen_filenames)
        except ValueError:
            logger.warning(f"Skipping file with unsafe filename: {file.filename!r}")
            continue

        try:
-            content = await file.read()
-            file_path = uploads_dir / safe_filename
-            file_path.write_bytes(content)
+            file_path, file_size, total_size = await _write_upload_file_with_limits(
+                file,
+                uploads_dir=uploads_dir,
+                display_filename=safe_filename,
+                max_single_file_size=limits.max_file_size,
+                max_total_size=limits.max_total_size,
+                total_size=total_size,
+            )
+            written_paths.append(file_path)

            virtual_path = upload_virtual_path(safe_filename)

-            if sync_to_sandbox and sandbox is not None:
-                _make_file_sandbox_writable(file_path)
-                sandbox.update_file(virtual_path, content)
+            if sync_to_sandbox:
+                sandbox_sync_targets.append((file_path, virtual_path))

            file_info = {
                "filename": safe_filename,
-                "size": str(len(content)),
+                "size": str(file_size),
                "path": str(sandbox_uploads / safe_filename),
                "virtual_path": virtual_path,
                "artifact_url": upload_artifact_url(thread_id, safe_filename),
            }
+            if safe_filename != original_filename:
+                file_info["original_filename"] = original_filename

-            logger.info(f"Saved file: {safe_filename} ({len(content)} bytes) to {file_info['path']}")
+            logger.info(f"Saved file: {safe_filename} ({file_size} bytes) to {file_info['path']}")

            file_ext = file_path.suffix.lower()
            if auto_convert_documents and file_ext in CONVERTIBLE_EXTENSIONS:
                md_path = await convert_file_to_markdown(file_path)
                if md_path:
+                    written_paths.append(md_path)
                    md_virtual_path = upload_virtual_path(md_path.name)

-                    if sync_to_sandbox and sandbox is not None:
-                        _make_file_sandbox_writable(md_path)
-                        sandbox.update_file(md_virtual_path, md_path.read_bytes())
+                    if sync_to_sandbox:
+                        sandbox_sync_targets.append((md_path, md_virtual_path))

                    file_info["markdown_file"] = md_path.name
                    file_info["markdown_path"] = str(sandbox_uploads / md_path.name)
@@ -159,17 +264,46 @@ async def upload_files(

            uploaded_files.append(file_info)

+        except HTTPException as e:
+            _cleanup_uploaded_paths(written_paths)
+            raise e
+        except UnsafeUploadPathError as e:
+            logger.warning("Skipping upload with unsafe destination %s: %s", file.filename, e)
+            skipped_files.append(safe_filename)
+            continue
        except Exception as e:
            logger.error(f"Failed to upload {file.filename}: {e}")
+            _cleanup_uploaded_paths(written_paths)
            raise HTTPException(status_code=500, detail=f"Failed to upload {file.filename}: {str(e)}")

+    if sync_to_sandbox:
+        for file_path, virtual_path in sandbox_sync_targets:
+            _make_file_sandbox_writable(file_path)
+            sandbox.update_file(virtual_path, file_path.read_bytes())
+
+    message = f"Successfully uploaded {len(uploaded_files)} file(s)"
+    if skipped_files:
+        message += f"; skipped {len(skipped_files)} unsafe file(s)"
+
    return UploadResponse(
-        success=True,
+        success=not skipped_files,
        files=uploaded_files,
-        message=f"Successfully uploaded {len(uploaded_files)} file(s)",
+        message=message,
+        skipped_files=skipped_files,
    )


+@router.get("/limits", response_model=UploadLimits)
+@require_permission("threads", "read", owner_check=True)
+async def get_upload_limits(
+    thread_id: str,
+    request: Request,
+    config: AppConfig = Depends(get_config),
+) -> UploadLimits:
+    """Return upload limits used by the gateway for this thread."""
+    return _get_upload_limits(config)
+
+
@router.get("/list", response_model=dict)
@require_permission("threads", "read", owner_check=True)
 async def list_uploaded_files(thread_id: str, request: Request) -> dict:
@@ -8,7 +8,6 @@ frames, and consuming stream bridge events.  Router modules
 from __future__ import annotations

 import asyncio
-import dataclasses
 import json
 import logging
 import re
@@ -18,7 +17,7 @@ from typing import Any
 from fastapi import HTTPException, Request
 from langchain_core.messages import HumanMessage

-from app.gateway.deps import get_run_context, get_run_manager, get_run_store, get_stream_bridge
+from app.gateway.deps import get_run_context, get_run_manager, get_stream_bridge
 from app.gateway.utils import sanitize_log_param
 from deerflow.runtime import (
    END_SENTINEL,
@@ -99,6 +98,62 @@ def normalize_input(raw_input: dict[str, Any] | None) -> dict[str, Any]:
 _DEFAULT_ASSISTANT_ID = "lead_agent"


+# Whitelist of run-context keys that the langgraph-compat layer forwards from
+# ``body.context`` into the run config. ``config["context"]`` exists in
+# LangGraph >=0.6, but these values must be written to both ``configurable``
+# (for legacy ``_get_runtime_config`` consumers) and ``context`` because
+# LangGraph >=1.1.9 no longer makes ``ToolRuntime.context`` fall back to
+# ``configurable`` for consumers like ``setup_agent``.
+_CONTEXT_CONFIGURABLE_KEYS: frozenset[str] = frozenset(
+    {
+        "model_name",
+        "mode",
+        "thinking_enabled",
+        "reasoning_effort",
+        "is_plan_mode",
+        "subagent_enabled",
+        "max_concurrent_subagents",
+        "agent_name",
+        "is_bootstrap",
+    }
+)
+
+
+def merge_run_context_overrides(config: dict[str, Any], context: Mapping[str, Any] | None) -> None:
+    """Merge whitelisted keys from ``body.context`` into both ``config['configurable']``
+    and ``config['context']`` so they are visible to legacy configurable readers and
+    to LangGraph ``ToolRuntime.context`` consumers (e.g. the ``setup_agent`` tool —
+    see issue #2677)."""
+    if not context:
+        return
+    configurable = config.setdefault("configurable", {})
+    runtime_context = config.setdefault("context", {})
+    for key in _CONTEXT_CONFIGURABLE_KEYS:
+        if key in context:
+            if isinstance(configurable, dict):
+                configurable.setdefault(key, context[key])
+            if isinstance(runtime_context, dict):
+                runtime_context.setdefault(key, context[key])
+
+
+def inject_authenticated_user_context(config: dict[str, Any], request: Request) -> None:
+    """Stamp the authenticated user into the run context for background tools.
+
+    Tool execution may happen after the request handler has returned, so tools
+    that persist user-scoped files should not rely only on ambient ContextVars.
+    The value comes from server-side auth state, never from client context.
+    """
+
+    user = getattr(request.state, "user", None)
+    user_id = getattr(user, "id", None)
+    if user_id is None:
+        return
+
+    runtime_context = config.setdefault("context", {})
+    if isinstance(runtime_context, dict):
+        runtime_context["user_id"] = str(user_id)
+
+
 def resolve_agent_factory(assistant_id: str | None):
    """Resolve the agent factory callable from config.

@@ -212,21 +267,6 @@ async def start_run(

    disconnect = DisconnectMode.cancel if body.on_disconnect == "cancel" else DisconnectMode.continue_

-    # Resolve follow_up_to_run_id: explicit from request, or auto-detect from latest successful run
-    follow_up_to_run_id = getattr(body, "follow_up_to_run_id", None)
-    if follow_up_to_run_id is None:
-        run_store = get_run_store(request)
-        try:
-            recent_runs = await run_store.list_by_thread(thread_id, limit=1)
-            if recent_runs and recent_runs[0].get("status") == "success":
-                follow_up_to_run_id = recent_runs[0]["run_id"]
-        except Exception:
-            pass  # Don't block run creation
-
-    # Enrich base context with per-run field
-    if follow_up_to_run_id:
-        run_ctx = dataclasses.replace(run_ctx, follow_up_to_run_id=follow_up_to_run_id)
-
    try:
        record = await run_mgr.create_or_reject(
            thread_id,
@@ -235,7 +275,6 @@ async def start_run(
            metadata=body.metadata or {},
            kwargs={"input": body.input, "config": body.config},
            multitask_strategy=body.multitask_strategy,
-            follow_up_to_run_id=follow_up_to_run_id,
        )
    except ConflictError as exc:
        raise HTTPException(status_code=409, detail=str(exc)) from exc
@@ -262,27 +301,12 @@ async def start_run(
    graph_input = normalize_input(body.input)
    config = build_run_config(thread_id, body.config, body.metadata, assistant_id=body.assistant_id)

-    # Merge DeerFlow-specific context overrides into configurable.
+    # Merge DeerFlow-specific context overrides into both ``configurable`` and ``context``.
    # The ``context`` field is a custom extension for the langgraph-compat layer
    # that carries agent configuration (model_name, thinking_enabled, etc.).
    # Only agent-relevant keys are forwarded; unknown keys (e.g. thread_id) are ignored.
-    context = getattr(body, "context", None)
-    if context:
-        _CONTEXT_CONFIGURABLE_KEYS = {
-            "model_name",
-            "mode",
-            "thinking_enabled",
-            "reasoning_effort",
-            "is_plan_mode",
-            "subagent_enabled",
-            "max_concurrent_subagents",
-            "agent_name",
-            "is_bootstrap",
-        }
-        configurable = config.setdefault("configurable", {})
-        for key in _CONTEXT_CONFIGURABLE_KEYS:
-            if key in context:
-                configurable.setdefault(key, context[key])
+    merge_run_context_overrides(config, getattr(body, "context", None))
+    inject_authenticated_user_context(config, request)

    stream_modes = normalize_stream_modes(body.stream_mode)

@@ -34,50 +34,42 @@ _LOG_FMT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
 _LOG_DATEFMT = "%Y-%m-%d %H:%M:%S"


-def _logging_level_from_config(name: str) -> int:
-    """Map ``config.yaml`` ``log_level`` string to a ``logging`` level constant."""
-    mapping = logging.getLevelNamesMapping()
-    return mapping.get((name or "info").strip().upper(), logging.INFO)
+def _setup_logging(log_level: int = logging.INFO) -> None:
+    """Route logs to ``debug.log`` using *log_level* for the initial root/file setup.

+    This configures the root logger and the ``debug.log`` file handler so logs do
+    not print on the interactive console. It is idempotent: any pre-existing
+    handlers on the root logger (e.g. installed by ``logging.basicConfig`` in
+    transitively imported modules) are removed so the debug session output only
+    lands in ``debug.log``.

-def _setup_logging(log_level: str) -> None:
-    """Send application logs to ``debug.log`` at *log_level*; do not print them on the console.
-
-    Idempotent: any pre-existing handlers on the root logger (e.g. installed by
-    ``logging.basicConfig`` in transitively imported modules) are removed so the
-    debug session output only lands in ``debug.log``.
+    Note: later config-driven logging adjustments may change named logger
+    verbosity without raising the root logger or file-handler thresholds set
+    here, so the eventual contents of ``debug.log`` may not be filtered solely by
+    this function's ``log_level`` argument.
    """
-    level = _logging_level_from_config(log_level)
    root = logging.root
    for h in list(root.handlers):
        root.removeHandler(h)
        h.close()
-    root.setLevel(level)
+    root.setLevel(log_level)

    file_handler = logging.FileHandler("debug.log", mode="a", encoding="utf-8")
-    file_handler.setLevel(level)
+    file_handler.setLevel(log_level)
    file_handler.setFormatter(logging.Formatter(_LOG_FMT, datefmt=_LOG_DATEFMT))
    root.addHandler(file_handler)


-def _update_logging_level(log_level: str) -> None:
-    """Update the root logger and existing handlers to *log_level*."""
-    level = _logging_level_from_config(log_level)
-    root = logging.root
-    root.setLevel(level)
-    for handler in root.handlers:
-        handler.setLevel(level)
-
-
 async def main():
    # Install file logging first so warnings emitted while loading config do not
    # leak onto the interactive terminal via Python's lastResort handler.
-    _setup_logging("info")
+    _setup_logging()

    from deerflow.config import get_app_config
+    from deerflow.config.app_config import apply_logging_level

    app_config = get_app_config()
-    _update_logging_level(app_config.log_level)
+    apply_logging_level(app_config.log_level)

    # Delay the rest of the deerflow imports until *after* logging is installed
    # so that any import-time side effects (e.g. deerflow.agents starts a
@@ -87,7 +79,9 @@ async def main():
    from langgraph.runtime import Runtime

    from deerflow.agents import make_lead_agent
+    from deerflow.config.paths import get_paths
    from deerflow.mcp import initialize_mcp_tools
+    from deerflow.runtime.user_context import get_effective_user_id

    # Initialize MCP tools at startup
    try:
@@ -121,6 +115,8 @@ async def main():
        print("Tip: `uv sync --group dev` to enable arrow-key & history support")
    print("=" * 50)

+    seen_artifacts: set[str] = set()
+
    while True:
        try:
            if session:
@@ -142,6 +138,22 @@ async def main():
                last_message = result["messages"][-1]
                print(f"\nAgent: {last_message.content}")

+            # Show files presented to the user this turn (new artifacts only)
+            artifacts = result.get("artifacts") or []
+            new_artifacts = [p for p in artifacts if p not in seen_artifacts]
+            if new_artifacts:
+                thread_id = config["configurable"]["thread_id"]
+                user_id = get_effective_user_id()
+                paths = get_paths()
+                print("\n[Presented files]")
+                for virtual in new_artifacts:
+                    try:
+                        physical = paths.resolve_virtual_path(thread_id, virtual, user_id=user_id)
+                        print(f"  - {virtual}\n    → {physical}")
+                    except ValueError as exc:
+                        print(f"  - {virtual}    (failed to resolve physical path: {exc})")
+                seen_artifacts.update(new_artifacts)
+
        except (KeyboardInterrupt, EOFError):
            print("\nGoodbye!")
            break
@@ -259,6 +259,8 @@ sandbox:

 When you configure `sandbox.mounts`, DeerFlow exposes those `container_path` values in the agent prompt so the agent can discover and operate on mounted directories directly instead of assuming everything must live under `/mnt/user-data`.

+For bare-metal Docker sandbox runs that use localhost, DeerFlow binds the sandbox HTTP port to `127.0.0.1` by default so it is not exposed on every host interface. Docker-outside-of-Docker deployments that connect through `host.docker.internal` keep the broad legacy bind for compatibility. Set `DEER_FLOW_SANDBOX_BIND_HOST` explicitly if your deployment needs a different bind address.
+
 ### Skills

 Configure the skills directory for specialized workflows:
@@ -319,11 +321,16 @@ models:
 - `DEEPSEEK_API_KEY` - DeepSeek API key
 - `NOVITA_API_KEY` - Novita API key (OpenAI-compatible endpoint)
 - `TAVILY_API_KEY` - Tavily search API key
+- `DEER_FLOW_PROJECT_ROOT` - Project root for relative runtime paths
 - `DEER_FLOW_CONFIG_PATH` - Custom config file path
+- `DEER_FLOW_EXTENSIONS_CONFIG_PATH` - Custom extensions config file path
+- `DEER_FLOW_HOME` - Runtime state directory (defaults to `.deer-flow` under the project root)
+- `DEER_FLOW_SKILLS_PATH` - Skills directory when `skills.path` is omitted
+- `GATEWAY_ENABLE_DOCS` - Set to `false` to disable Swagger UI (`/docs`), ReDoc (`/redoc`), and OpenAPI schema (`/openapi.json`) endpoints (default: `true`)

 ## Configuration Location

-The configuration file should be placed in the **project root directory** (`deer-flow/config.yaml`), not in the backend directory.
+The configuration file should be placed in the **project root directory** (`deer-flow/config.yaml`). Set `DEER_FLOW_PROJECT_ROOT` when the process may start from another working directory, or set `DEER_FLOW_CONFIG_PATH` to point at a specific file.

 ## Configuration Priority

@@ -331,12 +338,12 @@ DeerFlow searches for configuration in this order:

 1. Path specified in code via `config_path` argument
 2. Path from `DEER_FLOW_CONFIG_PATH` environment variable
-3. `config.yaml` in current working directory (typically `backend/` when running)
-4. `config.yaml` in parent directory (project root: `deer-flow/`)
+3. `config.yaml` under `DEER_FLOW_PROJECT_ROOT`, or under the current working directory when `DEER_FLOW_PROJECT_ROOT` is unset
+4. Legacy backend/repository-root locations for monorepo compatibility

 ## Best Practices

-1. **Place `config.yaml` in project root** - Not in `backend/` directory
+1. **Place `config.yaml` in project root** - Set `DEER_FLOW_PROJECT_ROOT` if the runtime starts elsewhere
 2. **Never commit `config.yaml`** - It's already in `.gitignore`
 3. **Use environment variables for secrets** - Don't hardcode API keys
 4. **Keep `config.example.yaml` updated** - Document all new options
@@ -347,7 +354,7 @@ DeerFlow searches for configuration in this order:

 ### "Config file not found"
 - Ensure `config.yaml` exists in the **project root** directory (`deer-flow/config.yaml`)
-- The backend searches parent directory by default, so root location is preferred
+- If the runtime starts outside the project root, set `DEER_FLOW_PROJECT_ROOT`
 - Alternatively, set `DEER_FLOW_CONFIG_PATH` environment variable to custom location

 ### "Invalid API key"
@@ -357,7 +364,7 @@ DeerFlow searches for configuration in this order:
 ### "Skills not loading"
 - Check that `deer-flow/skills/` directory exists
 - Verify skills have valid `SKILL.md` files
-- Check `skills.path` configuration if using custom path
+- Check `skills.path` or `DEER_FLOW_SKILLS_PATH` if using a custom path

 ### "Docker sandbox fails to start"
 - Ensure Docker is running
@@ -22,6 +22,8 @@ POST /api/threads/{thread_id}/uploads
 **请求体：** `multipart/form-data`
 - `files`: 一个或多个文件

+网关会在应用层限制上传规模，默认最多 10 个文件、单文件 50 MiB、单次请求总计 100 MiB。可通过 `config.yaml` 的 `uploads.max_files`、`uploads.max_file_size`、`uploads.max_total_size` 调整；前端会读取同一组限制并在选择文件时提示，超过限制时后端返回 `413 Payload Too Large`。
+
 **响应：**
 ```json
 {
@@ -48,7 +50,23 @@ POST /api/threads/{thread_id}/uploads
 - `virtual_path`: Agent 在沙箱中使用的虚拟路径
 - `artifact_url`: 前端通过 HTTP 访问文件的 URL

-### 2. 列出已上传文件
+### 2. 查询上传限制
+```
+GET /api/threads/{thread_id}/uploads/limits
+```
+
+返回网关当前生效的上传限制，供前端在用户选择文件前提示和拦截。
+
+**响应：**
+```json
+{
+  "max_files": 10,
+  "max_file_size": 52428800,
+  "max_total_size": 104857600
+}
+```
+
+### 3. 列出已上传文件
 ```
 GET /api/threads/{thread_id}/uploads/list
 ```
@@ -71,7 +89,7 @@ GET /api/threads/{thread_id}/uploads/list
 }
 ```

-### 3. 删除文件
+### 4. 删除文件
 ```
 DELETE /api/threads/{thread_id}/uploads/{filename}
 ```
@@ -1,343 +0,0 @@
-# DeerFlow 后端拆分设计文档：Harness + App
-
-> 状态：Draft
-> 作者：DeerFlow Team
-> 日期：2026-03-13
-
-## 1. 背景与动机
-
-DeerFlow 后端当前是一个单一 Python 包（`src.*`），包含了从底层 agent 编排到上层用户产品的所有代码。随着项目发展，这种结构带来了几个问题：
-
-- **复用困难**：其他产品（CLI 工具、Slack bot、第三方集成）想用 agent 能力，必须依赖整个后端，包括 FastAPI、IM SDK 等不需要的依赖
-- **职责模糊**：agent 编排逻辑和用户产品逻辑混在同一个 `src/` 下，边界不清晰
-- **依赖膨胀**：LangGraph Server 运行时不需要 FastAPI/uvicorn/Slack SDK，但当前必须安装全部依赖
-
-本文档提出将后端拆分为两部分：**deerflow-harness**（可发布的 agent 框架包）和 **app**（不打包的用户产品代码）。
-
-## 2. 核心概念
-
-### 2.1 Harness（线束/框架层）
-
-Harness 是 agent 的构建与编排框架，回答 **"如何构建和运行 agent"** 的问题：
-
-- Agent 工厂与生命周期管理
-- Middleware pipeline
-- 工具系统（内置工具 + MCP + 社区工具）
-- 沙箱执行环境
-- 子 agent 委派
-- 记忆系统
-- 技能加载与注入
-- 模型工厂
-- 配置系统
-
-**Harness 是一个可发布的 Python 包**（`deerflow-harness`），可以独立安装和使用。
-
-**Harness 的设计原则**：对上层应用完全无感知。它不知道也不关心谁在调用它——可以是 Web App、CLI、Slack Bot、或者一个单元测试。
-
-### 2.2 App（应用层）
-
-App 是面向用户的产品代码，回答 **"如何将 agent 呈现给用户"** 的问题：
-
-- Gateway API（FastAPI REST 接口）
-- IM Channels（飞书、Slack、Telegram 集成）
-- Custom Agent 的 CRUD 管理
-- 文件上传/下载的 HTTP 接口
-
-**App 不打包、不发布**，它是 DeerFlow 项目内部的应用代码，直接运行。
-
-**App 依赖 Harness，但 Harness 不依赖 App。**
-
-### 2.3 边界划分
-
-| 模块 | 归属 | 说明 |
-|------|------|------|
-| `config/` | Harness | 配置系统是基础设施 |
-| `reflection/` | Harness | 动态模块加载工具 |
-| `utils/` | Harness | 通用工具函数 |
-| `agents/` | Harness | Agent 工厂、middleware、state、memory |
-| `subagents/` | Harness | 子 agent 委派系统 |
-| `sandbox/` | Harness | 沙箱执行环境 |
-| `tools/` | Harness | 工具注册与发现 |
-| `mcp/` | Harness | MCP 协议集成 |
-| `skills/` | Harness | 技能加载、解析、定义 schema |
-| `models/` | Harness | LLM 模型工厂 |
-| `community/` | Harness | 社区工具（tavily、jina 等） |
-| `client.py` | Harness | 嵌入式 Python 客户端 |
-| `gateway/` | App | FastAPI REST API |
-| `channels/` | App | IM 平台集成 |
-
-**关于 Custom Agents**：agent 定义格式（`config.yaml` + `SOUL.md` schema）由 Harness 层的 `config/agents_config.py` 定义，但文件的存储、CRUD、发现机制由 App 层的 `gateway/routers/agents.py` 负责。
-
-## 3. 目标架构
-
-### 3.1 目录结构
-
-```
-backend/
-├── packages/
-│   └── harness/
-│       ├── pyproject.toml          # deerflow-harness 包定义
-│       └── deerflow/               # Python 包根（import 前缀: deerflow.*）
-│           ├── __init__.py
-│           ├── config/
-│           ├── reflection/
-│           ├── utils/
-│           ├── agents/
-│           │   ├── lead_agent/
-│           │   ├── middlewares/
-│           │   ├── memory/
-│           │   ├── checkpointer/
-│           │   └── thread_state.py
-│           ├── subagents/
-│           ├── sandbox/
-│           ├── tools/
-│           ├── mcp/
-│           ├── skills/
-│           ├── models/
-│           ├── community/
-│           └── client.py
-├── app/                            # 不打包（import 前缀: app.*）
-│   ├── __init__.py
-│   ├── gateway/
-│   │   ├── __init__.py
-│   │   ├── app.py
-│   │   ├── config.py
-│   │   ├── path_utils.py
-│   │   └── routers/
-│   └── channels/
-│       ├── __init__.py
-│       ├── base.py
-│       ├── manager.py
-│       ├── service.py
-│       ├── store.py
-│       ├── message_bus.py
-│       ├── feishu.py
-│       ├── slack.py
-│       └── telegram.py
-├── pyproject.toml                  # uv workspace root
-├── langgraph.json
-├── tests/
-├── docs/
-└── Makefile
-```
-
-### 3.2 Import 规则
-
-两个层使用不同的 import 前缀，职责边界一目了然：
-
-```python
-# ---------------------------------------------------------------
-# Harness 内部互相引用（deerflow.* 前缀）
-# ---------------------------------------------------------------
-from deerflow.agents import make_lead_agent
-from deerflow.models import create_chat_model
-from deerflow.config import get_app_config
-from deerflow.tools import get_available_tools
-
-# ---------------------------------------------------------------
-# App 内部互相引用（app.* 前缀）
-# ---------------------------------------------------------------
-from app.gateway.app import app
-from app.gateway.routers.uploads import upload_files
-from app.channels.service import start_channel_service
-
-# ---------------------------------------------------------------
-# App 调用 Harness（单向依赖，Harness 永远不 import app）
-# ---------------------------------------------------------------
-from deerflow.agents import make_lead_agent
-from deerflow.models import create_chat_model
-from deerflow.skills import load_skills
-from deerflow.config.extensions_config import get_extensions_config
-```
-
-**App 调用 Harness 示例 — Gateway 中启动 agent**：
-
-```python
-# app/gateway/routers/chat.py
-from deerflow.agents.lead_agent.agent import make_lead_agent
-from deerflow.models import create_chat_model
-from deerflow.config import get_app_config
-
-async def create_chat_session(thread_id: str, model_name: str):
-    config = get_app_config()
-    model = create_chat_model(name=model_name)
-    agent = make_lead_agent(config=...)
-    # ... 使用 agent 处理用户消息
-```
-
-**App 调用 Harness 示例 — Channel 中查询 skills**：
-
-```python
-# app/channels/manager.py
-from deerflow.skills import load_skills
-from deerflow.agents.memory.updater import get_memory_data
-
-def handle_status_command():
-    skills = load_skills(enabled_only=True)
-    memory = get_memory_data()
-    return f"Skills: {len(skills)}, Memory facts: {len(memory.get('facts', []))}"
-```
-
-**禁止方向**：Harness 代码中绝不能出现 `from app.` 或 `import app.`。
-
-### 3.3 为什么 App 不打包
-
-| 方面 | 打包（放 packages/ 下） | 不打包（放 backend/app/） |
-|------|------------------------|--------------------------|
-| 命名空间 | 需要 pkgutil `extend_path` 合并，或独立前缀 | 天然独立，`app.*` vs `deerflow.*` |
-| 发布需求 | 没有——App 是项目内部代码 | 不需要 pyproject.toml |
-| 复杂度 | 需要管理两个包的构建、版本、依赖声明 | 直接运行，零额外配置 |
-| 运行方式 | `pip install deerflow-app` | `PYTHONPATH=. uvicorn app.gateway.app:app` |
-
-App 的唯一消费者是 DeerFlow 项目自身，没有独立发布的需求。放在 `backend/app/` 下作为普通 Python 包，通过 `PYTHONPATH` 或 editable install 让 Python 找到即可。
-
-### 3.4 依赖关系
-
-```
-┌─────────────────────────────────────┐
-│  app/  (不打包，直接运行)             │
-│  ├── fastapi, uvicorn               │
-│  ├── slack-sdk, lark-oapi, ...      │
-│  └── import deerflow.*              │
-└──────────────┬──────────────────────┘
-               │
-               ▼
-┌─────────────────────────────────────┐
-│  deerflow-harness  (可发布的包)       │
-│  ├── langgraph, langchain           │
-│  ├── markitdown, pydantic, ...      │
-│  └── 零 app 依赖                     │
-└─────────────────────────────────────┘
-```
-
-**依赖分类**：
-
-| 分类 | 依赖包 |
-|------|--------|
-| Harness only | agent-sandbox, langchain*, langgraph*, markdownify, markitdown, pydantic, pyyaml, readabilipy, tavily-python, firecrawl-py, tiktoken, ddgs, duckdb, httpx, kubernetes, dotenv |
-| App only | fastapi, uvicorn, sse-starlette, python-multipart, lark-oapi, slack-sdk, python-telegram-bot, markdown-to-mrkdwn |
-| Shared | langgraph-sdk（channels 用 HTTP client）, pydantic, httpx |
-
-### 3.5 Workspace 配置
-
-`backend/pyproject.toml`（workspace root）：
-
-```toml
-[project]
-name = "deer-flow"
-version = "0.1.0"
-requires-python = ">=3.12"
-dependencies = ["deerflow-harness"]
-
-[dependency-groups]
-dev = ["pytest>=8.0.0", "ruff>=0.14.11"]
-# App 的额外依赖（fastapi 等）也声明在 workspace root，因为 app 不打包
-app = ["fastapi", "uvicorn", "sse-starlette", "python-multipart"]
-channels = ["lark-oapi", "slack-sdk", "python-telegram-bot"]
-
-[tool.uv.workspace]
-members = ["packages/harness"]
-
-[tool.uv.sources]
-deerflow-harness = { workspace = true }
-```
-
-## 4. 当前的跨层依赖问题
-
-在拆分之前，需要先解决 `client.py` 中两处从 harness 到 app 的反向依赖：
-
-### 4.1 `_validate_skill_frontmatter`
-
-```python
-# client.py — harness 导入了 app 层代码
-from src.gateway.routers.skills import _validate_skill_frontmatter
-```
-
-**解决方案**：将该函数提取到 `deerflow/skills/validation.py`。这是一个纯逻辑函数（解析 YAML frontmatter、校验字段），与 FastAPI 无关。
-
-### 4.2 `CONVERTIBLE_EXTENSIONS` + `convert_file_to_markdown`
-
-```python
-# client.py — harness 导入了 app 层代码
-from src.gateway.routers.uploads import CONVERTIBLE_EXTENSIONS, convert_file_to_markdown
-```
-
-**解决方案**：将它们提取到 `deerflow/utils/file_conversion.py`。仅依赖 `markitdown` + `pathlib`，是通用工具函数。
-
-## 5. 基础设施变更
-
-### 5.1 LangGraph Server
-
-LangGraph Server 只需要 harness 包。`langgraph.json` 更新：
-
-```json
-{
-  "dependencies": ["./packages/harness"],
-  "graphs": {
-    "lead_agent": "deerflow.agents:make_lead_agent"
-  },
-  "checkpointer": {
-    "path": "./packages/harness/deerflow/runtime/checkpointer/async_provider.py:make_checkpointer"
-  }
-}
-```
-
-### 5.2 Gateway API
-
-```bash
-# serve.sh / Makefile
-# PYTHONPATH 包含 backend/ 根目录，使 app.* 和 deerflow.* 都能被找到
-PYTHONPATH=. uvicorn app.gateway.app:app --host 0.0.0.0 --port 8001
-```
-
-### 5.3 Nginx
-
-无需变更（只做 URL 路由，不涉及 Python 模块路径）。
-
-### 5.4 Docker
-
-Dockerfile 中的 module 引用从 `src.` 改为 `deerflow.` / `app.`，`COPY` 命令需覆盖 `packages/` 和 `app/` 目录。
-
-## 6. 实施计划
-
-分 3 个 PR 递进执行：
-
-### PR 1：提取共享工具函数（Low Risk）
-
-1. 创建 `src/skills/validation.py`，从 `gateway/routers/skills.py` 提取 `_validate_skill_frontmatter`
-2. 创建 `src/utils/file_conversion.py`，从 `gateway/routers/uploads.py` 提取文件转换逻辑
-3. 更新 `client.py`、`gateway/routers/skills.py`、`gateway/routers/uploads.py` 的 import
-4. 运行全部测试确认无回归
-
-### PR 2：Rename + 物理拆分（High Risk，原子操作）
-
-1. 创建 `packages/harness/` 目录，创建 `pyproject.toml`
-2. `git mv` 将 harness 相关模块从 `src/` 移入 `packages/harness/deerflow/`
-3. `git mv` 将 app 相关模块从 `src/` 移入 `app/`
-4. 全局替换 import：
-   - harness 模块：`src.*` → `deerflow.*`（所有 `.py` 文件、`langgraph.json`、测试、文档）
-   - app 模块：`src.gateway.*` → `app.gateway.*`、`src.channels.*` → `app.channels.*`
-5. 更新 workspace root `pyproject.toml`
-6. 更新 `langgraph.json`、`Makefile`、`Dockerfile`
-7. `uv sync` + 全部测试 + 手动验证服务启动
-
-### PR 3：边界检查 + 文档（Low Risk）
-
-1. 添加 lint 规则：检查 harness 不 import app 模块
-2. 更新 `CLAUDE.md`、`README.md`
-
-## 7. 风险与缓解
-
-| 风险 | 影响 | 缓解措施 |
-|------|------|----------|
-| 全局 rename 误伤 | 字符串中的 `src` 被错误替换 | 正则精确匹配 `\bsrc\.`，review diff |
-| LangGraph Server 找不到模块 | 服务启动失败 | `langgraph.json` 的 `dependencies` 指向正确的 harness 包路径 |
-| App 的 `PYTHONPATH` 缺失 | Gateway/Channel 启动 import 报错 | Makefile/Docker 统一设置 `PYTHONPATH=.` |
-| `config.yaml` 中的 `use` 字段引用旧路径 | 运行时模块解析失败 | `config.yaml` 中的 `use` 字段同步更新为 `deerflow.*` |
-| 测试中 `sys.path` 混乱 | 测试失败 | 用 editable install（`uv sync`）确保 deerflow 可导入，`conftest.py` 中添加 `app/` 到 `sys.path` |
-
-## 8. 未来演进
-
-- **独立发布**：harness 可以发布到内部 PyPI，让其他项目直接 `pip install deerflow-harness`
-- **插件化 App**：不同的 app（web、CLI、bot）可以各自独立，都依赖同一个 harness
-- **更细粒度拆分**：如果 harness 内部模块继续增长，可以进一步拆分（如 `deerflow-sandbox`、`deerflow-mcp`）
@@ -23,6 +23,9 @@ DeerFlow uses a YAML configuration file that should be placed in the **project r
   # Option A: Set environment variables (recommended)
   export OPENAI_API_KEY="your-key-here"

+   # Optional: pin the project root when running from another directory
+   export DEER_FLOW_PROJECT_ROOT="/path/to/deer-flow"
+
   # Option B: Edit config.yaml directly
   vim config.yaml  # or your preferred editor
   ```
@@ -35,17 +38,20 @@ DeerFlow uses a YAML configuration file that should be placed in the **project r

 ## Important Notes

- **Location**: `config.yaml` should be in `deer-flow/` (project root), not `deer-flow/backend/`
+- **Location**: `config.yaml` should be in `deer-flow/` (project root)
 - **Git**: `config.yaml` is automatically ignored by git (contains secrets)
- **Priority**: If both `backend/config.yaml` and `../config.yaml` exist, backend version takes precedence
+- **Runtime root**: Set `DEER_FLOW_PROJECT_ROOT` if DeerFlow may start from outside the project root
+- **Runtime data**: State defaults to `.deer-flow` under the project root; set `DEER_FLOW_HOME` to move it
+- **Skills**: Skills default to `skills/` under the project root; set `DEER_FLOW_SKILLS_PATH` or `skills.path` to move them

 ## Configuration File Locations

 The backend searches for `config.yaml` in this order:

-1. `DEER_FLOW_CONFIG_PATH` environment variable (if set)
-2. `backend/config.yaml` (current directory when running from backend/)
-3. `deer-flow/config.yaml` (parent directory - **recommended location**)
+1. Explicit `config_path` argument from code
+2. `DEER_FLOW_CONFIG_PATH` environment variable (if set)
+3. `config.yaml` under `DEER_FLOW_PROJECT_ROOT`, or the current working directory when `DEER_FLOW_PROJECT_ROOT` is unset
+4. Legacy backend/repository-root locations for monorepo compatibility

 **Recommended**: Place `config.yaml` in project root (`deer-flow/config.yaml`).

@@ -77,8 +83,8 @@ python -c "from deerflow.config.app_config import AppConfig; print(AppConfig.res

 If it can't find the config:
 1. Ensure you've copied `config.example.yaml` to `config.yaml`
-2. Verify you're in the correct directory
-3. Check the file exists: `ls -la ../config.yaml`
+2. Verify you're in the project root, or set `DEER_FLOW_PROJECT_ROOT`
+3. Check the file exists: `ls -la config.yaml`

 ### Permission denied

@@ -89,4 +95,4 @@ chmod 600 ../config.yaml  # Protect sensitive configuration
 ## See Also

 - [Configuration Guide](CONFIGURATION.md) - Detailed configuration options
- [Architecture Overview](../CLAUDE.md) - System architecture
+- [Architecture Overview](../CLAUDE.md) - System architecture
@@ -173,7 +173,7 @@ def _assemble_from_features(
      9.   MemoryMiddleware (memory feature)
      10.  ViewImageMiddleware (vision feature)
      11.  SubagentLimitMiddleware (subagent feature)
-      12.  LoopDetectionMiddleware (always)
+      12.  LoopDetectionMiddleware (loop_detection feature)
      13.  ClarificationMiddleware (always last)

    Two-phase ordering:
@@ -254,9 +254,11 @@ def _assemble_from_features(
            from deerflow.agents.middlewares.view_image_middleware import ViewImageMiddleware

            chain.append(ViewImageMiddleware())
-        from deerflow.tools.builtins import view_image_tool

-        extra_tools.append(view_image_tool)
+        if feat.sandbox is not False:
+            from deerflow.tools.builtins import view_image_tool
+
+            extra_tools.append(view_image_tool)

    # --- [11] Subagent ---
    if feat.subagent is not False:
@@ -270,10 +272,15 @@ def _assemble_from_features(

        extra_tools.append(task_tool)

-    # --- [12] LoopDetection (always) ---
-    from deerflow.agents.middlewares.loop_detection_middleware import LoopDetectionMiddleware
+    # --- [12] LoopDetection ---
+    if feat.loop_detection is not False:
+        if isinstance(feat.loop_detection, AgentMiddleware):
+            chain.append(feat.loop_detection)
+        else:
+            from deerflow.agents.middlewares.loop_detection_middleware import LoopDetectionMiddleware
+            from deerflow.config.loop_detection_config import LoopDetectionConfig

-    chain.append(LoopDetectionMiddleware())
+            chain.append(LoopDetectionMiddleware.from_config(LoopDetectionConfig()))

    # --- [13] Clarification (always last among built-ins) ---
    chain.append(ClarificationMiddleware())
@@ -31,6 +31,7 @@ class RuntimeFeatures:
    vision: bool | AgentMiddleware = False
    auto_title: bool | AgentMiddleware = False
    guardrail: Literal[False] | AgentMiddleware = False
+    loop_detection: bool | AgentMiddleware = True


 # ---------------------------------------------------------------------------
@@ -3,7 +3,6 @@ import logging
 from langchain.agents import create_agent
 from langchain.agents.middleware import AgentMiddleware
 from langchain_core.runnables import RunnableConfig
-from langgraph.graph.state import CompiledStateGraph

 from deerflow.agents.lead_agent.prompt import apply_prompt_template
 from deerflow.agents.memory.summarization_hook import memory_flush_hook
@@ -19,9 +18,10 @@ from deerflow.agents.middlewares.tool_error_handling_middleware import build_lea
 from deerflow.agents.middlewares.view_image_middleware import ViewImageMiddleware
 from deerflow.agents.thread_state import ThreadState
 from deerflow.config.agents_config import load_agent_config, validate_agent_name
-from deerflow.config.app_config import AppConfig
-from deerflow.config.deer_flow_context import DeerFlowContext
+from deerflow.config.app_config import AppConfig, get_app_config
 from deerflow.models import create_chat_model
+from deerflow.skills.tool_policy import filter_tools_by_skill_allowed_tools
+from deerflow.skills.types import Skill

 logger = logging.getLogger(__name__)

@@ -35,8 +35,9 @@ def _get_runtime_config(config: RunnableConfig) -> dict:
    return cfg


-def _resolve_model_name(app_config: AppConfig, requested_model_name: str | None = None) -> str:
+def _resolve_model_name(requested_model_name: str | None = None, *, app_config: AppConfig | None = None) -> str:
    """Resolve a runtime model name safely, falling back to default if invalid. Raises ValueError if no models are configured."""
+    app_config = app_config or get_app_config()
    default_model_name = app_config.models[0].name if app_config.models else None
    if default_model_name is None:
        raise ValueError("No chat models are configured. Please configure at least one model in config.yaml.")
@@ -49,9 +50,10 @@ def _resolve_model_name(app_config: AppConfig, requested_model_name: str | None
    return default_model_name


-def _create_summarization_middleware(app_config: AppConfig) -> DeerFlowSummarizationMiddleware | None:
+def _create_summarization_middleware(*, app_config: AppConfig | None = None) -> DeerFlowSummarizationMiddleware | None:
    """Create and configure the summarization middleware from config."""
-    config = app_config.summarization
+    resolved_app_config = app_config or get_app_config()
+    config = resolved_app_config.summarization

    if not config.enabled:
        return None
@@ -72,9 +74,9 @@ def _create_summarization_middleware(app_config: AppConfig) -> DeerFlowSummariza
    # as middleware rather than lead_agent (SummarizationMiddleware is a
    # LangChain built-in, so we tag the model at creation time).
    if config.model_name:
-        model = create_chat_model(name=config.model_name, thinking_enabled=False, app_config=app_config)
+        model = create_chat_model(name=config.model_name, thinking_enabled=False, app_config=resolved_app_config)
    else:
-        model = create_chat_model(thinking_enabled=False, app_config=app_config)
+        model = create_chat_model(thinking_enabled=False, app_config=resolved_app_config)
    model = model.with_config(tags=["middleware:summarize"])

    # Prepare kwargs
@@ -91,17 +93,13 @@ def _create_summarization_middleware(app_config: AppConfig) -> DeerFlowSummariza
        kwargs["summary_prompt"] = config.summary_prompt

    hooks: list[BeforeSummarizationHook] = []
-    if app_config.memory.enabled:
+    if resolved_app_config.memory.enabled:
        hooks.append(memory_flush_hook)

    # The logic below relies on two assumptions holding true: this factory is
    # the sole entry point for DeerFlowSummarizationMiddleware, and the runtime
    # config is not expected to change after startup.
-    try:
-        skills_container_path = app_config.skills.container_path or "/mnt/skills"
-    except Exception:
-        logger.exception("Failed to resolve skills container path; falling back to default")
-        skills_container_path = "/mnt/skills"
+    skills_container_path = resolved_app_config.skills.container_path or "/mnt/skills"

    return DeerFlowSummarizationMiddleware(
        **kwargs,
@@ -240,17 +238,16 @@ Being proactive with task management demonstrates thoroughness and ensures all r
 # ToolErrorHandlingMiddleware should be before ClarificationMiddleware to convert tool exceptions to ToolMessages
 # ClarificationMiddleware should be last to intercept clarification requests after model calls
 def _build_middlewares(
-    app_config: AppConfig,
    config: RunnableConfig,
-    *,
    model_name: str | None,
    agent_name: str | None = None,
    custom_middlewares: list[AgentMiddleware] | None = None,
+    *,
+    app_config: AppConfig | None = None,
 ):
    """Build middleware chain based on runtime configuration.

    Args:
-        app_config: Resolved application config.
        config: Runtime configuration containing configurable options like is_plan_mode.
        agent_name: If provided, MemoryMiddleware will use per-agent memory storage.
        custom_middlewares: Optional list of custom middlewares to inject into the chain.
@@ -258,10 +255,17 @@ def _build_middlewares(
    Returns:
        List of middleware instances.
    """
-    middlewares = build_lead_runtime_middlewares(app_config=app_config, lazy_init=True)
+    resolved_app_config = app_config or get_app_config()
+    middlewares = build_lead_runtime_middlewares(app_config=resolved_app_config, lazy_init=True)
+
+    # Always inject current date (and optionally memory) as <system-reminder> into the
+    # first HumanMessage to keep the system prompt fully static for prefix-cache reuse.
+    from deerflow.agents.middlewares.dynamic_context_middleware import DynamicContextMiddleware
+
+    middlewares.append(DynamicContextMiddleware(agent_name=agent_name, app_config=resolved_app_config))

    # Add summarization middleware if enabled
-    summarization_middleware = _create_summarization_middleware(app_config)
+    summarization_middleware = _create_summarization_middleware(app_config=resolved_app_config)
    if summarization_middleware is not None:
        middlewares.append(summarization_middleware)

@@ -273,23 +277,23 @@ def _build_middlewares(
        middlewares.append(todo_list_middleware)

    # Add TokenUsageMiddleware when token_usage tracking is enabled
-    if app_config.token_usage.enabled:
+    if resolved_app_config.token_usage.enabled:
        middlewares.append(TokenUsageMiddleware())

    # Add TitleMiddleware
-    middlewares.append(TitleMiddleware())
+    middlewares.append(TitleMiddleware(app_config=resolved_app_config))

    # Add MemoryMiddleware (after TitleMiddleware)
-    middlewares.append(MemoryMiddleware(agent_name=agent_name))
+    middlewares.append(MemoryMiddleware(agent_name=agent_name, memory_config=resolved_app_config.memory))

    # Add ViewImageMiddleware only if the current model supports vision.
    # Use the resolved runtime model_name from make_lead_agent to avoid stale config values.
-    model_config = app_config.get_model_config(model_name) if model_name else None
+    model_config = resolved_app_config.get_model_config(model_name) if model_name else None
    if model_config is not None and model_config.supports_vision:
        middlewares.append(ViewImageMiddleware())

    # Add DeferredToolFilterMiddleware to hide deferred tool schemas from model binding
-    if app_config.tool_search.enabled:
+    if resolved_app_config.tool_search.enabled:
        from deerflow.agents.middlewares.deferred_tool_filter_middleware import DeferredToolFilterMiddleware

        middlewares.append(DeferredToolFilterMiddleware())
@@ -301,7 +305,9 @@ def _build_middlewares(
        middlewares.append(SubagentLimitMiddleware(max_concurrent=max_concurrent_subagents))

    # LoopDetectionMiddleware — detect and break repetitive tool call loops
-    middlewares.append(LoopDetectionMiddleware())
+    loop_detection_config = resolved_app_config.loop_detection
+    if loop_detection_config.enabled:
+        middlewares.append(LoopDetectionMiddleware.from_config(loop_detection_config))

    # Inject custom middlewares before ClarificationMiddleware
    if custom_middlewares:
@@ -312,33 +318,42 @@ def _build_middlewares(
    return middlewares


-def make_lead_agent(
-    config: RunnableConfig,
-    app_config: AppConfig | None = None,
-) -> CompiledStateGraph:
-    """Build the lead agent from runtime config.
+def _available_skill_names(agent_config, is_bootstrap: bool) -> set[str] | None:
+    if is_bootstrap:
+        return {"bootstrap"}
+    if agent_config and agent_config.skills is not None:
+        return set(agent_config.skills)
+    return None

-    Args:
-        config: LangGraph ``RunnableConfig`` carrying per-invocation options
-            (``thinking_enabled``, ``model_name``, ``is_plan_mode``, etc.).
-        app_config: Resolved application config. Required for in-process
-            entry points (DeerFlowClient, Gateway Worker). When omitted we
-            are being called via ``langgraph.json`` registration and reload
-            from disk — the LangGraph Server bootstrap path has no other
-            way to thread the value.
-    """
+
+def _load_enabled_skills_for_tool_policy(available_skills: set[str] | None, *, app_config: AppConfig) -> list[Skill]:
+    try:
+        from deerflow.agents.lead_agent.prompt import get_enabled_skills_for_config
+
+        skills = get_enabled_skills_for_config(app_config)
+    except Exception:
+        logger.exception("Failed to load skills for allowed-tools policy")
+        raise
+
+    if available_skills is None:
+        return skills
+    return [skill for skill in skills if skill.name in available_skills]
+
+
+def make_lead_agent(config: RunnableConfig):
+    """LangGraph graph factory; keep the signature compatible with LangGraph Server."""
+    runtime_config = _get_runtime_config(config)
+    runtime_app_config = runtime_config.get("app_config")
+    return _make_lead_agent(config, app_config=runtime_app_config or get_app_config())
+
+
+def _make_lead_agent(config: RunnableConfig, *, app_config: AppConfig):
    # Lazy import to avoid circular dependency
    from deerflow.tools import get_available_tools
-    from deerflow.tools.builtins import setup_agent
-
-    if app_config is None:
-        # LangGraph Server registers ``make_lead_agent`` via ``langgraph.json``
-        # and hands us only a ``RunnableConfig``. Reload config from disk
-        # here — it's a pure function, equivalent to the process-global the
-        # old code path would have read.
-        app_config = AppConfig.from_file()
+    from deerflow.tools.builtins import setup_agent, update_agent

    cfg = _get_runtime_config(config)
+    resolved_app_config = app_config

    thinking_enabled = cfg.get("thinking_enabled", True)
    reasoning_effort = cfg.get("reasoning_effort", None)
@@ -350,13 +365,14 @@ def make_lead_agent(
    agent_name = validate_agent_name(cfg.get("agent_name"))

    agent_config = load_agent_config(agent_name) if not is_bootstrap else None
+    available_skills = _available_skill_names(agent_config, is_bootstrap)
    # Custom agent model from agent config (if any), or None to let _resolve_model_name pick the default
    agent_model_name = agent_config.model if agent_config and agent_config.model else None

    # Final model name resolution: request → agent config → global default, with fallback for unknown names
-    model_name = _resolve_model_name(app_config, requested_model_name or agent_model_name)
+    model_name = _resolve_model_name(requested_model_name or agent_model_name, app_config=resolved_app_config)

-    model_config = app_config.get_model_config(model_name)
+    model_config = resolved_app_config.get_model_config(model_name)

    if model_config is None:
        raise ValueError("No chat model could be resolved. Please configure at least one model in config.yaml or provide a valid 'model_name'/'model' in the request.")
@@ -388,29 +404,43 @@ def make_lead_agent(
            "is_plan_mode": is_plan_mode,
            "subagent_enabled": subagent_enabled,
            "tool_groups": agent_config.tool_groups if agent_config else None,
-            "available_skills": ["bootstrap"] if is_bootstrap else (agent_config.skills if agent_config and agent_config.skills is not None else None),
+            "available_skills": sorted(available_skills) if available_skills is not None else None,
        }
    )

+    skills_for_tool_policy = _load_enabled_skills_for_tool_policy(available_skills, app_config=resolved_app_config)
+
    if is_bootstrap:
        # Special bootstrap agent with minimal prompt for initial custom agent creation flow
+        tools = get_available_tools(model_name=model_name, subagent_enabled=subagent_enabled, app_config=resolved_app_config) + [setup_agent]
        return create_agent(
-            model=create_chat_model(name=model_name, thinking_enabled=thinking_enabled, app_config=app_config),
-            tools=get_available_tools(model_name=model_name, subagent_enabled=subagent_enabled, app_config=app_config) + [setup_agent],
-            middleware=_build_middlewares(app_config, config, model_name=model_name),
-            system_prompt=apply_prompt_template(app_config, subagent_enabled=subagent_enabled, max_concurrent_subagents=max_concurrent_subagents, available_skills=set(["bootstrap"])),
+            model=create_chat_model(name=model_name, thinking_enabled=thinking_enabled, app_config=resolved_app_config),
+            tools=filter_tools_by_skill_allowed_tools(tools, skills_for_tool_policy),
+            middleware=_build_middlewares(config, model_name=model_name, app_config=resolved_app_config),
+            system_prompt=apply_prompt_template(
+                subagent_enabled=subagent_enabled,
+                max_concurrent_subagents=max_concurrent_subagents,
+                available_skills=set(["bootstrap"]),
+                app_config=resolved_app_config,
+            ),
            state_schema=ThreadState,
-            context_schema=DeerFlowContext,
        )

+    # Custom agents can update their own SOUL.md / config via update_agent.
+    # The default agent (no agent_name) does not see this tool.
+    extra_tools = [update_agent] if agent_name else []
    # Default lead agent (unchanged behavior)
+    tools = get_available_tools(model_name=model_name, groups=agent_config.tool_groups if agent_config else None, subagent_enabled=subagent_enabled, app_config=resolved_app_config)
    return create_agent(
-        model=create_chat_model(name=model_name, thinking_enabled=thinking_enabled, reasoning_effort=reasoning_effort, app_config=app_config),
-        tools=get_available_tools(model_name=model_name, groups=agent_config.tool_groups if agent_config else None, subagent_enabled=subagent_enabled, app_config=app_config),
-        middleware=_build_middlewares(app_config, config, model_name=model_name, agent_name=agent_name),
+        model=create_chat_model(name=model_name, thinking_enabled=thinking_enabled, reasoning_effort=reasoning_effort, app_config=resolved_app_config),
+        tools=filter_tools_by_skill_allowed_tools(tools + extra_tools, skills_for_tool_policy),
+        middleware=_build_middlewares(config, model_name=model_name, agent_name=agent_name, app_config=resolved_app_config),
        system_prompt=apply_prompt_template(
-            app_config, subagent_enabled=subagent_enabled, max_concurrent_subagents=max_concurrent_subagents, agent_name=agent_name, available_skills=set(agent_config.skills) if agent_config and agent_config.skills is not None else None
+            subagent_enabled=subagent_enabled,
+            max_concurrent_subagents=max_concurrent_subagents,
+            agent_name=agent_name,
+            available_skills=set(agent_config.skills) if agent_config and agent_config.skills is not None else None,
+            app_config=resolved_app_config,
        ),
        state_schema=ThreadState,
-        context_schema=DeerFlowContext,
    )
@@ -1,39 +1,43 @@
+from __future__ import annotations
+
 import asyncio
 import logging
 import threading
-from datetime import datetime
 from functools import lru_cache
+from typing import TYPE_CHECKING

 from deerflow.config.agents_config import load_agent_soul
-from deerflow.config.app_config import AppConfig
-from deerflow.skills import load_skills
-from deerflow.skills.types import Skill
+from deerflow.skills.storage import get_or_new_skill_storage
+from deerflow.skills.types import Skill, SkillCategory
 from deerflow.subagents import get_available_subagent_names

+if TYPE_CHECKING:
+    from deerflow.config.app_config import AppConfig
+
 logger = logging.getLogger(__name__)

 _ENABLED_SKILLS_REFRESH_WAIT_TIMEOUT_SECONDS = 5.0
 _enabled_skills_lock = threading.Lock()
 _enabled_skills_cache: list[Skill] | None = None
+_enabled_skills_by_config_cache: dict[int, tuple[object, list[Skill]]] = {}
 _enabled_skills_refresh_active = False
 _enabled_skills_refresh_version = 0
 _enabled_skills_refresh_event = threading.Event()


-def _load_enabled_skills_sync(app_config: AppConfig | None) -> list[Skill]:
-    return list(load_skills(app_config, enabled_only=True))
+def _load_enabled_skills_sync() -> list[Skill]:
+    return list(get_or_new_skill_storage().load_skills(enabled_only=True))


-def _start_enabled_skills_refresh_thread(app_config: AppConfig | None) -> None:
+def _start_enabled_skills_refresh_thread() -> None:
    threading.Thread(
        target=_refresh_enabled_skills_cache_worker,
-        args=(app_config,),
        name="deerflow-enabled-skills-loader",
        daemon=True,
    ).start()


-def _refresh_enabled_skills_cache_worker(app_config: AppConfig | None) -> None:
+def _refresh_enabled_skills_cache_worker() -> None:
    global _enabled_skills_cache, _enabled_skills_refresh_active

    while True:
@@ -41,8 +45,8 @@ def _refresh_enabled_skills_cache_worker(app_config: AppConfig | None) -> None:
            target_version = _enabled_skills_refresh_version

        try:
-            skills = _load_enabled_skills_sync(app_config)
-        except (OSError, ImportError):
+            skills = _load_enabled_skills_sync()
+        except Exception:
            logger.exception("Failed to load enabled skills for prompt injection")
            skills = []

@@ -58,7 +62,7 @@ def _refresh_enabled_skills_cache_worker(app_config: AppConfig | None) -> None:
            _enabled_skills_cache = None


-def _ensure_enabled_skills_cache(app_config: AppConfig | None) -> threading.Event:
+def _ensure_enabled_skills_cache() -> threading.Event:
    global _enabled_skills_refresh_active

    with _enabled_skills_lock:
@@ -70,84 +74,94 @@ def _ensure_enabled_skills_cache(app_config: AppConfig | None) -> threading.Even
        _enabled_skills_refresh_active = True
        _enabled_skills_refresh_event.clear()

-    _start_enabled_skills_refresh_thread(app_config)
+    _start_enabled_skills_refresh_thread()
    return _enabled_skills_refresh_event


-def _invalidate_enabled_skills_cache(app_config: AppConfig | None) -> threading.Event:
+def _invalidate_enabled_skills_cache() -> threading.Event:
    global _enabled_skills_cache, _enabled_skills_refresh_active, _enabled_skills_refresh_version

    _get_cached_skills_prompt_section.cache_clear()
    with _enabled_skills_lock:
        _enabled_skills_cache = None
+        _enabled_skills_by_config_cache.clear()
        _enabled_skills_refresh_version += 1
        _enabled_skills_refresh_event.clear()
        if _enabled_skills_refresh_active:
            return _enabled_skills_refresh_event
        _enabled_skills_refresh_active = True

-    _start_enabled_skills_refresh_thread(app_config)
+    _start_enabled_skills_refresh_thread()
    return _enabled_skills_refresh_event


-def prime_enabled_skills_cache(app_config: AppConfig | None = None) -> None:
-    _ensure_enabled_skills_cache(app_config)
+def prime_enabled_skills_cache() -> None:
+    _ensure_enabled_skills_cache()


-def warm_enabled_skills_cache(app_config: AppConfig | None = None, timeout_seconds: float = _ENABLED_SKILLS_REFRESH_WAIT_TIMEOUT_SECONDS) -> bool:
-    if _ensure_enabled_skills_cache(app_config).wait(timeout=timeout_seconds):
+def warm_enabled_skills_cache(timeout_seconds: float = _ENABLED_SKILLS_REFRESH_WAIT_TIMEOUT_SECONDS) -> bool:
+    if _ensure_enabled_skills_cache().wait(timeout=timeout_seconds):
        return True

    logger.warning("Timed out waiting %.1fs for enabled skills cache warm-up", timeout_seconds)
    return False


-def _get_enabled_skills(app_config: AppConfig | None = None):
+def _get_enabled_skills():
+    return get_cached_enabled_skills()
+
+
+def get_cached_enabled_skills() -> list[Skill]:
+    """Return the cached enabled-skills list, kicking off a background refresh on miss.
+
+    Safe to call from request paths: never blocks on disk I/O. Returns an empty
+    list on cache miss; a later call sees the warmed result once the refresh completes.
+    """
    with _enabled_skills_lock:
        cached = _enabled_skills_cache

    if cached is not None:
        return list(cached)

-    _ensure_enabled_skills_cache(app_config)
+    _ensure_enabled_skills_cache()
    return []


-def _skill_mutability_label(category: str) -> str:
-    return "[custom, editable]" if category == "custom" else "[built-in]"
+def get_enabled_skills_for_config(app_config: AppConfig | None = None) -> list[Skill]:
+    """Return enabled skills using the caller's config source.

+    When a concrete ``app_config`` is supplied, cache the loaded skills by that
+    config object's identity so request-scoped config injection still resolves
+    skill paths from the matching config without rescanning storage on every
+    agent factory call.
+    """
+    if app_config is None:
+        return _get_enabled_skills()

-def clear_skills_system_prompt_cache(app_config: AppConfig | None = None) -> None:
-    _invalidate_enabled_skills_cache(app_config)
-
-
-async def refresh_skills_system_prompt_cache_async(app_config: AppConfig | None = None) -> None:
-    await asyncio.to_thread(_invalidate_enabled_skills_cache(app_config).wait)
-
-
-def _reset_skills_system_prompt_cache_state() -> None:
-    global _enabled_skills_cache, _enabled_skills_refresh_active, _enabled_skills_refresh_version
-
-    _get_cached_skills_prompt_section.cache_clear()
+    cache_key = id(app_config)
    with _enabled_skills_lock:
-        _enabled_skills_cache = None
-        _enabled_skills_refresh_active = False
-        _enabled_skills_refresh_version = 0
-        _enabled_skills_refresh_event.clear()
-
-
-def _refresh_enabled_skills_cache(app_config: AppConfig | None = None) -> None:
-    """Backward-compatible test helper for direct synchronous reload."""
-    try:
-        skills = _load_enabled_skills_sync(app_config)
-    except Exception:
-        logger.exception("Failed to load enabled skills for prompt injection")
-        skills = []
+        cached = _enabled_skills_by_config_cache.get(cache_key)
+        if cached is not None:
+            cached_config, cached_skills = cached
+            if cached_config is app_config:
+                return list(cached_skills)

+    skills = list(get_or_new_skill_storage(app_config=app_config).load_skills(enabled_only=True))
    with _enabled_skills_lock:
-        _enabled_skills_cache = skills
-        _enabled_skills_refresh_active = False
-        _enabled_skills_refresh_event.set()
+        _enabled_skills_by_config_cache[cache_key] = (app_config, skills)
+    return list(skills)
+
+
+def _skill_mutability_label(category: SkillCategory | str) -> str:
+    return "[custom, editable]" if category == SkillCategory.CUSTOM else "[built-in]"
+
+
+def clear_skills_system_prompt_cache() -> None:
+    _invalidate_enabled_skills_cache()
+
+
+async def refresh_skills_system_prompt_cache_async() -> None:
+    await asyncio.to_thread(_invalidate_enabled_skills_cache().wait)


 def _build_skill_evolution_section(skill_evolution_enabled: bool) -> str:
@@ -166,7 +180,7 @@ Skip simple one-off tasks.
 """


-def _build_available_subagents_description(available_names: list[str], bash_available: bool, app_config: AppConfig) -> str:
+def _build_available_subagents_description(available_names: list[str], bash_available: bool, *, app_config: AppConfig | None = None) -> str:
    """Dynamically build subagent type descriptions from registry.

    Mirrors Codex's pattern where agent_type_description is dynamically generated
@@ -188,7 +202,7 @@ def _build_available_subagents_description(available_names: list[str], bash_avai
        if name in builtin_descriptions:
            lines.append(f"- **{name}**: {builtin_descriptions[name]}")
        else:
-            config = get_subagent_config(name, app_config)
+            config = get_subagent_config(name, app_config=app_config)
            if config is not None:
                desc = config.description.split("\n")[0].strip()  # First line only for brevity
                lines.append(f"- **{name}**: {desc}")
@@ -196,23 +210,22 @@ def _build_available_subagents_description(available_names: list[str], bash_avai
    return "\n".join(lines)


-def _build_subagent_section(max_concurrent: int, app_config: AppConfig) -> str:
+def _build_subagent_section(max_concurrent: int, *, app_config: AppConfig | None = None) -> str:
    """Build the subagent system prompt section with dynamic concurrency limit.

    Args:
        max_concurrent: Maximum number of concurrent subagent calls allowed per response.
-        app_config: Application config used to gate bash availability.

    Returns:
        Formatted subagent section string.
    """
    n = max_concurrent
-    available_names = get_available_subagent_names(app_config)
+    available_names = get_available_subagent_names(app_config=app_config) if app_config is not None else get_available_subagent_names()
    bash_available = "bash" in available_names

    # Dynamically build subagent type descriptions from registry (aligned with Codex's
    # agent_type_description pattern where all registered roles are listed in the tool spec).
-    available_subagents = _build_available_subagents_description(available_names, bash_available, app_config)
+    available_subagents = _build_available_subagents_description(available_names, bash_available, app_config=app_config)
    direct_tool_examples = "bash, ls, read_file, web_search, etc." if bash_available else "ls, read_file, web_search, etc."
    direct_execution_example = (
        '# User asks: "Run the tests"\n# Thinking: Cannot decompose into parallel sub-tasks\n# → Execute directly\n\nbash("npm test")  # Direct execution, not task()'
@@ -353,8 +366,7 @@ You are {agent_name}, an open-source super agent.
 </role>

 {soul}
-{memory_context}
-
+{self_update_section}
 <thinking_style>
 - Think concisely and strategically about the user's request BEFORE taking action
 - Break down the task: What is clear? What is ambiguous? What is missing?
@@ -539,34 +551,44 @@ combined with a FastAPI gateway for REST API access [citation:FastAPI](https://f
 """


-def _get_memory_context(app_config: AppConfig, agent_name: str | None = None) -> str:
+def _get_memory_context(agent_name: str | None = None, *, app_config: AppConfig | None = None) -> str:
    """Get memory context for injection into system prompt.

-    Returns an empty string when memory is disabled or the stored memory file
-    cannot be read/parsed. A corrupt memory.json degrades the prompt to
-    no-memory; it never kills the agent.
+    Args:
+        agent_name: If provided, loads per-agent memory. If None, loads global memory.
+        app_config: Explicit application config. When provided, memory options
+            are read from this value instead of the global config singleton.
+
+    Returns:
+        Formatted memory context string wrapped in <memory> tags, or an empty string when memory is disabled, has no content, or fails to load.
    """
-    from deerflow.agents.memory import format_memory_for_injection, get_memory_data
-    from deerflow.runtime.user_context import get_effective_user_id
-
-    memory_config = app_config.memory
-    if not memory_config.enabled or not memory_config.injection_enabled:
-        return ""
-
    try:
-        memory_data = get_memory_data(memory_config, agent_name, user_id=get_effective_user_id())
-    except (OSError, ValueError, UnicodeDecodeError):
-        logger.exception("Failed to load memory data for prompt injection")
-        return ""
+        from deerflow.agents.memory import format_memory_for_injection, get_memory_data
+        from deerflow.runtime.user_context import get_effective_user_id

-    memory_content = format_memory_for_injection(memory_data, max_tokens=memory_config.max_injection_tokens)
-    if not memory_content.strip():
-        return ""
+        if app_config is None:
+            from deerflow.config.memory_config import get_memory_config

-    return f"""<memory>
+            config = get_memory_config()
+        else:
+            config = app_config.memory
+
+        if not config.enabled or not config.injection_enabled:
+            return ""
+
+        memory_data = get_memory_data(agent_name, user_id=get_effective_user_id())
+        memory_content = format_memory_for_injection(memory_data, max_tokens=config.max_injection_tokens)
+
+        if not memory_content.strip():
+            return ""
+
+        return f"""<memory>
 {memory_content}
 </memory>
 """
+    except Exception:
+        logger.exception("Failed to load memory context")
+        return ""


@lru_cache(maxsize=32)
@@ -601,12 +623,24 @@ You have access to skills that provide optimized workflows for specific tasks. E
 </skill_system>"""


-def get_skills_prompt_section(app_config: AppConfig, available_skills: set[str] | None = None) -> str:
+def get_skills_prompt_section(available_skills: set[str] | None = None, *, app_config: AppConfig | None = None) -> str:
    """Generate the skills prompt section with available skills list."""
-    skills = _get_enabled_skills(app_config)
+    skills = get_enabled_skills_for_config(app_config)

-    container_base_path = app_config.skills.container_path
-    skill_evolution_enabled = app_config.skill_evolution.enabled
+    if app_config is None:
+        try:
+            from deerflow.config import get_app_config
+
+            config = get_app_config()
+            container_base_path = config.skills.container_path
+            skill_evolution_enabled = config.skill_evolution.enabled
+        except Exception:
+            container_base_path = "/mnt/skills"
+            skill_evolution_enabled = False
+    else:
+        config = app_config
+        container_base_path = config.skills.container_path
+        skill_evolution_enabled = config.skill_evolution.enabled

    if not skills and not skill_evolution_enabled:
        return ""
@@ -630,7 +664,27 @@ def get_agent_soul(agent_name: str | None) -> str:
    return ""


-def get_deferred_tools_prompt_section(app_config: AppConfig) -> str:
+def _build_self_update_section(agent_name: str | None) -> str:
+    """Prompt block that teaches the custom agent to persist self-updates via update_agent."""
+    if not agent_name:
+        return ""
+    return f"""<self_update>
+You are running as the custom agent **{agent_name}** with a persisted SOUL.md and config.yaml.
+
+When the user asks you to update your own description, personality, behaviour, skill set, tool groups, or default model,
+you MUST persist the change with the `update_agent` tool. Do NOT use `bash`, `write_file`, or any sandbox tool to edit
+SOUL.md or config.yaml — those write into a temporary sandbox/tool workspace and the changes will be lost on the next turn.
+
+Rules:
+- Always pass the FULL replacement text for `soul` (no patch semantics). Start from your current SOUL above and apply the user's edits.
+- Only pass the fields that should change. Omit the others to preserve them.
+- Pass `skills=[]` to disable all skills, or omit `skills` to keep the existing whitelist.
+- After `update_agent` returns successfully, tell the user the change is persisted and will take effect on the next turn.
+</self_update>
+"""
+
+
+def get_deferred_tools_prompt_section(*, app_config: AppConfig | None = None) -> str:
    """Generate <available-deferred-tools> block for the system prompt.

    Lists only deferred tool names so the agent knows what exists
@@ -639,7 +693,17 @@ def get_deferred_tools_prompt_section(app_config: AppConfig) -> str:
    """
    from deerflow.tools.builtins.tool_search import get_deferred_registry

-    if not app_config.tool_search.enabled:
+    if app_config is None:
+        try:
+            from deerflow.config import get_app_config
+
+            config = get_app_config()
+        except Exception:
+            return ""
+    else:
+        config = app_config
+
+    if not config.tool_search.enabled:
        return ""

    registry = get_deferred_registry()
@@ -650,9 +714,19 @@ def get_deferred_tools_prompt_section(app_config: AppConfig) -> str:
    return f"<available-deferred-tools>\n{names}\n</available-deferred-tools>"


-def _build_acp_section(app_config: AppConfig) -> str:
+def _build_acp_section(*, app_config: AppConfig | None = None) -> str:
    """Build the ACP agent prompt section, only if ACP agents are configured."""
-    if not app_config.acp_agents:
+    if app_config is None:
+        try:
+            from deerflow.config.acp_config import get_acp_agents
+
+            agents = get_acp_agents()
+        except Exception:
+            return ""
+    else:
+        agents = getattr(app_config, "acp_agents", {}) or {}
+
+    if not agents:
        return ""

    return (
@@ -664,9 +738,20 @@ def _build_acp_section(app_config: AppConfig) -> str:
    )


-def _build_custom_mounts_section(app_config: AppConfig) -> str:
+def _build_custom_mounts_section(*, app_config: AppConfig | None = None) -> str:
    """Build a prompt section for explicitly configured sandbox mounts."""
-    mounts = app_config.sandbox.mounts or []
+    if app_config is None:
+        try:
+            from deerflow.config import get_app_config
+
+            config = get_app_config()
+        except Exception:
+            logger.exception("Failed to load configured sandbox mounts for the lead-agent prompt")
+            return ""
+    else:
+        config = app_config
+
+    mounts = config.sandbox.mounts or []

    if not mounts:
        return ""
@@ -681,19 +766,16 @@ def _build_custom_mounts_section(app_config: AppConfig) -> str:


 def apply_prompt_template(
-    app_config: AppConfig,
    subagent_enabled: bool = False,
    max_concurrent_subagents: int = 3,
    *,
    agent_name: str | None = None,
    available_skills: set[str] | None = None,
+    app_config: AppConfig | None = None,
 ) -> str:
-    # Get memory context
-    memory_context = _get_memory_context(app_config, agent_name)
-
    # Include subagent section only if enabled (from runtime parameter)
    n = max_concurrent_subagents
-    subagent_section = _build_subagent_section(n, app_config) if subagent_enabled else ""
+    subagent_section = _build_subagent_section(n, app_config=app_config) if subagent_enabled else ""

    # Add subagent reminder to critical_reminders if enabled
    subagent_reminder = (
@@ -714,27 +796,28 @@ def apply_prompt_template(
    )

    # Get skills section
-    skills_section = get_skills_prompt_section(app_config, available_skills)
+    skills_section = get_skills_prompt_section(available_skills, app_config=app_config)

    # Get deferred tools section (tool_search)
-    deferred_tools_section = get_deferred_tools_prompt_section(app_config)
+    deferred_tools_section = get_deferred_tools_prompt_section(app_config=app_config)

    # Build ACP agent section only if ACP agents are configured
-    acp_section = _build_acp_section(app_config)
-    custom_mounts_section = _build_custom_mounts_section(app_config)
+    acp_section = _build_acp_section(app_config=app_config)
+    custom_mounts_section = _build_custom_mounts_section(app_config=app_config)
    acp_and_mounts_section = "\n".join(section for section in (acp_section, custom_mounts_section) if section)

-    # Format the prompt with dynamic skills and memory
-    prompt = SYSTEM_PROMPT_TEMPLATE.format(
+    # Build and return the fully static system prompt.
+    # Memory and current date are injected per-turn via DynamicContextMiddleware
+    # as a <system-reminder> in the first HumanMessage, keeping this prompt
+    # identical across users and sessions for maximum prefix-cache reuse.
+    return SYSTEM_PROMPT_TEMPLATE.format(
        agent_name=agent_name or "DeerFlow 2.0",
        soul=get_agent_soul(agent_name),
+        self_update_section=_build_self_update_section(agent_name),
        skills_section=skills_section,
        deferred_tools_section=deferred_tools_section,
-        memory_context=memory_context,
        subagent_section=subagent_section,
        subagent_reminder=subagent_reminder,
        subagent_thinking=subagent_thinking,
        acp_section=acp_and_mounts_section,
    )
-
-    return prompt + f"\n<current_date>{datetime.now().strftime('%Y-%m-%d, %A')}</current_date>"
@@ -7,17 +7,11 @@ from dataclasses import dataclass, field
 from datetime import UTC, datetime
 from typing import Any

-from deerflow.config.app_config import AppConfig
+from deerflow.config.memory_config import get_memory_config

 logger = logging.getLogger(__name__)


-# Module-level config pointer set by the middleware that owns the queue.
-# The queue runs on a background Timer thread where ``Runtime`` and FastAPI
-# request context are not accessible; the enqueuer (which does have runtime
-# context) is responsible for plumbing ``AppConfig`` through ``add()``.
-
-
@dataclass
 class ConversationContext:
    """Context for a conversation to be processed for memory update."""
@@ -37,21 +31,10 @@ class MemoryUpdateQueue:
    This queue collects conversation contexts and processes them after
    a configurable debounce period. Multiple conversations received within
    the debounce window are batched together.
-
-    The queue captures an ``AppConfig`` reference at construction time and
-    reuses it for the MemoryUpdater it spawns. Callers must construct a
-    fresh queue when the config changes rather than reaching into a global.
    """

-    def __init__(self, app_config: AppConfig):
-        """Initialize the memory update queue.
-
-        Args:
-            app_config: Application config. The queue reads its own
-                ``memory`` section for debounce timing and hands the full
-                config to :class:`MemoryUpdater`.
-        """
-        self._app_config = app_config
+    def __init__(self):
+        """Initialize the memory update queue."""
        self._queue: list[ConversationContext] = []
        self._lock = threading.Lock()
        self._timer: threading.Timer | None = None
@@ -66,8 +49,19 @@ class MemoryUpdateQueue:
        correction_detected: bool = False,
        reinforcement_detected: bool = False,
    ) -> None:
-        """Add a conversation to the update queue."""
-        config = self._app_config.memory
+        """Add a conversation to the update queue.
+
+        Args:
+            thread_id: The thread ID.
+            messages: The conversation messages.
+            agent_name: If provided, memory is stored per-agent. If None, uses global memory.
+            user_id: The user ID captured at enqueue time. Stored in ConversationContext so it
+                survives the threading.Timer boundary (ContextVar does not propagate across
+                raw threads).
+            correction_detected: Whether recent turns include an explicit correction signal.
+            reinforcement_detected: Whether recent turns include a positive reinforcement signal.
+        """
+        config = get_memory_config()
        if not config.enabled:
            return

@@ -94,7 +88,7 @@ class MemoryUpdateQueue:
        reinforcement_detected: bool = False,
    ) -> None:
        """Add a conversation and start processing immediately in the background."""
-        config = self._app_config.memory
+        config = get_memory_config()
        if not config.enabled:
            return

@@ -117,7 +111,7 @@ class MemoryUpdateQueue:
        thread_id: str,
        messages: list[Any],
        agent_name: str | None,
-        user_id: str | None = None,
+        user_id: str | None,
        correction_detected: bool,
        reinforcement_detected: bool,
    ) -> None:
@@ -141,7 +135,7 @@ class MemoryUpdateQueue:

    def _reset_timer(self) -> None:
        """Reset the debounce timer."""
-        config = self._app_config.memory
+        config = get_memory_config()
        self._schedule_timer(config.debounce_seconds)

        logger.debug("Memory update timer set for %ss", config.debounce_seconds)
@@ -181,7 +175,7 @@ class MemoryUpdateQueue:
        logger.info("Processing %d queued memory updates", len(contexts_to_process))

        try:
-            updater = MemoryUpdater(self._app_config)
+            updater = MemoryUpdater()

            for context in contexts_to_process:
                try:
@@ -253,35 +247,31 @@ class MemoryUpdateQueue:
            return self._processing


-# Queues keyed by ``id(AppConfig)`` so tests and multi-client setups with
-# distinct configs do not share a debounce queue.
-_memory_queues: dict[int, MemoryUpdateQueue] = {}
+# Global singleton instance
+_memory_queue: MemoryUpdateQueue | None = None
 _queue_lock = threading.Lock()


-def get_memory_queue(app_config: AppConfig) -> MemoryUpdateQueue:
-    """Get or create the memory update queue for the given app config."""
-    key = id(app_config)
-    with _queue_lock:
-        queue = _memory_queues.get(key)
-        if queue is None:
-            queue = MemoryUpdateQueue(app_config)
-            _memory_queues[key] = queue
-        return queue
+def get_memory_queue() -> MemoryUpdateQueue:
+    """Get the global memory update queue singleton, creating it on first use.

-
-def reset_memory_queue(app_config: AppConfig | None = None) -> None:
-    """Reset memory queue(s).
-
-    Pass an ``app_config`` to reset only its queue, or omit to reset all
-    (useful at test teardown).
+    Returns:
+        The memory update queue instance.
    """
+    global _memory_queue
    with _queue_lock:
-        if app_config is not None:
-            queue = _memory_queues.pop(id(app_config), None)
-            if queue is not None:
-                queue.clear()
-            return
-        for queue in _memory_queues.values():
-            queue.clear()
-        _memory_queues.clear()
+        if _memory_queue is None:
+            _memory_queue = MemoryUpdateQueue()
+        return _memory_queue
+
+
+def reset_memory_queue() -> None:
+    """Reset the global memory queue.
+
+    This is useful for testing.
+    """
+    global _memory_queue
+    with _queue_lock:
+        if _memory_queue is not None:
+            _memory_queue.clear()
+        _memory_queue = None
@@ -10,7 +10,7 @@ from pathlib import Path
 from typing import Any

 from deerflow.config.agents_config import AGENT_NAME_PATTERN
-from deerflow.config.memory_config import MemoryConfig
+from deerflow.config.memory_config import get_memory_config
 from deerflow.config.paths import get_paths

 logger = logging.getLogger(__name__)
@@ -62,15 +62,8 @@ class MemoryStorage(abc.ABC):
 class FileMemoryStorage(MemoryStorage):
    """File-based memory storage provider."""

-    def __init__(self, memory_config: MemoryConfig):
-        """Initialize the file memory storage.
-
-        Args:
-            memory_config: Memory configuration (storage_path etc.). Stored on
-                the instance so per-request lookups don't need to reach for
-                ambient state.
-        """
-        self._memory_config = memory_config
+    def __init__(self):
+        """Initialize the file memory storage."""
        # Per-user/agent memory cache: keyed by (user_id, agent_name) tuple (None = global)
        # Value: (memory_data, file_mtime)
        self._memory_cache: dict[tuple[str | None, str | None], tuple[dict[str, Any], float | None]] = {}
@@ -90,11 +83,11 @@ class FileMemoryStorage(MemoryStorage):

    def _get_memory_file_path(self, agent_name: str | None = None, *, user_id: str | None = None) -> Path:
        """Get the path to the memory file."""
-        config = self._memory_config
        if user_id is not None:
            if agent_name is not None:
                self._validate_agent_name(agent_name)
                return get_paths().user_agent_memory_file(user_id, agent_name)
+            config = get_memory_config()
            if config.storage_path and Path(config.storage_path).is_absolute():
                return Path(config.storage_path)
            return get_paths().user_memory_file(user_id)
@@ -102,6 +95,7 @@ class FileMemoryStorage(MemoryStorage):
        if agent_name is not None:
            self._validate_agent_name(agent_name)
            return get_paths().agent_memory_file(agent_name)
+        config = get_memory_config()
        if config.storage_path:
            p = Path(config.storage_path)
            return p if p.is_absolute() else get_paths().base_dir / p
@@ -122,16 +116,20 @@ class FileMemoryStorage(MemoryStorage):
            logger.warning("Failed to load memory file: %s", e)
            return create_empty_memory()

+    @staticmethod
+    def _cache_key(agent_name: str | None = None, *, user_id: str | None = None) -> tuple[str | None, str | None]:
+        return (user_id, agent_name)
+
    def load(self, agent_name: str | None = None, *, user_id: str | None = None) -> dict[str, Any]:
        """Load memory data (cached with file modification time check)."""
        file_path = self._get_memory_file_path(agent_name, user_id=user_id)
+        cache_key = self._cache_key(agent_name, user_id=user_id)

        try:
            current_mtime = file_path.stat().st_mtime if file_path.exists() else None
        except OSError:
            current_mtime = None

-        cache_key = (user_id, agent_name)
        with self._cache_lock:
            cached = self._memory_cache.get(cache_key)
            if cached is not None and cached[1] == current_mtime:
@@ -148,13 +146,13 @@ class FileMemoryStorage(MemoryStorage):
        """Reload memory data from file, forcing cache invalidation."""
        file_path = self._get_memory_file_path(agent_name, user_id=user_id)
        memory_data = self._load_memory_from_file(agent_name, user_id=user_id)
+        cache_key = self._cache_key(agent_name, user_id=user_id)

        try:
            mtime = file_path.stat().st_mtime if file_path.exists() else None
        except OSError:
            mtime = None

-        cache_key = (user_id, agent_name)
        with self._cache_lock:
            self._memory_cache[cache_key] = (memory_data, mtime)
        return memory_data
@@ -162,6 +160,7 @@ class FileMemoryStorage(MemoryStorage):
    def save(self, memory_data: dict[str, Any], agent_name: str | None = None, *, user_id: str | None = None) -> bool:
        """Save memory data to file and update cache."""
        file_path = self._get_memory_file_path(agent_name, user_id=user_id)
+        cache_key = self._cache_key(agent_name, user_id=user_id)

        try:
            file_path.parent.mkdir(parents=True, exist_ok=True)
@@ -181,7 +180,6 @@ class FileMemoryStorage(MemoryStorage):
            except OSError:
                mtime = None

-            cache_key = (user_id, agent_name)
            with self._cache_lock:
                self._memory_cache[cache_key] = (memory_data, mtime)
            logger.info("Memory saved to %s", file_path)
@@ -191,31 +189,23 @@ class FileMemoryStorage(MemoryStorage):
            return False


-# Instances keyed by (storage_class_path, id(memory_config)) so tests can
-# construct isolated storages and multi-client setups with different configs
-# don't collide on a single process-wide singleton.
-_storage_instances: dict[tuple[str, int], MemoryStorage] = {}
+_storage_instance: MemoryStorage | None = None
 _storage_lock = threading.Lock()


-def get_memory_storage(memory_config: MemoryConfig) -> MemoryStorage:
-    """Get the configured memory storage instance.
-
-    Caches one instance per ``(storage_class, memory_config)`` pair. In
-    single-config deployments this collapses to one instance; in multi-client
-    or test scenarios each config gets its own storage.
-    """
-    key = (memory_config.storage_class, id(memory_config))
-    existing = _storage_instances.get(key)
-    if existing is not None:
-        return existing
+def get_memory_storage() -> MemoryStorage:
+    """Get the shared memory storage instance, creating it on first use (falls back to FileMemoryStorage if the configured class fails to load)."""
+    global _storage_instance
+    if _storage_instance is not None:
+        return _storage_instance

    with _storage_lock:
-        existing = _storage_instances.get(key)
-        if existing is not None:
-            return existing
+        if _storage_instance is not None:
+            return _storage_instance
+
+        config = get_memory_config()
+        storage_class_path = config.storage_class

-        storage_class_path = memory_config.storage_class
        try:
            module_path, class_name = storage_class_path.rsplit(".", 1)
            import importlib
@@ -229,14 +219,13 @@ def get_memory_storage(memory_config: MemoryConfig) -> MemoryStorage:
            if not issubclass(storage_class, MemoryStorage):
                raise TypeError(f"Configured memory storage '{storage_class_path}' is not a subclass of MemoryStorage")

-            instance = storage_class(memory_config)
+            _storage_instance = storage_class()
        except Exception as e:
            logger.error(
                "Failed to load memory storage %s, falling back to FileMemoryStorage: %s",
                storage_class_path,
                e,
            )
-            instance = FileMemoryStorage(memory_config)
+            _storage_instance = FileMemoryStorage()

-        _storage_instances[key] = instance
-        return instance
+    return _storage_instance
@@ -5,19 +5,12 @@ from __future__ import annotations
 from deerflow.agents.memory.message_processing import detect_correction, detect_reinforcement, filter_messages_for_memory
 from deerflow.agents.memory.queue import get_memory_queue
 from deerflow.agents.middlewares.summarization_middleware import SummarizationEvent
-from deerflow.config.app_config import AppConfig
+from deerflow.config.memory_config import get_memory_config


 def memory_flush_hook(event: SummarizationEvent) -> None:
-    """Flush messages about to be summarized into the memory queue.
-
-    Reads ``AppConfig`` from disk on every invocation. This hook is fired by
-    ``SummarizationMiddleware`` which has no ergonomic way to thread an
-    explicit ``app_config`` through; ``AppConfig.from_file()`` is a pure load
-    so the cost is acceptable for this rare pre-summarization callback.
-    """
-    app_config = AppConfig.from_file()
-    if not app_config.memory.enabled or not event.thread_id:
+    """Flush messages about to be summarized into the memory queue."""
+    if not get_memory_config().enabled or not event.thread_id:
        return

    filtered_messages = filter_messages_for_memory(list(event.messages_to_summarize))
@@ -28,7 +21,7 @@ def memory_flush_hook(event: SummarizationEvent) -> None:

    correction_detected = detect_correction(filtered_messages)
    reinforcement_detected = not correction_detected and detect_reinforcement(filtered_messages)
-    queue = get_memory_queue(app_config)
+    queue = get_memory_queue()
    queue.add_nowait(
        thread_id=event.thread_id,
        messages=filtered_messages,
@@ -9,7 +9,6 @@ import logging
 import math
 import re
 import uuid
-from collections.abc import Awaitable
 from typing import Any

 from deerflow.agents.memory.prompt import (
@@ -21,12 +20,17 @@ from deerflow.agents.memory.storage import (
    get_memory_storage,
    utc_now_iso_z,
 )
-from deerflow.config.app_config import AppConfig
-from deerflow.config.memory_config import MemoryConfig
+from deerflow.config.memory_config import get_memory_config
 from deerflow.models import create_chat_model

 logger = logging.getLogger(__name__)

+
+# Thread pool for offloading sync memory updates when called from an async
+# context.  Unlike the previous asyncio.run() approach, this runs *sync*
+# model.invoke() calls — no event loop is created, so the langchain async
+# httpx client pool (globally cached via @lru_cache) is never touched and
+# cross-loop connection reuse is impossible.
 _SYNC_MEMORY_UPDATER_EXECUTOR = concurrent.futures.ThreadPoolExecutor(
    max_workers=4,
    thread_name_prefix="memory-updater-sync",
@@ -39,33 +43,45 @@ def _create_empty_memory() -> dict[str, Any]:
    return create_empty_memory()


-def _save_memory_to_file(memory_config: MemoryConfig, memory_data: dict[str, Any], agent_name: str | None = None, *, user_id: str | None = None) -> bool:
-    """Save via the configured memory storage."""
-    return get_memory_storage(memory_config).save(memory_data, agent_name, user_id=user_id)
+def _save_memory_to_file(memory_data: dict[str, Any], agent_name: str | None = None, *, user_id: str | None = None) -> bool:
+    """Backward-compatible wrapper around the configured memory storage save path."""
+    return get_memory_storage().save(memory_data, agent_name, user_id=user_id)


-def get_memory_data(memory_config: MemoryConfig, agent_name: str | None = None, *, user_id: str | None = None) -> dict[str, Any]:
+def get_memory_data(agent_name: str | None = None, *, user_id: str | None = None) -> dict[str, Any]:
    """Get the current memory data via storage provider."""
-    return get_memory_storage(memory_config).load(agent_name, user_id=user_id)
+    return get_memory_storage().load(agent_name, user_id=user_id)


-def reload_memory_data(memory_config: MemoryConfig, agent_name: str | None = None, *, user_id: str | None = None) -> dict[str, Any]:
+def reload_memory_data(agent_name: str | None = None, *, user_id: str | None = None) -> dict[str, Any]:
    """Reload memory data via storage provider."""
-    return get_memory_storage(memory_config).reload(agent_name, user_id=user_id)
+    return get_memory_storage().reload(agent_name, user_id=user_id)


-def import_memory_data(memory_config: MemoryConfig, memory_data: dict[str, Any], agent_name: str | None = None, *, user_id: str | None = None) -> dict[str, Any]:
-    """Persist imported memory data via storage provider."""
-    storage = get_memory_storage(memory_config)
+def import_memory_data(memory_data: dict[str, Any], agent_name: str | None = None, *, user_id: str | None = None) -> dict[str, Any]:
+    """Persist imported memory data via storage provider.
+
+    Args:
+        memory_data: Full memory payload to persist.
+        agent_name: If provided, imports into per-agent memory.
+        user_id: If provided, scopes memory to a specific user.
+
+    Returns:
+        The saved memory data after storage normalization.
+
+    Raises:
+        OSError: If persisting the imported memory fails.
+    """
+    storage = get_memory_storage()
    if not storage.save(memory_data, agent_name, user_id=user_id):
        raise OSError("Failed to save imported memory data")
    return storage.load(agent_name, user_id=user_id)


-def clear_memory_data(memory_config: MemoryConfig, agent_name: str | None = None, *, user_id: str | None = None) -> dict[str, Any]:
+def clear_memory_data(agent_name: str | None = None, *, user_id: str | None = None) -> dict[str, Any]:
    """Clear all stored memory data and persist an empty structure."""
    cleared_memory = create_empty_memory()
-    if not _save_memory_to_file(memory_config, cleared_memory, agent_name, user_id=user_id):
+    if not _save_memory_to_file(cleared_memory, agent_name, user_id=user_id):
        raise OSError("Failed to save cleared memory data")
    return cleared_memory

@@ -78,7 +94,6 @@ def _validate_confidence(confidence: float) -> float:


 def create_memory_fact(
-    memory_config: MemoryConfig,
    content: str,
    category: str = "context",
    confidence: float = 0.5,
@@ -94,7 +109,7 @@ def create_memory_fact(
    normalized_category = category.strip() or "context"
    validated_confidence = _validate_confidence(confidence)
    now = utc_now_iso_z()
-    memory_data = get_memory_data(memory_config, agent_name, user_id=user_id)
+    memory_data = get_memory_data(agent_name, user_id=user_id)
    updated_memory = dict(memory_data)
    facts = list(memory_data.get("facts", []))
    facts.append(
@@ -109,15 +124,15 @@ def create_memory_fact(
    )
    updated_memory["facts"] = facts

-    if not _save_memory_to_file(memory_config, updated_memory, agent_name, user_id=user_id):
+    if not _save_memory_to_file(updated_memory, agent_name, user_id=user_id):
        raise OSError("Failed to save memory data after creating fact")

    return updated_memory


-def delete_memory_fact(memory_config: MemoryConfig, fact_id: str, agent_name: str | None = None, *, user_id: str | None = None) -> dict[str, Any]:
+def delete_memory_fact(fact_id: str, agent_name: str | None = None, *, user_id: str | None = None) -> dict[str, Any]:
    """Delete a fact by its id and persist the updated memory data."""
-    memory_data = get_memory_data(memory_config, agent_name, user_id=user_id)
+    memory_data = get_memory_data(agent_name, user_id=user_id)
    facts = memory_data.get("facts", [])
    updated_facts = [fact for fact in facts if fact.get("id") != fact_id]
    if len(updated_facts) == len(facts):
@@ -126,14 +141,13 @@ def delete_memory_fact(memory_config: MemoryConfig, fact_id: str, agent_name: st
    updated_memory = dict(memory_data)
    updated_memory["facts"] = updated_facts

-    if not _save_memory_to_file(memory_config, updated_memory, agent_name, user_id=user_id):
+    if not _save_memory_to_file(updated_memory, agent_name, user_id=user_id):
        raise OSError(f"Failed to save memory data after deleting fact '{fact_id}'")

    return updated_memory


 def update_memory_fact(
-    memory_config: MemoryConfig,
    fact_id: str,
    content: str | None = None,
    category: str | None = None,
@@ -143,7 +157,7 @@ def update_memory_fact(
    user_id: str | None = None,
 ) -> dict[str, Any]:
    """Update an existing fact and persist the updated memory data."""
-    memory_data = get_memory_data(memory_config, agent_name, user_id=user_id)
+    memory_data = get_memory_data(agent_name, user_id=user_id)
    updated_memory = dict(memory_data)
    updated_facts: list[dict[str, Any]] = []
    found = False
@@ -170,7 +184,7 @@ def update_memory_fact(

    updated_memory["facts"] = updated_facts

-    if not _save_memory_to_file(memory_config, updated_memory, agent_name, user_id=user_id):
+    if not _save_memory_to_file(updated_memory, agent_name, user_id=user_id):
        raise OSError(f"Failed to save memory data after updating fact '{fact_id}'")

    return updated_memory
@@ -213,39 +227,6 @@ def _extract_text(content: Any) -> str:
    return str(content)


-def _run_async_update_sync(coro: Awaitable[bool]) -> bool:
-    """Run an async memory update from sync code, including nested-loop contexts."""
-    handed_off = False
-
-    try:
-        try:
-            loop = asyncio.get_running_loop()
-        except RuntimeError:
-            loop = None
-
-        if loop is not None and loop.is_running():
-            future = _SYNC_MEMORY_UPDATER_EXECUTOR.submit(asyncio.run, coro)
-            handed_off = True
-            return future.result()
-
-        handed_off = True
-        return asyncio.run(coro)
-    except Exception:
-        if not handed_off:
-            close = getattr(coro, "close", None)
-            if callable(close):
-                try:
-                    close()
-                except Exception:
-                    logger.debug(
-                        "Failed to close un-awaited memory update coroutine",
-                        exc_info=True,
-                    )
-
-        logger.exception("Failed to run async memory update from sync context")
-        return False
-
-
 # Matches sentences that describe a file-upload *event* rather than general
 # file-related work.  Deliberately narrow to avoid removing legitimate facts
 # such as "User works with CSV files" or "prefers PDF export".
@@ -295,25 +276,19 @@ def _fact_content_key(content: Any) -> str | None:
 class MemoryUpdater:
    """Updates memory using LLM based on conversation context."""

-    def __init__(self, app_config: AppConfig, model_name: str | None = None):
+    def __init__(self, model_name: str | None = None):
        """Initialize the memory updater.

        Args:
-            app_config: Application config (the updater needs both ``memory``
-                section for behavior and the full config for ``create_chat_model``).
            model_name: Optional model name to use. If None, uses config or default.
        """
-        self._app_config = app_config
        self._model_name = model_name

-    @property
-    def _memory_config(self) -> MemoryConfig:
-        return self._app_config.memory
-
    def _get_model(self):
        """Get the model for memory updates."""
-        model_name = self._model_name or self._memory_config.model_name
-        return create_chat_model(name=model_name, thinking_enabled=False, app_config=self._app_config)
+        config = get_memory_config()
+        model_name = self._model_name or config.model_name
+        return create_chat_model(name=model_name, thinking_enabled=False)

    def _build_correction_hint(
        self,
@@ -349,11 +324,11 @@ class MemoryUpdater:
        user_id: str | None = None,
    ) -> tuple[dict[str, Any], str] | None:
        """Load memory and build the update prompt for a conversation."""
-        config = self._memory_config
+        config = get_memory_config()
        if not config.enabled or not messages:
            return None

-        current_memory = get_memory_data(config, agent_name, user_id=user_id)
+        current_memory = get_memory_data(agent_name, user_id=user_id)
        conversation_text = format_conversation_for_update(messages)
        if not conversation_text.strip():
            return None
@@ -389,7 +364,7 @@ class MemoryUpdater:
        # cannot corrupt the still-cached original object reference.
        updated_memory = self._apply_updates(copy.deepcopy(current_memory), update_data, thread_id)
        updated_memory = _strip_upload_mentions_from_memory(updated_memory)
-        return get_memory_storage(self._memory_config).save(updated_memory, agent_name, user_id=user_id)
+        return get_memory_storage().save(updated_memory, agent_name, user_id=user_id)

    async def aupdate_memory(
        self,
@@ -400,10 +375,43 @@ class MemoryUpdater:
        reinforcement_detected: bool = False,
        user_id: str | None = None,
    ) -> bool:
-        """Update memory asynchronously based on conversation messages."""
+        """Update memory asynchronously by delegating to the sync path.
+
+        Uses ``asyncio.to_thread`` to run the *sync* ``model.invoke()`` path
+        in a worker thread so no second event loop is created and the
+        langchain async httpx client pool (shared with the lead agent) is
+        never touched.  This eliminates the cross-loop connection-reuse bug
+        described in issue #2615.
+        """
+        return await asyncio.to_thread(
+            self._do_update_memory_sync,
+            messages=messages,
+            thread_id=thread_id,
+            agent_name=agent_name,
+            correction_detected=correction_detected,
+            reinforcement_detected=reinforcement_detected,
+            user_id=user_id,
+        )
+
+    def _do_update_memory_sync(
+        self,
+        messages: list[Any],
+        thread_id: str | None = None,
+        agent_name: str | None = None,
+        correction_detected: bool = False,
+        reinforcement_detected: bool = False,
+        user_id: str | None = None,
+    ) -> bool:
+        """Pure-sync memory update using ``model.invoke()``.
+
+        Uses the *sync* LLM call path so no event loop is created.  This
+        guarantees that the langchain provider's globally cached async
+        httpx ``AsyncClient`` / connection pool (the one shared with the
+        lead agent) is never touched — no cross-loop connection reuse is
+        possible.
+        """
        try:
-            prepared = await asyncio.to_thread(
-                self._prepare_update_prompt,
+            prepared = self._prepare_update_prompt(
                messages=messages,
                agent_name=agent_name,
                correction_detected=correction_detected,
@@ -415,9 +423,8 @@ class MemoryUpdater:

            current_memory, prompt = prepared
            model = self._get_model()
-            response = await model.ainvoke(prompt, config={"run_name": "memory_agent"})
-            return await asyncio.to_thread(
-                self._finalize_update,
+            response = model.invoke(prompt, config={"run_name": "memory_agent"})
+            return self._finalize_update(
                current_memory=current_memory,
                response_content=response.content,
                thread_id=thread_id,
@@ -440,7 +447,16 @@ class MemoryUpdater:
        reinforcement_detected: bool = False,
        user_id: str | None = None,
    ) -> bool:
-        """Synchronously update memory via the async updater path.
+        """Synchronously update memory using the sync LLM path.
+
+        Uses ``model.invoke()`` (sync HTTP) which operates on a completely
+        separate connection pool from the async ``AsyncClient`` shared by
+        the lead agent.  This eliminates the cross-loop connection-reuse
+        bug described in issue #2615.
+
+        When called from within a running event loop (e.g. from a LangGraph
+        node), the sync update runs on an executor thread; note that this
+        method still waits on ``future.result()`` before returning.

        Args:
            messages: List of conversation messages.
@@ -453,78 +469,35 @@ class MemoryUpdater:
        Returns:
            True if update was successful, False otherwise.
        """
-        config = self._memory_config
-        if not config.enabled:
-            return False
-
-        if not messages:
-            return False
-
        try:
-            # Get current memory
-            current_memory = get_memory_data(config, agent_name, user_id=user_id)
+            loop = asyncio.get_running_loop()
+        except RuntimeError:
+            loop = None

-            # Format conversation for prompt
-            conversation_text = format_conversation_for_update(messages)
-
-            if not conversation_text.strip():
+        if loop is not None and loop.is_running():
+            try:
+                future = _SYNC_MEMORY_UPDATER_EXECUTOR.submit(
+                    self._do_update_memory_sync,
+                    messages=messages,
+                    thread_id=thread_id,
+                    agent_name=agent_name,
+                    correction_detected=correction_detected,
+                    reinforcement_detected=reinforcement_detected,
+                    user_id=user_id,
+                )
+                return future.result()
+            except Exception:
+                logger.exception("Failed to offload memory update to executor")
                return False

-            # Build prompt
-            correction_hint = ""
-            if correction_detected:
-                correction_hint = (
-                    "IMPORTANT: Explicit correction signals were detected in this conversation. "
-                    "Pay special attention to what the agent got wrong, what the user corrected, "
-                    "and record the correct approach as a fact with category "
-                    '"correction" and confidence >= 0.95 when appropriate.'
-                )
-            if reinforcement_detected:
-                reinforcement_hint = (
-                    "IMPORTANT: Positive reinforcement signals were detected in this conversation. "
-                    "The user explicitly confirmed the agent's approach was correct or helpful. "
-                    "Record the confirmed approach, style, or preference as a fact with category "
-                    '"preference" or "behavior" and confidence >= 0.9 when appropriate.'
-                )
-                correction_hint = (correction_hint + "\n" + reinforcement_hint).strip() if correction_hint else reinforcement_hint
-
-            prompt = MEMORY_UPDATE_PROMPT.format(
-                current_memory=json.dumps(current_memory, indent=2),
-                conversation=conversation_text,
-                correction_hint=correction_hint,
-            )
-
-            # Call LLM
-            model = self._get_model()
-            response = model.invoke(prompt)
-            response_text = _extract_text(response.content).strip()
-
-            # Parse response
-            # Remove markdown code blocks if present
-            if response_text.startswith("```"):
-                lines = response_text.split("\n")
-                response_text = "\n".join(lines[1:-1] if lines[-1] == "```" else lines[1:])
-
-            update_data = json.loads(response_text)
-
-            # Apply updates
-            updated_memory = self._apply_updates(current_memory, update_data, thread_id)
-
-            # Strip file-upload mentions from all summaries before saving.
-            # Uploaded files are session-scoped and won't exist in future sessions,
-            # so recording upload events in long-term memory causes the agent to
-            # try (and fail) to locate those files in subsequent conversations.
-            updated_memory = _strip_upload_mentions_from_memory(updated_memory)
-
-            # Save
-            return get_memory_storage(config).save(updated_memory, agent_name, user_id=user_id)
-
-        except json.JSONDecodeError as e:
-            logger.warning("Failed to parse LLM response for memory update: %s", e)
-            return False
-        except Exception as e:
-            logger.exception("Memory update failed: %s", e)
-            return False
+        return self._do_update_memory_sync(
+            messages=messages,
+            thread_id=thread_id,
+            agent_name=agent_name,
+            correction_detected=correction_detected,
+            reinforcement_detected=reinforcement_detected,
+            user_id=user_id,
+        )

    def _apply_updates(
        self,
@@ -542,7 +515,7 @@ class MemoryUpdater:
        Returns:
            Updated memory data.
        """
-        config = self._memory_config
+        config = get_memory_config()
        now = utc_now_iso_z()

        # Update user sections
@@ -0,0 +1,204 @@
+"""Middleware to inject dynamic context (memory, current date) as a system-reminder.
+
+The system prompt is kept fully static for maximum prefix-cache reuse across users
+and sessions.  The current date is always injected.  Per-user memory is also injected
+when ``memory.injection_enabled`` is True in the app config.  Both are delivered once
+per conversation as a dedicated <system-reminder> HumanMessage inserted before the
+first user message (frozen-snapshot pattern).
+
+When a conversation spans midnight the middleware detects the date change and injects
+a lightweight date-update reminder as a separate HumanMessage before the current turn.
+This correction is persisted so subsequent turns on the new day see a consistent history
+and do not re-inject.
+
+Reminder format:
+
+    <system-reminder>
+    <memory>...</memory>
+
+    <current_date>2026-05-08, Friday</current_date>
+    </system-reminder>
+
+Date-update format:
+
+    <system-reminder>
+    <current_date>2026-05-09, Saturday</current_date>
+    </system-reminder>
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+import uuid
+from datetime import datetime
+from typing import TYPE_CHECKING, override
+
+from langchain.agents.middleware import AgentMiddleware
+from langchain_core.messages import HumanMessage
+from langgraph.runtime import Runtime
+
+if TYPE_CHECKING:
+    from deerflow.config.app_config import AppConfig
+
+logger = logging.getLogger(__name__)
+
+_DATE_RE = re.compile(r"<current_date>([^<]+)</current_date>")
+_DYNAMIC_CONTEXT_REMINDER_KEY = "dynamic_context_reminder"
+_SUMMARY_MESSAGE_NAME = "summary"
+
+
+def _extract_date(content: str) -> str | None:
+    """Return the text inside the first ``<current_date>`` tag of *content*, else None."""
+    match = _DATE_RE.search(content)
+    return None if match is None else match.group(1)
+
+
+def is_dynamic_context_reminder(message: object) -> bool:
+    """Tell whether *message* is a HumanMessage flagged as a dynamic-context reminder."""
+    return bool(message.additional_kwargs.get(_DYNAMIC_CONTEXT_REMINDER_KEY)) if isinstance(message, HumanMessage) else False
+
+
+def _last_injected_date(messages: list) -> str | None:
+    """Return the date carried by the newest dynamic-context reminder, if any.
+
+    Reminders are recognised by the ``dynamic_context_reminder`` flag in
+    ``additional_kwargs`` (not by content), so a user message that merely
+    contains ``<system-reminder>`` text is never mistaken for one.
+    """
+    newest = next((m for m in reversed(messages) if is_dynamic_context_reminder(m)), None)
+    if newest is None:
+        return None
+    body = newest.content
+    return _extract_date(body if isinstance(body, str) else str(body))
+
+
+def _is_user_injection_target(message: object) -> bool:
+    """True for a HumanMessage that is neither a dynamic-context reminder nor a summary message."""
+    return isinstance(message, HumanMessage) and not (is_dynamic_context_reminder(message) or message.name == _SUMMARY_MESSAGE_NAME)
+
+
+class DynamicContextMiddleware(AgentMiddleware):
+    """Inject memory and current date as a separate <system-reminder> HumanMessage.
+
+    First turn
+    ----------
+    Emits a full system-reminder (memory + date) as its own hidden HumanMessage that
+    takes over the first user message's ID; the user's content is re-emitted under a
+    derived ``{id}__user`` ID.  Once a dated reminder is in the history, later turns
+    on the same day return no update, so that reminder is not rewritten.
+
+    Midnight crossing
+    -----------------
+    If the date in the most recent reminder differs from today's date, a date-only
+    reminder is emitted the same way (ID swap) for the **last** user HumanMessage.
+    Subsequent turns on the new day find the new date in history and skip
+    re-injection.
+    """
+
+    def __init__(self, agent_name: str | None = None, *, app_config: AppConfig | None = None):
+        super().__init__()
+        self._agent_name = agent_name
+        self._app_config = app_config
+
+    def _build_full_reminder(self) -> str:
+        from deerflow.agents.lead_agent.prompt import _get_memory_context
+
+        # Memory is included only when injection_enabled is true (treated as true when no app_config was given); the date always is.
+        injection_enabled = self._app_config.memory.injection_enabled if self._app_config else True
+        memory_context = _get_memory_context(self._agent_name, app_config=self._app_config) if injection_enabled else ""
+        current_date = datetime.now().strftime("%Y-%m-%d, %A")
+
+        lines: list[str] = ["<system-reminder>"]
+        if memory_context:
+            lines.append(memory_context.strip())
+            lines.append("")  # blank line separating memory from date
+        lines.append(f"<current_date>{current_date}</current_date>")
+        lines.append("</system-reminder>")
+
+        return "\n".join(lines)
+
+    def _build_date_update_reminder(self) -> str:
+        current_date = datetime.now().strftime("%Y-%m-%d, %A")
+        return "\n".join(
+            [
+                "<system-reminder>",
+                f"<current_date>{current_date}</current_date>",
+                "</system-reminder>",
+            ]
+        )
+
+    @staticmethod
+    def _make_reminder_and_user_messages(original: HumanMessage, reminder_content: str) -> tuple[HumanMessage, HumanMessage]:
+        """Return (reminder_msg, user_msg) using the ID-swap technique.
+
+        reminder_msg takes the original message's ID so that add_messages replaces it
+        in-place (preserving position).  user_msg carries the original content with a
+        derived ``{id}__user`` ID and is appended immediately after by add_messages.
+
+        If the original message has no ID a stable UUID is generated so the derived
+        ``{id}__user`` ID never collapses to the ambiguous ``None__user`` string.
+        """
+        stable_id = original.id or str(uuid.uuid4())
+        reminder_msg = HumanMessage(
+            content=reminder_content,
+            id=stable_id,
+            additional_kwargs={"hide_from_ui": True, _DYNAMIC_CONTEXT_REMINDER_KEY: True},
+        )
+        user_msg = HumanMessage(
+            content=original.content,
+            id=f"{stable_id}__user",
+            name=original.name,
+            additional_kwargs=original.additional_kwargs,
+        )
+        return reminder_msg, user_msg
+
+    def _inject(self, state) -> dict | None:
+        messages = list(state.get("messages", []))
+        if not messages:
+            return None
+
+        current_date = datetime.now().strftime("%Y-%m-%d, %A")
+        last_date = _last_injected_date(messages)
+        logger.debug(
+            "DynamicContextMiddleware._inject: msg_count=%d last_date=%r current_date=%r",
+            len(messages),
+            last_date,
+            current_date,
+        )
+
+        if last_date is None:
+            # ── First turn: inject full reminder as a separate HumanMessage ─────
+            first_idx = next((i for i, m in enumerate(messages) if _is_user_injection_target(m)), None)
+            if first_idx is None:
+                return None
+            full_reminder = self._build_full_reminder()
+            logger.info(
+                "DynamicContextMiddleware: injecting full reminder (len=%d, has_memory=%s) into first HumanMessage id=%r",
+                len(full_reminder),
+                "<memory>" in full_reminder,
+                messages[first_idx].id,
+            )
+            reminder_msg, user_msg = self._make_reminder_and_user_messages(messages[first_idx], full_reminder)
+            return {"messages": [reminder_msg, user_msg]}
+
+        if last_date == current_date:
+            # ── Same day: nothing to do ──────────────────────────────────────────
+            return None
+
+        # ── Midnight crossed: inject date-update reminder as a separate HumanMessage ──
+        last_human_idx = next((i for i in reversed(range(len(messages))) if _is_user_injection_target(messages[i])), None)
+        if last_human_idx is None:
+            return None
+
+        reminder_msg, user_msg = self._make_reminder_and_user_messages(messages[last_human_idx], self._build_date_update_reminder())
+        logger.info("DynamicContextMiddleware: midnight crossing detected — injected date update before current turn")
+        return {"messages": [reminder_msg, user_msg]}
+
+    @override
+    def before_agent(self, state, runtime: Runtime) -> dict | None:
+        return self._inject(state)
+
+    @override
+    async def abefore_agent(self, state, runtime: Runtime) -> dict | None:
+        return self._inject(state)
@@ -70,20 +70,11 @@ class LLMErrorHandlingMiddleware(AgentMiddleware[AgentState]):
    retry_base_delay_ms: int = 1000
    retry_cap_delay_ms: int = 8000

-    circuit_failure_threshold: int = 5
-    circuit_recovery_timeout_sec: int = 60
-
-    def __init__(self, **kwargs: Any) -> None:
+    def __init__(self, *, app_config: AppConfig, **kwargs: Any) -> None:
        super().__init__(**kwargs)

-        # Load Circuit Breaker configs from app config if available, fall back to defaults
-        try:
-            app_config = AppConfig.from_file()
-            self.circuit_failure_threshold = app_config.circuit_breaker.failure_threshold
-            self.circuit_recovery_timeout_sec = app_config.circuit_breaker.recovery_timeout_sec
-        except (FileNotFoundError, RuntimeError):
-            # Gracefully fall back to class defaults in test environments
-            pass
+        self.circuit_failure_threshold = app_config.circuit_breaker.failure_threshold
+        self.circuit_recovery_timeout_sec = app_config.circuit_breaker.recovery_timeout_sec

        # Circuit Breaker state
        self._circuit_lock = threading.Lock()
@@ -12,20 +12,22 @@ Detection strategy:
     response so the agent is forced to produce a final text answer.
 """

+from __future__ import annotations
+
 import hashlib
 import json
 import logging
 import threading
 from collections import OrderedDict, defaultdict
 from copy import deepcopy
-from typing import override
+from typing import TYPE_CHECKING, override

 from langchain.agents import AgentState
 from langchain.agents.middleware import AgentMiddleware
-from langchain_core.messages import HumanMessage
 from langgraph.runtime import Runtime

-from deerflow.config.deer_flow_context import DeerFlowContext
+if TYPE_CHECKING:
+    from deerflow.config.loop_detection_config import LoopDetectionConfig

 logger = logging.getLogger(__name__)

@@ -142,6 +144,9 @@ _TOOL_FREQ_HARD_STOP_MSG = "[FORCED STOP] Tool {tool_name} called {count} times
 class LoopDetectionMiddleware(AgentMiddleware[AgentState]):
    """Detects and breaks repetitive tool call loops.

+    Threshold parameters are validated upstream by :class:`LoopDetectionConfig`;
+    construct via :meth:`from_config` to ensure values pass Pydantic validation.
+
    Args:
        warn_threshold: Number of identical tool call sets before injecting
            a warning message. Default: 3.
@@ -157,6 +162,14 @@ class LoopDetectionMiddleware(AgentMiddleware[AgentState]):
            Default: 30.
        tool_freq_hard_limit: Number of calls to the same tool type before
            forcing a stop. Default: 50.
+        tool_freq_overrides: Per-tool overrides for frequency thresholds,
+            keyed by tool name. Each value is a ``(warn, hard_limit)`` tuple
+            that replaces ``tool_freq_warn`` / ``tool_freq_hard_limit`` for
+            that specific tool. Tools not listed here fall back to the global
+            thresholds. Useful for raising limits on intentionally
+            high-frequency tools (e.g. ``bash`` in batch pipelines) without
+            weakening protection on all other tools. Default: ``None``
+            (no overrides).
    """

    def __init__(
@@ -167,6 +180,7 @@ class LoopDetectionMiddleware(AgentMiddleware[AgentState]):
        max_tracked_threads: int = _DEFAULT_MAX_TRACKED_THREADS,
        tool_freq_warn: int = _DEFAULT_TOOL_FREQ_WARN,
        tool_freq_hard_limit: int = _DEFAULT_TOOL_FREQ_HARD_LIMIT,
+        tool_freq_overrides: dict[str, tuple[int, int]] | None = None,
    ):
        super().__init__()
        self.warn_threshold = warn_threshold
@@ -175,17 +189,32 @@ class LoopDetectionMiddleware(AgentMiddleware[AgentState]):
        self.max_tracked_threads = max_tracked_threads
        self.tool_freq_warn = tool_freq_warn
        self.tool_freq_hard_limit = tool_freq_hard_limit
+        self._tool_freq_overrides: dict[str, tuple[int, int]] = tool_freq_overrides or {}
        self._lock = threading.Lock()
-        # Per-thread tracking using OrderedDict for LRU eviction
        self._history: OrderedDict[str, list[str]] = OrderedDict()
        self._warned: dict[str, set[str]] = defaultdict(set)
-        # Per-thread, per-tool-type cumulative call counts
        self._tool_freq: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
        self._tool_freq_warned: dict[str, set[str]] = defaultdict(set)

-    def _get_thread_id(self, runtime: Runtime[DeerFlowContext]) -> str:
+    @classmethod
+    def from_config(cls, config: LoopDetectionConfig) -> LoopDetectionMiddleware:
+        """Build the middleware from *config*; per-tool overrides become ``(warn, hard_limit)`` tuples."""
+        return cls(
+            warn_threshold=config.warn_threshold,
+            hard_limit=config.hard_limit,
+            window_size=config.window_size,
+            max_tracked_threads=config.max_tracked_threads,
+            tool_freq_warn=config.tool_freq_warn,
+            tool_freq_hard_limit=config.tool_freq_hard_limit,
+            tool_freq_overrides={name: (o.warn, o.hard_limit) for name, o in config.tool_freq_overrides.items()},
+        )
+
+    def _get_thread_id(self, runtime: Runtime) -> str:
        """Extract thread_id from runtime context for per-thread tracking."""
-        return runtime.context.thread_id or "default"
+        thread_id = runtime.context.get("thread_id") if runtime.context else None
+        if thread_id:
+            return thread_id
+        return "default"

    def _evict_if_needed(self) -> None:
        """Evict least recently used threads if over the limit.
@@ -279,7 +308,12 @@ class LoopDetectionMiddleware(AgentMiddleware[AgentState]):
                freq[name] += 1
                tc_count = freq[name]

-                if tc_count >= self.tool_freq_hard_limit:
+                if name in self._tool_freq_overrides:
+                    eff_warn, eff_hard = self._tool_freq_overrides[name]
+                else:
+                    eff_warn, eff_hard = self.tool_freq_warn, self.tool_freq_hard_limit
+
+                if tc_count >= eff_hard:
                    logger.error(
                        "Tool frequency hard limit reached — forcing stop",
                        extra={
@@ -290,7 +324,7 @@ class LoopDetectionMiddleware(AgentMiddleware[AgentState]):
                    )
                    return _TOOL_FREQ_HARD_STOP_MSG.format(tool_name=name, count=tc_count), True

-                if tc_count >= self.tool_freq_warn:
+                if tc_count >= eff_warn:
                    warned = self._tool_freq_warned[thread_id]
                    if name not in warned:
                        warned.add(name)
@@ -355,22 +389,39 @@ class LoopDetectionMiddleware(AgentMiddleware[AgentState]):
            return {"messages": [stripped_msg]}

        if warning:
-            # Inject as HumanMessage instead of SystemMessage to avoid
-            # Anthropic's "multiple non-consecutive system messages" error.
-            # Anthropic models require system messages only at the start of
-            # the conversation; injecting one mid-conversation crashes
-            # langchain_anthropic's _format_messages(). HumanMessage works
-            # with all providers. See #1299.
-            return {"messages": [HumanMessage(content=warning, name="loop_warning")]}
+            # WORKAROUND for v2.0-m1 — see #2724.
+            #
+            # Append the warning to the AIMessage content instead of
+            # injecting a separate HumanMessage. Inserting any non-tool
+            # message between an AIMessage(tool_calls=...) and its
+            # ToolMessage responses breaks OpenAI/Moonshot strict pairing
+            # validation ("tool_call_ids did not have response messages")
+            # because the tools node has not run yet at after_model time.
+            # tool_calls are preserved so the tools node still executes.
+            #
+            # This is a temporary mitigation: mutating an existing
+            # AIMessage to carry framework-authored text leaks loop-warning
+            # text into downstream consumers (MemoryMiddleware fact
+            # extraction, TitleMiddleware, telemetry, model replay) as if
+            # the model said it. The proper fix is to defer warning
+            # injection from after_model to wrap_model_call so every prior
+            # ToolMessage is already in the request — see RFC #2517 (which
+            # lists "loop intervention does not leave invalid
+            # tool-call/tool-message state" as acceptance criteria) and
+            # the prototype on `fix/loop-detection-tool-call-pairing`.
+            messages = state.get("messages", [])
+            last_msg = messages[-1]
+            patched_msg = last_msg.model_copy(update={"content": self._append_text(last_msg.content, warning)})
+            return {"messages": [patched_msg]}

        return None

    @override
-    def after_model(self, state: AgentState, runtime: Runtime[DeerFlowContext]) -> dict | None:
+    def after_model(self, state: AgentState, runtime: Runtime) -> dict | None:
        return self._apply(state, runtime)

    @override
-    async def aafter_model(self, state: AgentState, runtime: Runtime[DeerFlowContext]) -> dict | None:
+    async def aafter_model(self, state: AgentState, runtime: Runtime) -> dict | None:
        return self._apply(state, runtime)

    def reset(self, thread_id: str | None = None) -> None:
@@ -1,17 +1,21 @@
 """Middleware for memory mechanism."""

 import logging
-from typing import override
+from typing import TYPE_CHECKING, override

 from langchain.agents import AgentState
 from langchain.agents.middleware import AgentMiddleware
+from langgraph.config import get_config
 from langgraph.runtime import Runtime

 from deerflow.agents.memory.message_processing import detect_correction, detect_reinforcement, filter_messages_for_memory
 from deerflow.agents.memory.queue import get_memory_queue
-from deerflow.config.deer_flow_context import DeerFlowContext
+from deerflow.config.memory_config import get_memory_config
 from deerflow.runtime.user_context import get_effective_user_id

+if TYPE_CHECKING:
+    from deerflow.config.memory_config import MemoryConfig
+
 logger = logging.getLogger(__name__)


@@ -33,17 +37,20 @@ class MemoryMiddleware(AgentMiddleware[MemoryMiddlewareState]):

    state_schema = MemoryMiddlewareState

-    def __init__(self, agent_name: str | None = None):
+    def __init__(self, agent_name: str | None = None, *, memory_config: "MemoryConfig | None" = None):
        """Initialize the MemoryMiddleware.

        Args:
            agent_name: If provided, memory is stored per-agent. If None, uses global memory.
+            memory_config: Explicit memory config. When omitted, legacy global
+                config fallback is used.
        """
        super().__init__()
        self._agent_name = agent_name
+        self._memory_config = memory_config

    @override
-    def after_agent(self, state: MemoryMiddlewareState, runtime: Runtime[DeerFlowContext]) -> dict | None:
+    def after_agent(self, state: MemoryMiddlewareState, runtime: Runtime) -> dict | None:
        """Queue conversation for memory update after agent completes.

        Args:
@@ -53,11 +60,15 @@ class MemoryMiddleware(AgentMiddleware[MemoryMiddlewareState]):
        Returns:
            None (no state changes needed from this middleware).
        """
-        memory_config = runtime.context.app_config.memory
-        if not memory_config.enabled:
+        config = self._memory_config or get_memory_config()
+        if not config.enabled:
            return None

-        thread_id = runtime.context.thread_id
+        # Get thread ID from runtime context first, then fall back to LangGraph's configurable metadata
+        thread_id = runtime.context.get("thread_id") if runtime.context else None
+        if thread_id is None:
+            config_data = get_config()
+            thread_id = config_data.get("configurable", {}).get("thread_id")
        if not thread_id:
            logger.debug("No thread_id in context, skipping memory update")
            return None
@@ -86,7 +97,7 @@ class MemoryMiddleware(AgentMiddleware[MemoryMiddlewareState]):
        # threading.Timer fires on a different thread where ContextVar values are not
        # propagated, so we must store user_id explicitly in ConversationContext.
        user_id = get_effective_user_id()
-        queue = get_memory_queue(runtime.context.app_config)
+        queue = get_memory_queue()
        queue.add(
            thread_id=thread_id,
            messages=filtered_messages,
@@ -7,6 +7,7 @@ from langchain.agents import AgentState
 from langchain.agents.middleware import AgentMiddleware
 from langgraph.runtime import Runtime

+from deerflow.agents.middlewares.tool_call_metadata import clone_ai_message_with_tool_calls
 from deerflow.subagents.executor import MAX_CONCURRENT_SUBAGENTS

 logger = logging.getLogger(__name__)
@@ -63,7 +64,7 @@ class SubagentLimitMiddleware(AgentMiddleware[AgentState]):
        logger.warning(f"Truncated {dropped_count} excess task tool call(s) from model response (limit: {self.max_concurrent})")

        # Replace the AIMessage with truncated tool_calls (same id triggers replacement)
-        updated_msg = last_msg.model_copy(update={"tool_calls": truncated_tool_calls})
+        updated_msg = clone_ai_message_with_tool_calls(last_msg, truncated_tool_calls)
        return {"messages": [updated_msg]}

    @override
@@ -14,6 +14,9 @@ from langgraph.config import get_config
 from langgraph.graph.message import REMOVE_ALL_MESSAGES
 from langgraph.runtime import Runtime

+from deerflow.agents.middlewares.dynamic_context_middleware import is_dynamic_context_reminder
+from deerflow.agents.middlewares.tool_call_metadata import clone_ai_message_with_tool_calls
+
 logger = logging.getLogger(__name__)


@@ -78,10 +81,7 @@ def _clone_ai_message(
    content: Any | None = None,
 ) -> AIMessage:
    """Clone an AIMessage while replacing its tool_calls list and optional content."""
-    update: dict[str, Any] = {"tool_calls": tool_calls}
-    if content is not None:
-        update["content"] = content
-    return message.model_copy(update=update)
+    return clone_ai_message_with_tool_calls(message, tool_calls, content=content)


@dataclass
@@ -136,6 +136,7 @@ class DeerFlowSummarizationMiddleware(SummarizationMiddleware):
            return None

        messages_to_summarize, preserved_messages = self._partition_with_skill_rescue(messages, cutoff_index)
+        messages_to_summarize, preserved_messages = self._preserve_dynamic_context_reminders(messages_to_summarize, preserved_messages)
        self._fire_hooks(messages_to_summarize, preserved_messages, runtime)
        summary = self._create_summary(messages_to_summarize)
        new_messages = self._build_new_messages(summary)
@@ -161,6 +162,7 @@ class DeerFlowSummarizationMiddleware(SummarizationMiddleware):
            return None

        messages_to_summarize, preserved_messages = self._partition_with_skill_rescue(messages, cutoff_index)
+        messages_to_summarize, preserved_messages = self._preserve_dynamic_context_reminders(messages_to_summarize, preserved_messages)
        self._fire_hooks(messages_to_summarize, preserved_messages, runtime)
        summary = await self._acreate_summary(messages_to_summarize)
        new_messages = self._build_new_messages(summary)
@@ -180,6 +182,24 @@ class DeerFlowSummarizationMiddleware(SummarizationMiddleware):
        """
        return [HumanMessage(content=f"Here is a summary of the conversation to date:\n\n{summary}", name="summary")]

+    def _preserve_dynamic_context_reminders(
+        self,
+        messages_to_summarize: list[AnyMessage],
+        preserved_messages: list[AnyMessage],
+    ) -> tuple[list[AnyMessage], list[AnyMessage]]:
+        """Move dynamic-context reminders from the summarized side to the preserved side.
+
+        These reminders carry the current date and optional memory. If
+        summarization removes them, DynamicContextMiddleware can mistake the
+        summary HumanMessage for the first user message and inject the reminder
+        in the wrong place.
+
+        Returns:
+            ``(to_summarize, preserved)``. Reminders keep their relative order
+            and are placed in front of ``preserved_messages``.
+        """
+        rescued: list[AnyMessage] = []
+        compressible: list[AnyMessage] = []
+        for message in messages_to_summarize:
+            bucket = rescued if is_dynamic_context_reminder(message) else compressible
+            bucket.append(message)
+
+        if not rescued:
+            # Nothing to rescue: hand the caller's own lists back untouched.
+            return messages_to_summarize, preserved_messages
+        return compressible, rescued + preserved_messages
+
    def _partition_with_skill_rescue(
        self,
        messages: list[AnyMessage],
@@ -4,10 +4,11 @@ from typing import NotRequired, override

 from langchain.agents import AgentState
 from langchain.agents.middleware import AgentMiddleware
+from langchain_core.messages import HumanMessage
+from langgraph.config import get_config
 from langgraph.runtime import Runtime

 from deerflow.agents.thread_state import ThreadDataState
-from deerflow.config.deer_flow_context import DeerFlowContext
 from deerflow.config.paths import Paths, get_paths
 from deerflow.runtime.user_context import get_effective_user_id

@@ -78,10 +79,14 @@ class ThreadDataMiddleware(AgentMiddleware[ThreadDataMiddlewareState]):
        return self._get_thread_paths(thread_id, user_id=user_id)

    @override
-    def before_agent(self, state: ThreadDataMiddlewareState, runtime: Runtime[DeerFlowContext]) -> dict | None:
-        thread_id = runtime.context.thread_id
+    def before_agent(self, state: ThreadDataMiddlewareState, runtime: Runtime) -> dict | None:
+        context = runtime.context or {}
+        thread_id = context.get("thread_id")
+        if thread_id is None:
+            config = get_config()
+            thread_id = config.get("configurable", {}).get("thread_id")

-        if not thread_id:
+        if thread_id is None:
            raise ValueError("Thread ID is required in runtime context or config.configurable")

        user_id = get_effective_user_id()
@@ -2,18 +2,21 @@

 import logging
 import re
-from typing import Any, NotRequired, override
+from typing import TYPE_CHECKING, Any, NotRequired, override

 from langchain.agents import AgentState
 from langchain.agents.middleware import AgentMiddleware
 from langgraph.config import get_config
 from langgraph.runtime import Runtime

-from deerflow.config.app_config import AppConfig
-from deerflow.config.deer_flow_context import DeerFlowContext
-from deerflow.config.title_config import TitleConfig
+from deerflow.agents.middlewares.dynamic_context_middleware import is_dynamic_context_reminder
+from deerflow.config.title_config import get_title_config
 from deerflow.models import create_chat_model

+if TYPE_CHECKING:
+    from deerflow.config.app_config import AppConfig
+    from deerflow.config.title_config import TitleConfig
+
 logger = logging.getLogger(__name__)


@@ -28,6 +31,18 @@ class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]):

    state_schema = TitleMiddlewareState

+    def __init__(self, *, app_config: "AppConfig | None" = None, title_config: "TitleConfig | None" = None) -> None:
+        """Store the optional explicit configuration used for title generation.
+
+        Args:
+            app_config: Optional application config. Its ``title`` section is
+                used when no ``title_config`` is supplied.
+            title_config: Optional title config. Takes precedence over
+                ``app_config`` when both are supplied.
+        """
+        super().__init__()
+        self._app_config = app_config
+        self._title_config = title_config
+
+    def _get_title_config(self):
+        """Resolve the title config to use for this middleware.
+
+        Preference order: the ``title_config`` given to the constructor, then
+        the ``title`` section of the constructor's ``app_config``, then the
+        result of ``get_title_config()``.
+        """
+        explicit = self._title_config
+        if explicit is not None:
+            return explicit
+
+        app_config = self._app_config
+        return get_title_config() if app_config is None else app_config.title
+
    def _normalize_content(self, content: object) -> str:
        if isinstance(content, str):
            return content
@@ -47,9 +62,14 @@ class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]):

        return ""

-    def _should_generate_title(self, state: TitleMiddlewareState, title_config: TitleConfig) -> bool:
+    @staticmethod
+    def _is_user_message_for_title(message: object) -> bool:
+        """Return True for a human message that is not a dynamic-context reminder."""
+        if getattr(message, "type", None) != "human":
+            return False
+        return not is_dynamic_context_reminder(message)
+
+    def _should_generate_title(self, state: TitleMiddlewareState) -> bool:
        """Check if we should generate a title for this thread."""
-        if not title_config.enabled:
+        config = self._get_title_config()
+        if not config.enabled:
            return False

        # Check if thread already has a title in state
@@ -62,27 +82,28 @@ class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]):
            return False

        # Count user and assistant messages
-        user_messages = [m for m in messages if m.type == "human"]
+        user_messages = [m for m in messages if self._is_user_message_for_title(m)]
        assistant_messages = [m for m in messages if m.type == "ai"]

        # Generate title after first complete exchange
        return len(user_messages) == 1 and len(assistant_messages) >= 1

-    def _build_title_prompt(self, state: TitleMiddlewareState, title_config: TitleConfig) -> tuple[str, str]:
+    def _build_title_prompt(self, state: TitleMiddlewareState) -> tuple[str, str]:
        """Extract user/assistant messages and build the title prompt.

        Returns (prompt_string, user_msg) so callers can use user_msg as fallback.
        """
+        config = self._get_title_config()
        messages = state.get("messages", [])

-        user_msg_content = next((m.content for m in messages if m.type == "human"), "")
+        user_msg_content = next((m.content for m in messages if self._is_user_message_for_title(m)), "")
        assistant_msg_content = next((m.content for m in messages if m.type == "ai"), "")

        user_msg = self._normalize_content(user_msg_content)
        assistant_msg = self._strip_think_tags(self._normalize_content(assistant_msg_content))

-        prompt = title_config.prompt_template.format(
-            max_words=title_config.max_words,
+        prompt = config.prompt_template.format(
+            max_words=config.max_words,
            user_msg=user_msg[:500],
            assistant_msg=assistant_msg[:500],
        )
@@ -92,15 +113,17 @@ class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]):
        """Remove <think>...</think> blocks emitted by reasoning models (e.g. minimax, DeepSeek-R1)."""
        return re.sub(r"<think>[\s\S]*?</think>", "", text, flags=re.IGNORECASE).strip()

-    def _parse_title(self, content: object, title_config: TitleConfig) -> str:
+    def _parse_title(self, content: object) -> str:
        """Normalize model output into a clean title string."""
+        config = self._get_title_config()
        title_content = self._normalize_content(content)
        title_content = self._strip_think_tags(title_content)
        title = title_content.strip().strip('"').strip("'")
-        return title[: title_config.max_chars] if len(title) > title_config.max_chars else title
+        return title[: config.max_chars] if len(title) > config.max_chars else title

-    def _fallback_title(self, user_msg: str, title_config: TitleConfig) -> str:
-        fallback_chars = min(title_config.max_chars, 50)
+    def _fallback_title(self, user_msg: str) -> str:
+        config = self._get_title_config()
+        fallback_chars = min(config.max_chars, 50)
        if len(user_msg) > fallback_chars:
            return user_msg[:fallback_chars].rstrip() + "..."
        return user_msg if user_msg else "New Conversation"
@@ -116,42 +139,46 @@ class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]):
        except Exception:
            parent = {}
        config = {**parent}
+        config["run_name"] = "title_agent"
        config["tags"] = [*(config.get("tags") or []), "middleware:title"]
        return config

-    def _generate_title_result(self, state: TitleMiddlewareState, title_config: TitleConfig) -> dict | None:
+    def _generate_title_result(self, state: TitleMiddlewareState) -> dict | None:
        """Generate a local fallback title without blocking on an LLM call."""
-        if not self._should_generate_title(state, title_config):
+        if not self._should_generate_title(state):
            return None

-        _, user_msg = self._build_title_prompt(state, title_config)
-        return {"title": self._fallback_title(user_msg, title_config)}
+        _, user_msg = self._build_title_prompt(state)
+        return {"title": self._fallback_title(user_msg)}

-    async def _agenerate_title_result(self, state: TitleMiddlewareState, app_config: AppConfig) -> dict | None:
+    async def _agenerate_title_result(self, state: TitleMiddlewareState) -> dict | None:
        """Generate a title asynchronously and fall back locally on failure."""
-        title_config = app_config.title
-        if not self._should_generate_title(state, title_config):
+        if not self._should_generate_title(state):
            return None

-        prompt, user_msg = self._build_title_prompt(state, title_config)
+        config = self._get_title_config()
+        prompt, user_msg = self._build_title_prompt(state)

        try:
-            if title_config.model_name:
-                model = create_chat_model(name=title_config.model_name, thinking_enabled=False, app_config=app_config)
+            model_kwargs = {"thinking_enabled": False}
+            if self._app_config is not None:
+                model_kwargs["app_config"] = self._app_config
+            if config.model_name:
+                model = create_chat_model(name=config.model_name, **model_kwargs)
            else:
-                model = create_chat_model(thinking_enabled=False, app_config=app_config)
+                model = create_chat_model(**model_kwargs)
            response = await model.ainvoke(prompt, config=self._get_runnable_config())
-            title = self._parse_title(response.content, title_config)
+            title = self._parse_title(response.content)
            if title:
                return {"title": title}
        except Exception:
            logger.debug("Failed to generate async title; falling back to local title", exc_info=True)
-        return {"title": self._fallback_title(user_msg, title_config)}
+        return {"title": self._fallback_title(user_msg)}

    @override
-    def after_model(self, state: TitleMiddlewareState, runtime: Runtime[DeerFlowContext]) -> dict | None:
-        return self._generate_title_result(state, runtime.context.app_config.title)
+    def after_model(self, state: TitleMiddlewareState, runtime: Runtime) -> dict | None:
+        return self._generate_title_result(state)

    @override
-    async def aafter_model(self, state: TitleMiddlewareState, runtime: Runtime[DeerFlowContext]) -> dict | None:
-        return await self._agenerate_title_result(state, runtime.context.app_config)
+    async def aafter_model(self, state: TitleMiddlewareState, runtime: Runtime) -> dict | None:
+        return await self._agenerate_title_result(state)
@@ -1,37 +1,303 @@
-"""Middleware for logging LLM token usage."""
+"""Middleware for logging token usage and annotating step attribution."""
+
+from __future__ import annotations

 import logging
-from typing import override
+from collections import defaultdict
+from typing import Any, override

 from langchain.agents import AgentState
 from langchain.agents.middleware import AgentMiddleware
+from langchain.agents.middleware.todo import Todo
+from langchain_core.messages import AIMessage
 from langgraph.runtime import Runtime

 logger = logging.getLogger(__name__)

+TOKEN_USAGE_ATTRIBUTION_KEY = "token_usage_attribution"
+
+
+def _string_arg(value: Any) -> str | None:
+    """Return ``value`` with surrounding whitespace stripped, or None.
+
+    None is returned when ``value`` is not a string or is empty after stripping.
+    """
+    if not isinstance(value, str):
+        return None
+    stripped = value.strip()
+    return stripped if stripped else None
+
+
+def _normalize_todos(value: Any) -> list[Todo]:
+    """Coerce a raw ``todos`` argument into a list of well-formed todo dicts.
+
+    Args:
+        value: The raw value to normalize, e.g. the ``todos`` argument of a
+            ``write_todos`` tool call. Anything that is not a list yields ``[]``.
+
+    Returns:
+        One dict per dict-typed item of ``value``, in order. Each dict carries
+        ``content`` only when the item has a non-blank string content, and
+        ``status`` only when the item's status is one of ``pending``,
+        ``in_progress`` or ``completed``. Non-dict items are dropped; dict
+        items with no usable field are kept as empty dicts.
+    """
+    if not isinstance(value, list):
+        return []
+
+    normalized: list[Todo] = []
+    for item in value:
+        if not isinstance(item, dict):
+            continue
+
+        todo: Todo = {}
+        content = _string_arg(item.get("content"))
+        status = item.get("status")
+
+        if content is not None:
+            todo["content"] = content
+        # Tool-call arguments are model-generated, so ``status`` can be any JSON
+        # value. Check the type first: testing an unhashable value (list/dict)
+        # for set membership raises TypeError instead of returning False.
+        if isinstance(status, str) and status in {"pending", "in_progress", "completed"}:
+            todo["status"] = status
+
+        normalized.append(todo)
+
+    return normalized
+
+
+def _todo_action_kind(previous: Todo | None, current: Todo) -> str:
+    """Classify the change from ``previous`` to ``current`` as an action kind.
+
+    A todo whose content differs from its matched predecessor is always a
+    ``todo_update``. Otherwise (new todo, or same content) the kind follows the
+    current status: ``completed`` -> ``todo_complete``, ``in_progress`` ->
+    ``todo_start``, anything else -> ``todo_update``.
+    """
+    content_changed = previous is not None and previous.get("content") != current.get("content")
+    if content_changed:
+        return "todo_update"
+
+    status = current.get("status")
+    if status == "completed":
+        return "todo_complete"
+    return "todo_start" if status == "in_progress" else "todo_update"
+
+
+def _build_todo_actions(previous_todos: list[Todo], next_todos: list[Todo]) -> list[dict[str, Any]]:
+    """Diff two todo lists into a list of ``{"kind", "content"}`` actions.
+
+    Each entry of ``next_todos`` with a non-empty string content is paired with
+    at most one entry of ``previous_todos``: first the earliest not-yet-matched
+    entry with identical content, otherwise the entry at the same list position
+    if that one is still unmatched. Pairs whose content and status are both
+    unchanged produce no action.
+
+    Args:
+        previous_todos: The todo list before the update.
+        next_todos: The todo list after the update.
+
+    Returns:
+        Actions for new or changed todos in ``next_todos`` order (kind chosen by
+        ``_todo_action_kind``), followed by one ``todo_remove`` action for every
+        unmatched previous todo that has a non-empty string content.
+    """
+    # This is the single source of truth for precise write_todos token
+    # attribution. The frontend intentionally falls back to a generic
+    # "Update to-do list" label when this metadata is missing or malformed.
+    previous_by_content: dict[str, list[tuple[int, Todo]]] = defaultdict(list)
+    matched_previous_indices: set[int] = set()
+
+    # Index previous todos by content; each bucket keeps original list order.
+    for index, todo in enumerate(previous_todos):
+        content = todo.get("content")
+        if isinstance(content, str) and content:
+            previous_by_content[content].append((index, todo))
+
+    actions: list[dict[str, Any]] = []
+
+    for index, todo in enumerate(next_todos):
+        content = todo.get("content")
+        if not isinstance(content, str) or not content:
+            continue
+
+        previous_match: Todo | None = None
+        # .get() (not []) so a miss does not insert an empty bucket into the defaultdict.
+        content_matches = previous_by_content.get(content)
+        if content_matches:
+            # Drop candidates already claimed by the positional fallback below.
+            while content_matches and content_matches[0][0] in matched_previous_indices:
+                content_matches.pop(0)
+            if content_matches:
+                previous_index, previous_match = content_matches.pop(0)
+                matched_previous_indices.add(previous_index)
+
+        # Positional fallback: no content match, so pair with the previous todo
+        # at the same index (treated as an in-place edit) if it is still free.
+        if previous_match is None and index < len(previous_todos) and index not in matched_previous_indices:
+            previous_match = previous_todos[index]
+            matched_previous_indices.add(index)
+
+        if previous_match is not None:
+            previous_content = previous_match.get("content")
+            previous_status = previous_match.get("status")
+            # Unchanged todo: nothing to report.
+            if previous_content == content and previous_status == todo.get("status"):
+                continue
+
+        actions.append(
+            {
+                "kind": _todo_action_kind(previous_match, todo),
+                "content": content,
+            }
+        )
+
+    # Whatever was never paired with a next todo has been removed.
+    for index, todo in enumerate(previous_todos):
+        if index in matched_previous_indices:
+            continue
+
+        content = todo.get("content")
+        if not isinstance(content, str) or not content:
+            continue
+
+        actions.append(
+            {
+                "kind": "todo_remove",
+                "content": content,
+            }
+        )
+
+    return actions
+
+
+def _describe_tool_call(tool_call: dict[str, Any], todos: list[Todo]) -> list[dict[str, Any]]:
+    """Translate one tool call into a list of attribution action dicts.
+
+    Args:
+        tool_call: Tool-call dict; ``name``, ``args`` and ``id`` are read.
+        todos: Todo list that a ``write_todos`` call is diffed against.
+
+    Returns:
+        For ``write_todos``, one action per todo change (or a single generic
+        ``tool`` action when nothing changed). For every other tool, a
+        single-element list whose ``kind`` is ``subagent``, ``search``,
+        ``present_files``, ``clarification`` or ``tool``.
+    """
+    raw_args = tool_call.get("args")
+    args = raw_args if isinstance(raw_args, dict) else {}
+    name = _string_arg(tool_call.get("name")) or "unknown"
+    tool_call_id = _string_arg(tool_call.get("id"))
+
+    if name == "write_todos":
+        todo_actions = _build_todo_actions(todos, _normalize_todos(args.get("todos")))
+        if todo_actions:
+            return [{**action, "tool_call_id": tool_call_id} for action in todo_actions]
+        # No detectable todo change: report the call as a plain tool use.
+        return [{"kind": "tool", "tool_name": name, "tool_call_id": tool_call_id}]
+
+    if name == "task":
+        subagent_action = {
+            "kind": "subagent",
+            "description": _string_arg(args.get("description")),
+            "subagent_type": _string_arg(args.get("subagent_type")),
+            "tool_call_id": tool_call_id,
+        }
+        return [subagent_action]
+
+    if name in {"web_search", "image_search"}:
+        search_action = {
+            "kind": "search",
+            "tool_name": name,
+            "query": _string_arg(args.get("query")),
+            "tool_call_id": tool_call_id,
+        }
+        return [search_action]
+
+    if name == "present_files":
+        return [{"kind": "present_files", "tool_call_id": tool_call_id}]
+
+    if name == "ask_clarification":
+        return [{"kind": "clarification", "tool_call_id": tool_call_id}]
+
+    generic_action = {
+        "kind": "tool",
+        "tool_name": name,
+        "description": _string_arg(args.get("description")),
+        "tool_call_id": tool_call_id,
+    }
+    return [generic_action]
+
+
+def _infer_step_kind(message: AIMessage, actions: list[dict[str, Any]]) -> str:
+    """Name the kind of step an AI message represents.
+
+    Without actions the step is ``final_answer`` when the message has content
+    and ``thinking`` otherwise. A single todo action is ``todo_update``, a
+    single subagent action is ``subagent_dispatch``, and every other
+    non-empty action list is ``tool_batch``.
+    """
+    if not actions:
+        return "final_answer" if message.content else "thinking"
+
+    lead_kind = actions[0].get("kind")
+    if len(actions) == 1:
+        if lead_kind in {"todo_start", "todo_complete", "todo_update", "todo_remove"}:
+            return "todo_update"
+        if lead_kind == "subagent":
+            return "subagent_dispatch"
+    return "tool_batch"
+
+
+def _build_attribution(message: AIMessage, todos: list[Todo]) -> dict[str, Any]:
+    """Build the token-usage attribution payload for one AI message.
+
+    Args:
+        message: The AI message whose ``tool_calls`` are described.
+        todos: The todo list in effect before this message's tool calls.
+
+    Returns:
+        A dict with ``version``, ``kind``, ``shared_attribution`` (True when
+        more than one action was produced), ``tool_call_ids`` (the non-empty
+        string ids, in call order) and ``actions``.
+    """
+    actions: list[dict[str, Any]] = []
+    tool_call_ids: list[str] = []
+    todo_snapshot = list(todos)
+
+    for call in getattr(message, "tool_calls", None) or []:
+        if not isinstance(call, dict):
+            continue
+
+        actions.extend(_describe_tool_call(call, todo_snapshot))
+
+        # A later write_todos in the same message is diffed against the list
+        # left behind by the earlier one, not against the original state.
+        if call.get("name") == "write_todos":
+            call_args = call.get("args")
+            if not isinstance(call_args, dict):
+                call_args = {}
+            todo_snapshot = _normalize_todos(call_args.get("todos"))
+
+        call_id = _string_arg(call.get("id"))
+        if call_id is not None:
+            tool_call_ids.append(call_id)
+
+    return {
+        # Schema changes should remain additive where possible so older
+        # frontends can ignore unknown fields and fall back safely.
+        "version": 1,
+        "kind": _infer_step_kind(message, actions),
+        "shared_attribution": len(actions) > 1,
+        "tool_call_ids": tool_call_ids,
+        "actions": actions,
+    }
+

 class TokenUsageMiddleware(AgentMiddleware):
-    """Logs token usage from model response usage_metadata."""
+    """Logs token usage from model responses and annotates the AI step."""

-    @override
-    def after_model(self, state: AgentState, runtime: Runtime) -> dict | None:
-        return self._log_usage(state)
-
-    @override
-    async def aafter_model(self, state: AgentState, runtime: Runtime) -> dict | None:
-        return self._log_usage(state)
-
-    def _log_usage(self, state: AgentState) -> None:
+    def _apply(self, state: AgentState) -> dict | None:
        messages = state.get("messages", [])
        if not messages:
            return None
+
        last = messages[-1]
+        if not isinstance(last, AIMessage):
+            return None
+
        usage = getattr(last, "usage_metadata", None)
        if usage:
+            input_token_details = usage.get("input_token_details") or {}
+            output_token_details = usage.get("output_token_details") or {}
+            detail_parts = []
+            if input_token_details:
+                detail_parts.append(f"input_token_details={input_token_details}")
+            if output_token_details:
+                detail_parts.append(f"output_token_details={output_token_details}")
+            detail_suffix = f" {' '.join(detail_parts)}" if detail_parts else ""
            logger.info(
-                "LLM token usage: input=%s output=%s total=%s",
+                "LLM token usage: input=%s output=%s total=%s%s",
                usage.get("input_tokens", "?"),
                usage.get("output_tokens", "?"),
                usage.get("total_tokens", "?"),
+                detail_suffix,
            )
-        return None
+
+        todos = state.get("todos") or []
+        attribution = _build_attribution(last, todos if isinstance(todos, list) else [])
+        additional_kwargs = dict(getattr(last, "additional_kwargs", {}) or {})
+
+        if additional_kwargs.get(TOKEN_USAGE_ATTRIBUTION_KEY) == attribution:
+            return None
+
+        additional_kwargs[TOKEN_USAGE_ATTRIBUTION_KEY] = attribution
+        updated_msg = last.model_copy(update={"additional_kwargs": additional_kwargs})
+        return {"messages": [updated_msg]}
+
+    @override
+    def after_model(self, state: AgentState, runtime: Runtime) -> dict | None:
+        return self._apply(state)
+
+    @override
+    async def aafter_model(self, state: AgentState, runtime: Runtime) -> dict | None:
+        return self._apply(state)
@@ -0,0 +1,50 @@
+"""Helpers for keeping AIMessage tool-call metadata consistent."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from langchain_core.messages import AIMessage
+
+
+def _raw_tool_call_id(raw_tool_call: Any) -> str | None:
+    """Return the non-empty string ``id`` of a raw tool-call dict, else None."""
+    candidate = raw_tool_call.get("id") if isinstance(raw_tool_call, dict) else None
+    if isinstance(candidate, str) and candidate:
+        return candidate
+    return None
+
+
+def clone_ai_message_with_tool_calls(
+    message: AIMessage,
+    tool_calls: list[dict[str, Any]],
+    *,
+    content: Any | None = None,
+) -> AIMessage:
+    """Copy ``message`` with a new ``tool_calls`` list and matching raw metadata.
+
+    Args:
+        message: The AI message to copy; it is not modified.
+        tool_calls: The tool calls the copy should carry.
+        content: Replacement content; the original content is kept when None.
+
+    Returns:
+        A ``model_copy`` of ``message`` in which ``additional_kwargs["tool_calls"]``
+        keeps only raw entries whose id is still in ``tool_calls`` (the key is
+        removed when none remain). When ``tool_calls`` is empty,
+        ``additional_kwargs["function_call"]`` is removed and a ``tool_calls``
+        finish reason becomes ``stop``.
+    """
+    surviving_ids = {call["id"] for call in tool_calls if isinstance(call.get("id"), str) and call["id"]}
+
+    # Work on shallow copies so the source message's dicts are left alone.
+    extras = dict(getattr(message, "additional_kwargs", {}) or {})
+    metadata = dict(getattr(message, "response_metadata", {}) or {})
+
+    provider_calls = extras.get("tool_calls")
+    if isinstance(provider_calls, list):
+        still_present = [raw for raw in provider_calls if _raw_tool_call_id(raw) in surviving_ids]
+        if still_present:
+            extras["tool_calls"] = still_present
+        else:
+            extras.pop("tool_calls", None)
+
+    if not tool_calls:
+        extras.pop("function_call", None)
+        # With no tool calls left the message is an ordinary completion.
+        if metadata.get("finish_reason") == "tool_calls":
+            metadata["finish_reason"] = "stop"
+
+    changes: dict[str, Any] = {"tool_calls": tool_calls}
+    if content is not None:
+        changes["content"] = content
+    changes["additional_kwargs"] = extras
+    changes["response_metadata"] = metadata
+
+    return message.model_copy(update=changes)
@@ -1,10 +1,8 @@
 """Tool error handling middleware and shared runtime middleware builders."""

-from __future__ import annotations
-
 import logging
 from collections.abc import Awaitable, Callable
-from typing import TYPE_CHECKING, override
+from typing import override

 from langchain.agents import AgentState
 from langchain.agents.middleware import AgentMiddleware
@@ -13,8 +11,7 @@ from langgraph.errors import GraphBubbleUp
 from langgraph.prebuilt.tool_node import ToolCallRequest
 from langgraph.types import Command

-if TYPE_CHECKING:
-    from deerflow.config.app_config import AppConfig
+from deerflow.config.app_config import AppConfig

 logger = logging.getLogger(__name__)

@@ -72,7 +69,7 @@ class ToolErrorHandlingMiddleware(AgentMiddleware[AgentState]):

 def _build_runtime_middlewares(
    *,
-    app_config: "AppConfig",
+    app_config: AppConfig,
    include_uploads: bool,
    include_dangling_tool_call_patch: bool,
    lazy_init: bool = True,
@@ -97,7 +94,7 @@ def _build_runtime_middlewares(

        middlewares.append(DanglingToolCallMiddleware())

-    middlewares.append(LLMErrorHandlingMiddleware())
+    middlewares.append(LLMErrorHandlingMiddleware(app_config=app_config))

    # Guardrail middleware (if configured)
    guardrails_config = app_config.guardrails
@@ -129,7 +126,7 @@ def _build_runtime_middlewares(
    return middlewares


-def build_lead_runtime_middlewares(*, app_config: "AppConfig", lazy_init: bool = True) -> list[AgentMiddleware]:
+def build_lead_runtime_middlewares(*, app_config: AppConfig, lazy_init: bool = True) -> list[AgentMiddleware]:
    """Middlewares shared by lead agent runtime before lead-only middlewares."""
    return _build_runtime_middlewares(
        app_config=app_config,
@@ -139,10 +136,32 @@ def build_lead_runtime_middlewares(*, app_config: "AppConfig", lazy_init: bool =
    )


-def build_subagent_runtime_middlewares(*, lazy_init: bool = True) -> list[AgentMiddleware]:
+def build_subagent_runtime_middlewares(
+    *,
+    app_config: AppConfig | None = None,
+    model_name: str | None = None,
+    lazy_init: bool = True,
+) -> list[AgentMiddleware]:
    """Middlewares shared by subagent runtime before subagent-only middlewares."""
-    return _build_runtime_middlewares(
+    if app_config is None:
+        from deerflow.config import get_app_config
+
+        app_config = get_app_config()
+
+    middlewares = _build_runtime_middlewares(
+        app_config=app_config,
        include_uploads=False,
        include_dangling_tool_call_patch=True,
        lazy_init=lazy_init,
    )
+
+    if model_name is None and app_config.models:
+        model_name = app_config.models[0].name
+
+    model_config = app_config.get_model_config(model_name) if model_name else None
+    if model_config is not None and model_config.supports_vision:
+        from deerflow.agents.middlewares.view_image_middleware import ViewImageMiddleware
+
+        middlewares.append(ViewImageMiddleware())
+
+    return middlewares
@@ -9,7 +9,6 @@ from langchain.agents.middleware import AgentMiddleware
 from langchain_core.messages import HumanMessage
 from langgraph.runtime import Runtime

-from deerflow.config.deer_flow_context import DeerFlowContext
 from deerflow.config.paths import Paths, get_paths
 from deerflow.runtime.user_context import get_effective_user_id
 from deerflow.utils.file_conversion import extract_outline
@@ -186,7 +185,7 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
        return files if files else None

    @override
-    def before_agent(self, state: UploadsMiddlewareState, runtime: Runtime[DeerFlowContext]) -> dict | None:
+    def before_agent(self, state: UploadsMiddlewareState, runtime: Runtime) -> dict | None:
        """Inject uploaded files information before agent execution.

        New files come from the current message's additional_kwargs.files.
@@ -215,7 +214,14 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
            return None

        # Resolve uploads directory for existence checks
-        thread_id = runtime.context.thread_id
+        thread_id = (runtime.context or {}).get("thread_id")
+        if thread_id is None:
+            try:
+                from langgraph.config import get_config
+
+                thread_id = get_config().get("configurable", {}).get("thread_id")
+            except RuntimeError:
+                pass  # get_config() raises outside a runnable context (e.g. unit tests)
        uploads_dir = self._paths.sandbox_uploads_dir(thread_id, user_id=get_effective_user_id()) if thread_id else None

        # Get newly uploaded files from the current message's additional_kwargs.files
@@ -36,13 +36,12 @@ from deerflow.agents.lead_agent.agent import _build_middlewares
 from deerflow.agents.lead_agent.prompt import apply_prompt_template
 from deerflow.agents.thread_state import ThreadState
 from deerflow.config.agents_config import AGENT_NAME_PATTERN
-from deerflow.config.app_config import AppConfig
-from deerflow.config.deer_flow_context import DeerFlowContext
-from deerflow.config.extensions_config import ExtensionsConfig
+from deerflow.config.app_config import get_app_config, reload_app_config
+from deerflow.config.extensions_config import ExtensionsConfig, SkillStateConfig, get_extensions_config, reload_extensions_config
 from deerflow.config.paths import get_paths
 from deerflow.models import create_chat_model
 from deerflow.runtime.user_context import get_effective_user_id
-from deerflow.skills.installer import install_skill_from_archive
+from deerflow.skills.storage import get_or_new_skill_storage
 from deerflow.uploads.manager import (
    claim_unique_filename,
    delete_file_safe,
@@ -117,7 +116,6 @@ class DeerFlowClient:
        config_path: str | None = None,
        checkpointer=None,
        *,
-        config: AppConfig | None = None,
        model_name: str | None = None,
        thinking_enabled: bool = True,
        subagent_enabled: bool = False,
@@ -132,14 +130,9 @@ class DeerFlowClient:

        Args:
            config_path: Path to config.yaml. Uses default resolution if None.
-                Ignored when ``config`` is provided.
            checkpointer: LangGraph checkpointer instance for state persistence.
                Required for multi-turn conversations on the same thread_id.
                Without a checkpointer, each call is stateless.
-            config: Optional pre-constructed AppConfig. When provided, it takes
-                precedence over ``config_path`` and no file is read. Enables
-                multi-client isolation: two clients with different configs can
-                coexist in the same process without touching process-global state.
            model_name: Override the default model name from config.
            thinking_enabled: Enable model's extended thinking.
            subagent_enabled: Enable subagent delegation.
@@ -148,18 +141,9 @@ class DeerFlowClient:
            available_skills: Optional set of skill names to make available. If None (default), all scanned skills are available.
            middlewares: Optional list of custom middlewares to inject into the agent.
        """
-        # Constructor-captured config: the client owns its AppConfig for its lifetime.
-        # Multiple clients with different configs do not contend.
-        #
-        # Priority: explicit ``config=`` > explicit ``config_path=`` > ``AppConfig.from_file()``
-        # with default path resolution. There is no ambient global fallback; if
-        # config.yaml cannot be located, ``from_file`` raises loudly.
-        if config is not None:
-            self._app_config = config
-        elif config_path is not None:
-            self._app_config = AppConfig.from_file(config_path)
-        else:
-            self._app_config = AppConfig.from_file()
+        if config_path is not None:
+            reload_app_config(config_path)
+        self._app_config = get_app_config()

        if agent_name is not None and not AGENT_NAME_PATTERN.match(agent_name):
            raise ValueError(f"Invalid agent name '{agent_name}'. Must match pattern: {AGENT_NAME_PATTERN.pattern}")
@@ -187,15 +171,6 @@ class DeerFlowClient:
        self._agent = None
        self._agent_config_key = None

-    def _reload_config(self) -> None:
-        """Reload config from file and refresh the cached reference.
-
-        Only the client's own ``_app_config`` is rebuilt. Other clients
-        and the process-global are untouched, so multi-client coexistence
-        survives reload.
-        """
-        self._app_config = AppConfig.from_file()
-
    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
@@ -253,11 +228,10 @@ class DeerFlowClient:
        max_concurrent_subagents = cfg.get("max_concurrent_subagents", 3)

        kwargs: dict[str, Any] = {
-            "model": create_chat_model(name=model_name, thinking_enabled=thinking_enabled, app_config=self._app_config),
+            "model": create_chat_model(name=model_name, thinking_enabled=thinking_enabled),
            "tools": self._get_tools(model_name=model_name, subagent_enabled=subagent_enabled),
-            "middleware": _build_middlewares(self._app_config, config, model_name=model_name, agent_name=self._agent_name, custom_middlewares=self._middlewares),
+            "middleware": _build_middlewares(config, model_name=model_name, agent_name=self._agent_name, custom_middlewares=self._middlewares),
            "system_prompt": apply_prompt_template(
-                self._app_config,
                subagent_enabled=subagent_enabled,
                max_concurrent_subagents=max_concurrent_subagents,
                agent_name=self._agent_name,
@@ -269,7 +243,7 @@ class DeerFlowClient:
        if checkpointer is None:
            from deerflow.runtime.checkpointer import get_checkpointer

-            checkpointer = get_checkpointer(self._app_config)
+            checkpointer = get_checkpointer()
        if checkpointer is not None:
            kwargs["checkpointer"] = checkpointer

@@ -277,11 +251,12 @@ class DeerFlowClient:
        self._agent_config_key = key
        logger.info("Agent created: agent_name=%s, model=%s, thinking=%s", self._agent_name, model_name, thinking_enabled)

-    def _get_tools(self, *, model_name: str | None, subagent_enabled: bool):
+    @staticmethod
+    def _get_tools(*, model_name: str | None, subagent_enabled: bool):
        """Lazy import to avoid circular dependency at module level."""
        from deerflow.tools import get_available_tools

-        return get_available_tools(model_name=model_name, subagent_enabled=subagent_enabled, app_config=self._app_config)
+        return get_available_tools(model_name=model_name, subagent_enabled=subagent_enabled)

    @staticmethod
    def _serialize_tool_calls(tool_calls) -> list[dict]:
@@ -289,25 +264,35 @@ class DeerFlowClient:
        return [{"name": tc["name"], "args": tc["args"], "id": tc.get("id")} for tc in tool_calls]

    @staticmethod
-    def _ai_text_event(msg_id: str | None, text: str, usage: dict | None) -> "StreamEvent":
-        """Build a ``messages-tuple`` AI text event, attaching usage when present."""
+    def _serialize_additional_kwargs(msg) -> dict[str, Any] | None:
+        """Return a shallow copy of ``msg.additional_kwargs`` when it is a non-empty dict, else ``None``."""
+        additional_kwargs = getattr(msg, "additional_kwargs", None)
+        if isinstance(additional_kwargs, dict) and additional_kwargs:
+            return dict(additional_kwargs)
+        return None
+
+    @staticmethod
+    def _ai_text_event(msg_id: str | None, text: str, usage: dict | None, additional_kwargs: dict[str, Any] | None = None) -> "StreamEvent":
+        """Build a ``messages-tuple`` AI text event, attaching usage and additional_kwargs when present."""
        data: dict[str, Any] = {"type": "ai", "content": text, "id": msg_id}
        if usage:
            data["usage_metadata"] = usage
+        if additional_kwargs:
+            data["additional_kwargs"] = additional_kwargs
        return StreamEvent(type="messages-tuple", data=data)

    @staticmethod
-    def _ai_tool_calls_event(msg_id: str | None, tool_calls) -> "StreamEvent":
+    def _ai_tool_calls_event(msg_id: str | None, tool_calls, additional_kwargs: dict[str, Any] | None = None) -> "StreamEvent":
        """Build a ``messages-tuple`` AI tool-calls event."""
-        return StreamEvent(
-            type="messages-tuple",
-            data={
-                "type": "ai",
-                "content": "",
-                "id": msg_id,
-                "tool_calls": DeerFlowClient._serialize_tool_calls(tool_calls),
-            },
-        )
+        data: dict[str, Any] = {
+            "type": "ai",
+            "content": "",
+            "id": msg_id,
+            "tool_calls": DeerFlowClient._serialize_tool_calls(tool_calls),
+        }
+        if additional_kwargs:
+            data["additional_kwargs"] = additional_kwargs
+        return StreamEvent(type="messages-tuple", data=data)

    @staticmethod
    def _tool_message_event(msg: ToolMessage) -> "StreamEvent":
@@ -332,19 +317,30 @@ class DeerFlowClient:
                d["tool_calls"] = DeerFlowClient._serialize_tool_calls(msg.tool_calls)
            if getattr(msg, "usage_metadata", None):
                d["usage_metadata"] = msg.usage_metadata
+            if additional_kwargs := DeerFlowClient._serialize_additional_kwargs(msg):
+                d["additional_kwargs"] = additional_kwargs
            return d
        if isinstance(msg, ToolMessage):
-            return {
+            d = {
                "type": "tool",
                "content": DeerFlowClient._extract_text(msg.content),
                "name": getattr(msg, "name", None),
                "tool_call_id": getattr(msg, "tool_call_id", None),
                "id": getattr(msg, "id", None),
            }
+            if additional_kwargs := DeerFlowClient._serialize_additional_kwargs(msg):
+                d["additional_kwargs"] = additional_kwargs
+            return d
        if isinstance(msg, HumanMessage):
-            return {"type": "human", "content": msg.content, "id": getattr(msg, "id", None)}
+            d = {"type": "human", "content": msg.content, "id": getattr(msg, "id", None)}
+            if additional_kwargs := DeerFlowClient._serialize_additional_kwargs(msg):
+                d["additional_kwargs"] = additional_kwargs
+            return d
        if isinstance(msg, SystemMessage):
-            return {"type": "system", "content": msg.content, "id": getattr(msg, "id", None)}
+            d = {"type": "system", "content": msg.content, "id": getattr(msg, "id", None)}
+            if additional_kwargs := DeerFlowClient._serialize_additional_kwargs(msg):
+                d["additional_kwargs"] = additional_kwargs
+            return d
        return {"type": "unknown", "content": str(msg), "id": getattr(msg, "id", None)}

    @staticmethod
@@ -402,7 +398,7 @@ class DeerFlowClient:
        if checkpointer is None:
            from deerflow.runtime.checkpointer.provider import get_checkpointer

-            checkpointer = get_checkpointer(self._app_config)
+            checkpointer = get_checkpointer()

        thread_info_map = {}

@@ -457,7 +453,7 @@ class DeerFlowClient:
        if checkpointer is None:
            from deerflow.runtime.checkpointer.provider import get_checkpointer

-            checkpointer = get_checkpointer(self._app_config)
+            checkpointer = get_checkpointer()

        config = {"configurable": {"thread_id": thread_id}}
        checkpoints = []
@@ -567,6 +563,7 @@ class DeerFlowClient:
            - type="messages-tuple"  data={"type": "ai", "content": <delta>, "id": str}
            - type="messages-tuple"  data={"type": "ai", "content": <delta>, "id": str, "usage_metadata": {...}}
            - type="messages-tuple"  data={"type": "ai", "content": "", "id": str, "tool_calls": [...]}
+            - type="messages-tuple"  data={"type": "ai", "content": "", "id": str, "additional_kwargs": {...}}
            - type="messages-tuple"  data={"type": "tool", "content": str, "name": str, "tool_call_id": str, "id": str}
            - type="end"             data={"usage": {"input_tokens": int, "output_tokens": int, "total_tokens": int}}
        """
@@ -577,7 +574,9 @@ class DeerFlowClient:
        self._ensure_agent(config)

        state: dict[str, Any] = {"messages": [HumanMessage(content=message)]}
-        context = DeerFlowContext(app_config=self._app_config, thread_id=thread_id, agent_name=self._agent_name)
+        context = {"thread_id": thread_id}
+        if self._agent_name:
+            context["agent_name"] = self._agent_name

        seen_ids: set[str] = set()
        # Cross-mode handoff: ids already streamed via LangGraph ``messages``
@@ -587,6 +586,7 @@ class DeerFlowClient:
        # in both the final ``messages`` chunk and the values snapshot —
        # count it only on whichever arrives first.
        counted_usage_ids: set[str] = set()
+        sent_additional_kwargs_by_id: dict[str, dict[str, Any]] = {}
        cumulative_usage: dict[str, int] = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}

        def _account_usage(msg_id: str | None, usage: Any) -> dict | None:
@@ -616,6 +616,20 @@ class DeerFlowClient:
                "total_tokens": total_tokens,
            }

+        def _unsent_additional_kwargs(msg_id: str | None, additional_kwargs: dict[str, Any] | None) -> dict[str, Any] | None:
+            if not additional_kwargs:
+                return None
+            if not msg_id:
+                return additional_kwargs
+
+            sent = sent_additional_kwargs_by_id.setdefault(msg_id, {})
+            delta = {key: value for key, value in additional_kwargs.items() if sent.get(key) != value}
+            if not delta:
+                return None
+
+            sent.update(delta)
+            return delta
+
        for item in self._agent.stream(
            state,
            config=config,
@@ -643,17 +657,31 @@ class DeerFlowClient:

                if isinstance(msg_chunk, AIMessage):
                    text = self._extract_text(msg_chunk.content)
+                    additional_kwargs = self._serialize_additional_kwargs(msg_chunk)
                    counted_usage = _account_usage(msg_id, msg_chunk.usage_metadata)
+                    sent_additional_kwargs = False

                    if text:
                        if msg_id:
                            streamed_ids.add(msg_id)
-                        yield self._ai_text_event(msg_id, text, counted_usage)
+                        additional_kwargs_delta = _unsent_additional_kwargs(msg_id, additional_kwargs)
+                        yield self._ai_text_event(
+                            msg_id,
+                            text,
+                            counted_usage,
+                            additional_kwargs_delta,
+                        )
+                        sent_additional_kwargs = bool(additional_kwargs_delta)

                    if msg_chunk.tool_calls:
                        if msg_id:
                            streamed_ids.add(msg_id)
-                        yield self._ai_tool_calls_event(msg_id, msg_chunk.tool_calls)
+                        additional_kwargs_delta = None if sent_additional_kwargs else _unsent_additional_kwargs(msg_id, additional_kwargs)
+                        yield self._ai_tool_calls_event(
+                            msg_id,
+                            msg_chunk.tool_calls,
+                            additional_kwargs_delta,
+                        )

                elif isinstance(msg_chunk, ToolMessage):
                    if msg_id:
@@ -676,17 +704,45 @@ class DeerFlowClient:
                if msg_id and msg_id in streamed_ids:
                    if isinstance(msg, AIMessage):
                        _account_usage(msg_id, getattr(msg, "usage_metadata", None))
+                        additional_kwargs = self._serialize_additional_kwargs(msg)
+                        additional_kwargs_delta = _unsent_additional_kwargs(msg_id, additional_kwargs)
+                        if additional_kwargs_delta:
+                            # Metadata-only follow-up: ``messages-tuple`` has no
+                            # dedicated attribution event, so clients should
+                            # merge this empty-content AI event by message id
+                            # and ignore it for text rendering.
+                            yield self._ai_text_event(msg_id, "", None, additional_kwargs_delta)
                    continue

                if isinstance(msg, AIMessage):
                    counted_usage = _account_usage(msg_id, msg.usage_metadata)
+                    additional_kwargs = self._serialize_additional_kwargs(msg)
+                    sent_additional_kwargs = False

                    if msg.tool_calls:
-                        yield self._ai_tool_calls_event(msg_id, msg.tool_calls)
+                        additional_kwargs_delta = _unsent_additional_kwargs(msg_id, additional_kwargs)
+                        yield self._ai_tool_calls_event(
+                            msg_id,
+                            msg.tool_calls,
+                            additional_kwargs_delta,
+                        )
+                        sent_additional_kwargs = bool(additional_kwargs_delta)

                    text = self._extract_text(msg.content)
                    if text:
-                        yield self._ai_text_event(msg_id, text, counted_usage)
+                        additional_kwargs_delta = None if sent_additional_kwargs else _unsent_additional_kwargs(msg_id, additional_kwargs)
+                        yield self._ai_text_event(
+                            msg_id,
+                            text,
+                            counted_usage,
+                            additional_kwargs_delta,
+                        )
+                    elif msg_id:
+                        additional_kwargs_delta = None if sent_additional_kwargs else _unsent_additional_kwargs(msg_id, additional_kwargs)
+                        if not additional_kwargs_delta:
+                            continue
+                        # See the metadata-only follow-up convention above.
+                        yield self._ai_text_event(msg_id, "", None, additional_kwargs_delta)

                elif isinstance(msg, ToolMessage):
                    yield self._tool_message_event(msg)
@@ -775,8 +831,6 @@ class DeerFlowClient:
            Dict with "skills" key containing list of skill info dicts,
            matching the Gateway API ``SkillsListResponse`` schema.
        """
-        from deerflow.skills.loader import load_skills
-
        return {
            "skills": [
                {
@@ -786,7 +840,7 @@ class DeerFlowClient:
                    "category": s.category,
                    "enabled": s.enabled,
                }
-                for s in load_skills(self._app_config, enabled_only=enabled_only)
+                for s in get_or_new_skill_storage().load_skills(enabled_only=enabled_only)
            ]
        }

@@ -798,19 +852,19 @@ class DeerFlowClient:
        """
        from deerflow.agents.memory.updater import get_memory_data

-        return get_memory_data(self._app_config.memory, user_id=get_effective_user_id())
+        return get_memory_data(user_id=get_effective_user_id())

    def export_memory(self) -> dict:
        """Export current memory data for backup or transfer."""
        from deerflow.agents.memory.updater import get_memory_data

-        return get_memory_data(self._app_config.memory, user_id=get_effective_user_id())
+        return get_memory_data(user_id=get_effective_user_id())

    def import_memory(self, memory_data: dict) -> dict:
        """Import and persist full memory data."""
        from deerflow.agents.memory.updater import import_memory_data

-        return import_memory_data(self._app_config.memory, memory_data, user_id=get_effective_user_id())
+        return import_memory_data(memory_data, user_id=get_effective_user_id())

    def get_model(self, name: str) -> dict | None:
        """Get a specific model's configuration by name.
@@ -845,8 +899,8 @@ class DeerFlowClient:
            Dict with "mcp_servers" key mapping server name to config,
            matching the Gateway API ``McpConfigResponse`` schema.
        """
-        ext = self._app_config.extensions
-        return {"mcp_servers": {name: server.model_dump() for name, server in ext.mcp_servers.items()}}
+        config = get_extensions_config()
+        return {"mcp_servers": {name: server.model_dump() for name, server in config.mcp_servers.items()}}

    def update_mcp_config(self, mcp_servers: dict[str, dict]) -> dict:
        """Update MCP server configurations.
@@ -868,19 +922,18 @@ class DeerFlowClient:
        if config_path is None:
            raise FileNotFoundError("Cannot locate extensions_config.json. Set DEER_FLOW_EXTENSIONS_CONFIG_PATH or ensure it exists in the project root.")

-        current_ext = self._app_config.extensions
+        current_config = get_extensions_config()

        config_data = {
            "mcpServers": mcp_servers,
-            "skills": {name: {"enabled": skill.enabled} for name, skill in current_ext.skills.items()},
+            "skills": {name: {"enabled": skill.enabled} for name, skill in current_config.skills.items()},
        }

        self._atomic_write_json(config_path, config_data)

        self._agent = None
        self._agent_config_key = None
-        self._reload_config()
-        reloaded = self._app_config.extensions
+        reloaded = reload_extensions_config()
        return {"mcp_servers": {name: server.model_dump() for name, server in reloaded.mcp_servers.items()}}

    # ------------------------------------------------------------------
@@ -896,9 +949,9 @@ class DeerFlowClient:
        Returns:
            Skill info dict, or None if not found.
        """
-        from deerflow.skills.loader import load_skills
+        from deerflow.skills.storage import get_or_new_skill_storage

-        skill = next((s for s in load_skills(self._app_config, enabled_only=False) if s.name == name), None)
+        skill = next((s for s in get_or_new_skill_storage().load_skills(enabled_only=False) if s.name == name), None)
        if skill is None:
            return None
        return {
@@ -923,9 +976,9 @@ class DeerFlowClient:
            ValueError: If the skill is not found.
            OSError: If the config file cannot be written.
        """
-        from deerflow.skills.loader import load_skills
+        from deerflow.skills.storage import get_or_new_skill_storage

-        skills = load_skills(self._app_config, enabled_only=False)
+        skills = get_or_new_skill_storage().load_skills(enabled_only=False)
        skill = next((s for s in skills if s.name == name), None)
        if skill is None:
            raise ValueError(f"Skill '{name}' not found")
@@ -934,25 +987,21 @@ class DeerFlowClient:
        if config_path is None:
            raise FileNotFoundError("Cannot locate extensions_config.json. Set DEER_FLOW_EXTENSIONS_CONFIG_PATH or ensure it exists in the project root.")

-        # Do not mutate self._app_config (frozen value). Compose the new
-        # skills state in a fresh dict, write it to disk, and let _reload_config()
-        # below rebuild AppConfig from the updated file.
-        ext = self._app_config.extensions
-        new_skills = {n: {"enabled": sc.enabled} for n, sc in ext.skills.items()}
-        new_skills[name] = {"enabled": enabled}
+        extensions_config = get_extensions_config()
+        extensions_config.skills[name] = SkillStateConfig(enabled=enabled)

        config_data = {
-            "mcpServers": {n: s.model_dump() for n, s in ext.mcp_servers.items()},
-            "skills": new_skills,
+            "mcpServers": {n: s.model_dump() for n, s in extensions_config.mcp_servers.items()},
+            "skills": {n: {"enabled": sc.enabled} for n, sc in extensions_config.skills.items()},
        }

        self._atomic_write_json(config_path, config_data)

        self._agent = None
        self._agent_config_key = None
-        self._reload_config()
+        reload_extensions_config()

-        updated = next((s for s in load_skills(self._app_config, enabled_only=False) if s.name == name), None)
+        updated = next((s for s in get_or_new_skill_storage().load_skills(enabled_only=False) if s.name == name), None)
        if updated is None:
            raise RuntimeError(f"Skill '{name}' disappeared after update")
        return {
@@ -976,7 +1025,7 @@ class DeerFlowClient:
            FileNotFoundError: If the file does not exist.
            ValueError: If the file is invalid.
        """
-        return install_skill_from_archive(skill_path)
+        return get_or_new_skill_storage().install_skill_from_archive(skill_path)

    # ------------------------------------------------------------------
    # Public API — memory management
@@ -990,25 +1039,25 @@ class DeerFlowClient:
        """
        from deerflow.agents.memory.updater import reload_memory_data

-        return reload_memory_data(self._app_config.memory, user_id=get_effective_user_id())
+        return reload_memory_data(user_id=get_effective_user_id())

    def clear_memory(self) -> dict:
        """Clear all persisted memory data."""
        from deerflow.agents.memory.updater import clear_memory_data

-        return clear_memory_data(self._app_config.memory, user_id=get_effective_user_id())
+        return clear_memory_data(user_id=get_effective_user_id())

    def create_memory_fact(self, content: str, category: str = "context", confidence: float = 0.5) -> dict:
        """Create a single fact manually."""
        from deerflow.agents.memory.updater import create_memory_fact

-        return create_memory_fact(self._app_config.memory, content=content, category=category, confidence=confidence)
+        return create_memory_fact(content=content, category=category, confidence=confidence)

    def delete_memory_fact(self, fact_id: str) -> dict:
        """Delete a single fact from memory by fact id."""
        from deerflow.agents.memory.updater import delete_memory_fact

-        return delete_memory_fact(self._app_config.memory, fact_id)
+        return delete_memory_fact(fact_id)

    def update_memory_fact(
        self,
@@ -1021,7 +1070,6 @@ class DeerFlowClient:
        from deerflow.agents.memory.updater import update_memory_fact

        return update_memory_fact(
-            self._app_config.memory,
            fact_id=fact_id,
            content=content,
            category=category,
@@ -1034,7 +1082,9 @@ class DeerFlowClient:
        Returns:
            Memory config dict.
        """
-        config = self._app_config.memory
+        from deerflow.config.memory_config import get_memory_config
+
+        config = get_memory_config()
        return {
            "enabled": config.enabled,
            "storage_path": config.storage_path,
@@ -48,6 +48,12 @@ class AioSandbox(Sandbox):
            self._home_dir = context.home_dir
        return self._home_dir

+    # Default no_change_timeout for exec_command (seconds).  Matches the
+    # client-level timeout so that long-running commands which produce no
+    # output are not prematurely terminated by the sandbox's built-in 120 s
+    # default.
+    _DEFAULT_NO_CHANGE_TIMEOUT = 600
+
    def execute_command(self, command: str) -> str:
        """Execute a shell command in the sandbox.

@@ -66,13 +72,13 @@ class AioSandbox(Sandbox):
        """
        with self._lock:
            try:
-                result = self._client.shell.exec_command(command=command)
+                result = self._client.shell.exec_command(command=command, no_change_timeout=self._DEFAULT_NO_CHANGE_TIMEOUT)
                output = result.data.output if result.data else ""

                if output and _ERROR_OBSERVATION_SIGNATURE in output:
                    logger.warning("ErrorObservation detected in sandbox output, retrying with a fresh session")
                    fresh_id = str(uuid.uuid4())
-                    result = self._client.shell.exec_command(command=command, id=fresh_id)
+                    result = self._client.shell.exec_command(command=command, id=fresh_id, no_change_timeout=self._DEFAULT_NO_CHANGE_TIMEOUT)
                    output = result.data.output if result.data else ""

                return output if output else "(no output)"
@@ -108,7 +114,7 @@ class AioSandbox(Sandbox):
        """
        with self._lock:
            try:
-                result = self._client.shell.exec_command(command=f"find {shlex.quote(path)} -maxdepth {max_depth} -type f -o -type d 2>/dev/null | head -500")
+                result = self._client.shell.exec_command(command=f"find {shlex.quote(path)} -maxdepth {max_depth} -type f -o -type d 2>/dev/null | head -500", no_change_timeout=self._DEFAULT_NO_CHANGE_TIMEOUT)
                output = result.data.output if result.data else ""
                if output:
                    return [line.strip() for line in output.strip().split("\n") if line.strip()]
@@ -25,7 +25,7 @@ except ImportError:  # pragma: no cover - Windows fallback
    fcntl = None  # type: ignore[assignment]
    import msvcrt

-from deerflow.config.app_config import AppConfig
+from deerflow.config import get_app_config
 from deerflow.config.paths import VIRTUAL_PATH_PREFIX, get_paths
 from deerflow.runtime.user_context import get_effective_user_id
 from deerflow.sandbox.sandbox import Sandbox
@@ -80,6 +80,7 @@ class AioSandboxProvider(SandboxProvider):
        port: 8080                      # Base port for local containers
        container_prefix: deer-flow-sandbox
        idle_timeout: 600               # Idle timeout in seconds (0 to disable)
+        auto_restart: true              # Restart crashed containers automatically
        replicas: 3                     # Max concurrent sandbox containers (LRU eviction when exceeded)
        mounts:                         # Volume mounts for local containers
          - host_path: /path/on/host
@@ -90,8 +91,7 @@ class AioSandboxProvider(SandboxProvider):
          API_KEY: $MY_API_KEY
    """

-    def __init__(self, app_config: "AppConfig"):
-        self._app_config = app_config
+    def __init__(self):
        self._lock = threading.Lock()
        self._sandboxes: dict[str, AioSandbox] = {}  # sandbox_id -> AioSandbox instance
        self._sandbox_infos: dict[str, SandboxInfo] = {}  # sandbox_id -> SandboxInfo (for destroy)
@@ -160,16 +160,19 @@ class AioSandboxProvider(SandboxProvider):

    def _load_config(self) -> dict:
        """Load sandbox configuration from app config."""
-        sandbox_config = self._app_config.sandbox
+        config = get_app_config()
+        sandbox_config = config.sandbox

        idle_timeout = getattr(sandbox_config, "idle_timeout", None)
        replicas = getattr(sandbox_config, "replicas", None)
+        auto_restart = getattr(sandbox_config, "auto_restart", True)

        return {
            "image": sandbox_config.image or DEFAULT_IMAGE,
            "port": sandbox_config.port or DEFAULT_PORT,
            "container_prefix": sandbox_config.container_prefix or DEFAULT_CONTAINER_PREFIX,
            "idle_timeout": idle_timeout if idle_timeout is not None else DEFAULT_IDLE_TIMEOUT,
+            "auto_restart": auto_restart,
            "replicas": replicas if replicas is not None else DEFAULT_REPLICAS,
            "mounts": sandbox_config.mounts or [],
            "environment": self._resolve_env_vars(sandbox_config.environment or {}),
@@ -283,15 +286,17 @@ class AioSandboxProvider(SandboxProvider):
            (paths.host_acp_workspace_dir(thread_id, user_id=user_id), "/mnt/acp-workspace", True),
        ]

-    def _get_skills_mount(self) -> tuple[str, str, bool] | None:
+    @staticmethod
+    def _get_skills_mount() -> tuple[str, str, bool] | None:
        """Get the skills directory mount configuration.

        Mount source uses DEER_FLOW_HOST_SKILLS_PATH when running inside Docker (DooD)
        so the host Docker daemon can resolve the path.
        """
        try:
-            skills_path = self._app_config.skills.get_skills_path()
-            container_path = self._app_config.skills.container_path
+            config = get_app_config()
+            skills_path = config.skills.get_skills_path()
+            container_path = config.skills.container_path

            if skills_path.exists():
                # When running inside Docker with DooD, use host-side skills path.
@@ -606,18 +611,58 @@ class AioSandboxProvider(SandboxProvider):
    def get(self, sandbox_id: str) -> Sandbox | None:
        """Get a sandbox by ID. Updates last activity timestamp.

+        When ``auto_restart`` is enabled (the default), the container's liveness
+        is verified on each lookup.  If the underlying container has crashed, the
+        sandbox is evicted from all caches so that the next ``acquire()`` call will
+        transparently create a fresh container.
+
        Args:
            sandbox_id: The ID of the sandbox.

        Returns:
-            The sandbox instance if found, None otherwise.
+            The sandbox instance if found and alive, None otherwise.
        """
        with self._lock:
            sandbox = self._sandboxes.get(sandbox_id)
-            if sandbox is not None:
-                self._last_activity[sandbox_id] = time.time()
+            if sandbox is None:
+                return None
+            self._last_activity[sandbox_id] = time.time()
+            auto_restart = self._config.get("auto_restart", True)
+            # ``info`` stays None when auto_restart is off, which disables the probe below.
+            info = self._sandbox_infos.get(sandbox_id) if auto_restart else None
+
+        # The lock is released here: the backend liveness probe runs without holding it.
+        if not info:
            return sandbox

+        if self._backend.is_alive(info):
+            return sandbox
+
+        info_to_destroy = None
+        with self._lock:
+            # Re-read the caches under the lock; they may have changed while the probe ran.
+            current_sandbox = self._sandboxes.get(sandbox_id)
+            current_info = self._sandbox_infos.get(sandbox_id)
+            if current_sandbox is None:
+                return None
+            if current_info is not info:
+                # The entry now holds a different SandboxInfo object than the one that
+                # was probed, so the probe result does not apply to it: keep it.
+                self._last_activity[sandbox_id] = time.time()
+                return current_sandbox
+
+            logger.warning(f"Sandbox {sandbox_id} container is not alive, evicting from cache for auto-restart")
+            self._sandboxes.pop(sandbox_id, None)
+            self._sandbox_infos.pop(sandbox_id, None)
+            self._last_activity.pop(sandbox_id, None)
+            self._warm_pool.pop(sandbox_id, None)
+            # Collect the keys first so the dict is not mutated while it is iterated.
+            thread_ids = [tid for tid, sid in self._thread_sandboxes.items() if sid == sandbox_id]
+            for tid in thread_ids:
+                del self._thread_sandboxes[tid]
+            info_to_destroy = info
+
+        # Backend cleanup also runs outside the lock; a failure is logged, not raised.
+        if info_to_destroy:
+            try:
+                self._backend.destroy(info_to_destroy)
+            except Exception as e:
+                logger.warning(f"Failed to cleanup dead sandbox {sandbox_id}: {e}")
+        return None
+
    def release(self, sandbox_id: str) -> None:
        """Release a sandbox from active use into the warm pool.

@@ -9,6 +9,7 @@ from __future__ import annotations
 import json
 import logging
 import os
+import shlex
 import subprocess
 from datetime import datetime

@@ -86,6 +87,88 @@ def _format_container_mount(runtime: str, host_path: str, container_path: str, r
    return ["-v", mount_spec]


+def _redact_container_command_for_log(cmd: list[str]) -> list[str]:
+    """Return a Docker/Container command with environment values redacted.
+
+    Recognizes the two-argument forms ``-e KEY=VALUE`` / ``--env KEY=VALUE``
+    and the single-argument form ``--env=KEY=VALUE``.  Everything after the
+    first ``=`` of the assignment is replaced with ``<redacted>``; variable
+    names and all other arguments are copied through unchanged.
+
+    Args:
+        cmd: The command line as a list of arguments.
+
+    Returns:
+        A new list of arguments; ``cmd`` itself is not modified.
+    """
+    redacted: list[str] = []
+    # True while the previous argument was ``-e``/``--env``, i.e. the current
+    # argument is that flag's value.
+    redact_next_env = False
+
+    for arg in cmd:
+        if redact_next_env:
+            if "=" in arg:
+                key = arg.split("=", 1)[0]
+                # An empty key (argument starts with "=") is redacted entirely.
+                redacted.append(f"{key}=<redacted>" if key else "<redacted>")
+            else:
+                # No "=": there is no inline value to hide, keep the bare name.
+                redacted.append(arg)
+            redact_next_env = False
+            continue
+
+        if arg in {"-e", "--env"}:
+            redacted.append(arg)
+            redact_next_env = True
+            continue
+
+        if arg.startswith("--env="):
+            value = arg.removeprefix("--env=")
+            if "=" in value:
+                key = value.split("=", 1)[0]
+                redacted.append(f"--env={key}=<redacted>" if key else "--env=<redacted>")
+            else:
+                redacted.append(arg)
+            continue
+
+        redacted.append(arg)
+
+    return redacted
+
+
+def _format_container_command_for_log(cmd: list[str]) -> str:
+    """Render an argument list as a single quoted command string for logging.
+
+    Uses ``subprocess.list2cmdline`` when ``os.name == "nt"`` and
+    ``shlex.join`` on every other platform.
+    """
+    if os.name == "nt":
+        return subprocess.list2cmdline(cmd)
+    return shlex.join(cmd)
+
+
+def _normalize_sandbox_host(host: str) -> str:
+    """Return ``host`` with surrounding whitespace removed and lowercased."""
+    return host.strip().lower()
+
+
+def _is_ipv6_loopback_sandbox_host(host: str) -> bool:
+    """Return True when the normalized host is ``::1`` or ``[::1]``."""
+    return _normalize_sandbox_host(host) in {"::1", "[::1]"}
+
+
+def _is_loopback_sandbox_host(host: str) -> bool:
+    """Return True when the normalized host is a loopback name or address.
+
+    The accepted values are the empty string, ``localhost``, ``127.0.0.1``,
+    ``::1`` and ``[::1]``.
+    """
+    return _normalize_sandbox_host(host) in {"", "localhost", "127.0.0.1", "::1", "[::1]"}
+
+
+def _resolve_docker_bind_host(sandbox_host: str | None = None, bind_host: str | None = None) -> str:
+    """Choose the host interface for legacy Docker ``-p`` sandbox publishing.
+
+    Bare-metal/local runs talk to sandboxes through localhost and should not
+    expose the sandbox HTTP API on every host interface.  Docker-outside-of-
+    Docker deployments commonly use ``host.docker.internal`` from another
+    container; keep their legacy broad bind unless operators opt into a
+    narrower bind with ``DEER_FLOW_SANDBOX_BIND_HOST``.  When operators choose
+    an IPv6 loopback sandbox host, bind Docker to IPv6 loopback as well so the
+    advertised sandbox URL and published socket use the same address family.
+
+    Args:
+        sandbox_host: Sandbox host to classify.  When None, the
+            ``DEER_FLOW_SANDBOX_HOST`` environment variable is read, with
+            ``"localhost"`` as the fallback.
+        bind_host: Explicit bind host.  When None, the
+            ``DEER_FLOW_SANDBOX_BIND_HOST`` environment variable is read.
+
+    Returns:
+        The stripped explicit bind host if it is non-empty; otherwise
+        ``"[::1]"``, ``"127.0.0.1"`` or ``"0.0.0.0"`` depending on the
+        sandbox host.
+    """
+    explicit_bind = bind_host if bind_host is not None else os.environ.get("DEER_FLOW_SANDBOX_BIND_HOST")
+    if explicit_bind is not None:
+        explicit_bind = explicit_bind.strip()
+        # An empty or whitespace-only override is ignored and falls through
+        # to the sandbox-host based choice below.
+        if explicit_bind:
+            logger.debug("Docker sandbox bind: %s (explicit bind host override)", explicit_bind)
+            return explicit_bind
+
+    host = sandbox_host if sandbox_host is not None else os.environ.get("DEER_FLOW_SANDBOX_HOST", "localhost")
+    # The IPv6 check must come first: ``::1`` also satisfies the generic
+    # loopback check, which would return the IPv4 address instead.
+    if _is_ipv6_loopback_sandbox_host(host):
+        logger.debug("Docker sandbox bind: [::1] (IPv6 loopback sandbox host)")
+        return "[::1]"
+    if _is_loopback_sandbox_host(host):
+        logger.debug("Docker sandbox bind: 127.0.0.1 (loopback default)")
+        return "127.0.0.1"
+
+    logger.debug("Docker sandbox bind: 0.0.0.0 (non-loopback sandbox host compatibility)")
+    return "0.0.0.0"
+
+
 class LocalContainerBackend(SandboxBackend):
    """Backend that manages sandbox containers locally using Docker or Apple Container.

@@ -424,12 +507,17 @@ class LocalContainerBackend(SandboxBackend):
        if self._runtime == "docker":
            cmd.extend(["--security-opt", "seccomp=unconfined"])

+        if self._runtime == "docker":
+            port_mapping = f"{_resolve_docker_bind_host()}:{port}:8080"
+        else:
+            port_mapping = f"{port}:8080"
+
        cmd.extend(
            [
                "--rm",
                "-d",
                "-p",
-                f"{port}:8080",
+                port_mapping,
                "--name",
                container_name,
            ]
@@ -464,7 +552,8 @@ class LocalContainerBackend(SandboxBackend):

        cmd.append(self._image)

-        logger.info(f"Starting container using {self._runtime}: {' '.join(cmd)}")
+        log_cmd = _format_container_command_for_log(_redact_container_command_for_log(cmd))
+        logger.info(f"Starting container using {self._runtime}: {log_cmd}")

        try:
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
@@ -84,8 +84,52 @@ class RemoteSandboxBackend(SandboxBackend):
        """
        return self._provisioner_discover(sandbox_id)

+    def list_running(self) -> list[SandboxInfo]:
+        """Return all sandboxes currently managed by the provisioner.
+
+        Calls ``GET /api/sandboxes`` so that ``AioSandboxProvider._reconcile_orphans()``
+        can adopt pods that were created by a previous process and were never
+        explicitly destroyed.
+        Without this, a process restart silently orphans all existing k8s Pods —
+        they stay running forever because the idle checker only
+        tracks in-process state.
+
+        Returns:
+            The list produced by ``_provisioner_list()``.
+        """
+        return self._provisioner_list()
+
    # ── Provisioner API calls ─────────────────────────────────────────────

+    def _provisioner_list(self) -> list[SandboxInfo]:
+        """GET /api/sandboxes → list all running sandboxes.
+
+        The response is expected to be a JSON object with a ``sandboxes`` list
+        whose entries are objects carrying ``sandbox_id`` and ``sandbox_url``.
+
+        Returns:
+            One ``SandboxInfo`` per well-formed entry.  An empty list is
+            returned when the request fails or the payload has an unexpected
+            shape; malformed entries are skipped.
+        """
+        try:
+            resp = requests.get(f"{self._provisioner_url}/api/sandboxes", timeout=10)
+            resp.raise_for_status()
+            data = resp.json()
+            if not isinstance(data, dict):
+                logger.warning("Provisioner list_running returned non-dict payload: %r", type(data))
+                return []
+
+            # A missing "sandboxes" key is treated as an empty list.
+            sandboxes = data.get("sandboxes", [])
+            if not isinstance(sandboxes, list):
+                logger.warning("Provisioner list_running returned non-list sandboxes: %r", type(sandboxes))
+                return []
+
+            infos: list[SandboxInfo] = []
+            for sandbox in sandboxes:
+                if not isinstance(sandbox, dict):
+                    logger.warning("Provisioner list_running entry is not a dict: %r", type(sandbox))
+                    continue
+
+                sandbox_id = sandbox.get("sandbox_id")
+                sandbox_url = sandbox.get("sandbox_url")
+                # Entries without a non-empty string id and URL are dropped
+                # without a log message.
+                if isinstance(sandbox_id, str) and sandbox_id and isinstance(sandbox_url, str) and sandbox_url:
+                    infos.append(SandboxInfo(sandbox_id=sandbox_id, sandbox_url=sandbox_url))
+
+            logger.info("Provisioner list_running: %d sandbox(es) found", len(infos))
+            return infos
+        except requests.RequestException as exc:
+            # Best effort: a provisioner failure is logged and reported as "no sandboxes".
+            logger.warning("Provisioner list_running failed: %s", exc)
+            return []
+
    def _provisioner_create(self, thread_id: str, sandbox_id: str, extra_mounts: list[tuple[str, str, bool]] | None = None) -> SandboxInfo:
        """POST /api/sandboxes → create Pod + Service."""
        try:
@@ -5,9 +5,9 @@ Web Search Tool - Search the web using DuckDuckGo (no API key required).
 import json
 import logging

-from langchain.tools import ToolRuntime, tool
+from langchain.tools import tool

-from deerflow.config.deer_flow_context import resolve_context
+from deerflow.config import get_app_config

 logger = logging.getLogger(__name__)

@@ -55,7 +55,6 @@ def _search_text(
@tool("web_search", parse_docstring=True)
 def web_search_tool(
    query: str,
-    runtime: ToolRuntime,
    max_results: int = 5,
 ) -> str:
    """Search the web for information. Use this tool to find current information, news, articles, and facts from the internet.
@@ -64,11 +63,11 @@ def web_search_tool(
        query: Search keywords describing what you want to find. Be specific for better results.
        max_results: Maximum number of results to return. Default is 5.
    """
-    tool_config = resolve_context(runtime).app_config.get_tool_config("web_search")
+    config = get_app_config().get_tool_config("web_search")

    # Override max_results from config if set
-    if tool_config is not None and "max_results" in tool_config.model_extra:
-        max_results = tool_config.model_extra.get("max_results", max_results)
+    if config is not None and "max_results" in config.model_extra:
+        max_results = config.model_extra.get("max_results", max_results)

    results = _search_text(
        query=query,
@@ -1,39 +1,37 @@
 import json

 from exa_py import Exa
-from langchain.tools import ToolRuntime, tool
+from langchain.tools import tool

-from deerflow.config.app_config import AppConfig
-from deerflow.config.deer_flow_context import resolve_context
+from deerflow.config import get_app_config


-def _get_exa_client(app_config: AppConfig, tool_name: str = "web_search") -> Exa:
-    tool_config = app_config.get_tool_config(tool_name)
+def _get_exa_client(tool_name: str = "web_search") -> Exa:
+    """Build an ``Exa`` client for the given tool.
+
+    The ``api_key`` is read from the extra fields of the ``tool_name`` tool
+    config returned by ``get_app_config()``; ``api_key=None`` is passed when
+    the tool config or the key is absent.
+    """
+    config = get_app_config().get_tool_config(tool_name)
    api_key = None
-    if tool_config is not None and "api_key" in tool_config.model_extra:
-        api_key = tool_config.model_extra.get("api_key")
+    if config is not None and "api_key" in config.model_extra:
+        api_key = config.model_extra.get("api_key")
    return Exa(api_key=api_key)


@tool("web_search", parse_docstring=True)
-def web_search_tool(query: str, runtime: ToolRuntime) -> str:
+def web_search_tool(query: str) -> str:
    """Search the web.

    Args:
        query: The query to search for.
    """
    try:
-        app_config = resolve_context(runtime).app_config
-        tool_config = app_config.get_tool_config("web_search")
+        config = get_app_config().get_tool_config("web_search")
        max_results = 5
        search_type = "auto"
        contents_max_characters = 1000
-        if tool_config is not None:
-            max_results = tool_config.model_extra.get("max_results", max_results)
-            search_type = tool_config.model_extra.get("search_type", search_type)
-            contents_max_characters = tool_config.model_extra.get("contents_max_characters", contents_max_characters)
+        if config is not None:
+            max_results = config.model_extra.get("max_results", max_results)
+            search_type = config.model_extra.get("search_type", search_type)
+            contents_max_characters = config.model_extra.get("contents_max_characters", contents_max_characters)

-        client = _get_exa_client(app_config)
+        client = _get_exa_client()
        res = client.search(
            query,
            type=search_type,
@@ -56,7 +54,7 @@ def web_search_tool(query: str, runtime: ToolRuntime) -> str:


@tool("web_fetch", parse_docstring=True)
-def web_fetch_tool(url: str, runtime: ToolRuntime) -> str:
+def web_fetch_tool(url: str) -> str:
    """Fetch the contents of a web page at a given URL.
    Only fetch EXACT URLs that have been provided directly by the user or have been returned in results from the web_search and web_fetch tools.
    This tool can NOT access content that requires authentication, such as private Google Docs or pages behind login walls.
@@ -67,7 +65,7 @@ def web_fetch_tool(url: str, runtime: ToolRuntime) -> str:
        url: The URL to fetch the contents of.
    """
    try:
-        client = _get_exa_client(resolve_context(runtime).app_config, "web_fetch")
+        client = _get_exa_client("web_fetch")
        res = client.get_contents([url], text={"max_characters": 4096})

        if res.results:
@@ -1,35 +1,33 @@
 import json

 from firecrawl import FirecrawlApp
-from langchain.tools import ToolRuntime, tool
+from langchain.tools import tool

-from deerflow.config.app_config import AppConfig
-from deerflow.config.deer_flow_context import resolve_context
+from deerflow.config import get_app_config


-def _get_firecrawl_client(app_config: AppConfig, tool_name: str = "web_search") -> FirecrawlApp:
-    tool_config = app_config.get_tool_config(tool_name)
+def _get_firecrawl_client(tool_name: str = "web_search") -> FirecrawlApp:
+    """Build a ``FirecrawlApp`` client for the given tool.
+
+    The ``api_key`` is read from the extra fields of the ``tool_name`` tool
+    config returned by ``get_app_config()``; ``api_key=None`` is passed when
+    the tool config or the key is absent.
+    """
+    config = get_app_config().get_tool_config(tool_name)
    api_key = None
-    if tool_config is not None and "api_key" in tool_config.model_extra:
-        api_key = tool_config.model_extra.get("api_key")
+    if config is not None and "api_key" in config.model_extra:
+        api_key = config.model_extra.get("api_key")
    return FirecrawlApp(api_key=api_key)  # type: ignore[arg-type]


@tool("web_search", parse_docstring=True)
-def web_search_tool(query: str, runtime: ToolRuntime) -> str:
+def web_search_tool(query: str) -> str:
    """Search the web.

    Args:
        query: The query to search for.
    """
    try:
-        app_config = resolve_context(runtime).app_config
-        tool_config = app_config.get_tool_config("web_search")
+        config = get_app_config().get_tool_config("web_search")
        max_results = 5
-        if tool_config is not None:
-            max_results = tool_config.model_extra.get("max_results", max_results)
+        if config is not None:
+            max_results = config.model_extra.get("max_results", max_results)

-        client = _get_firecrawl_client(app_config, "web_search")
+        client = _get_firecrawl_client("web_search")
        result = client.search(query, limit=max_results)

        # result.web contains list of SearchResultWeb objects
@@ -49,7 +47,7 @@ def web_search_tool(query: str, runtime: ToolRuntime) -> str:


@tool("web_fetch", parse_docstring=True)
-def web_fetch_tool(url: str, runtime: ToolRuntime) -> str:
+def web_fetch_tool(url: str) -> str:
    """Fetch the contents of a web page at a given URL.
    Only fetch EXACT URLs that have been provided directly by the user or have been returned in results from the web_search and web_fetch tools.
    This tool can NOT access content that requires authentication, such as private Google Docs or pages behind login walls.
@@ -60,8 +58,7 @@ def web_fetch_tool(url: str, runtime: ToolRuntime) -> str:
        url: The URL to fetch the contents of.
    """
    try:
-        app_config = resolve_context(runtime).app_config
-        client = _get_firecrawl_client(app_config, "web_fetch")
+        client = _get_firecrawl_client("web_fetch")
        result = client.scrape(url, formats=["markdown"])

        markdown_content = result.markdown or ""
@@ -5,9 +5,9 @@ Image Search Tool - Search images using DuckDuckGo for reference in image genera
 import json
 import logging

-from langchain.tools import ToolRuntime, tool
+from langchain.tools import tool

-from deerflow.config.deer_flow_context import resolve_context
+from deerflow.config import get_app_config

 logger = logging.getLogger(__name__)

@@ -77,7 +77,6 @@ def _search_images(
@tool("image_search", parse_docstring=True)
 def image_search_tool(
    query: str,
-    runtime: ToolRuntime,
    max_results: int = 5,
    size: str | None = None,
    type_image: str | None = None,
@@ -100,11 +99,11 @@ def image_search_tool(
        type_image: Image type filter. Options: "photo", "clipart", "gif", "transparent", "line". Use "photo" for realistic references.
        layout: Layout filter. Options: "Square", "Tall", "Wide". Choose based on your generation needs.
    """
-    tool_config = resolve_context(runtime).app_config.get_tool_config("image_search")
+    config = get_app_config().get_tool_config("image_search")

    # Override max_results from config if set
-    if tool_config is not None and "max_results" in tool_config.model_extra:
-        max_results = tool_config.model_extra.get("max_results", max_results)
+    if config is not None and "max_results" in config.model_extra:
+        max_results = config.model_extra.get("max_results", max_results)

    results = _search_images(
        query=query,
@@ -1,7 +1,6 @@
-from langchain.tools import ToolRuntime, tool
+from langchain.tools import tool

-from deerflow.config.app_config import AppConfig
-from deerflow.config.deer_flow_context import resolve_context
+from deerflow.config import get_app_config
 from deerflow.utils.readability import ReadabilityExtractor

 from .infoquest_client import InfoQuestClient
@@ -9,13 +8,13 @@ from .infoquest_client import InfoQuestClient
 readability_extractor = ReadabilityExtractor()


-def _get_infoquest_client(app_config: AppConfig) -> InfoQuestClient:
-    search_config = app_config.get_tool_config("web_search")
+def _get_infoquest_client() -> InfoQuestClient:
+    search_config = get_app_config().get_tool_config("web_search")
    search_time_range = -1
    if search_config is not None and "search_time_range" in search_config.model_extra:
        search_time_range = search_config.model_extra.get("search_time_range")

-    fetch_config = app_config.get_tool_config("web_fetch")
+    fetch_config = get_app_config().get_tool_config("web_fetch")
    fetch_time = -1
    if fetch_config is not None and "fetch_time" in fetch_config.model_extra:
        fetch_time = fetch_config.model_extra.get("fetch_time")
@@ -26,7 +25,7 @@ def _get_infoquest_client(app_config: AppConfig) -> InfoQuestClient:
    if fetch_config is not None and "navigation_timeout" in fetch_config.model_extra:
        navigation_timeout = fetch_config.model_extra.get("navigation_timeout")

-    image_search_config = app_config.get_tool_config("image_search")
+    image_search_config = get_app_config().get_tool_config("image_search")
    image_search_time_range = -1
    if image_search_config is not None and "image_search_time_range" in image_search_config.model_extra:
        image_search_time_range = image_search_config.model_extra.get("image_search_time_range")
@@ -45,18 +44,19 @@ def _get_infoquest_client(app_config: AppConfig) -> InfoQuestClient:


@tool("web_search", parse_docstring=True)
-def web_search_tool(query: str, runtime: ToolRuntime) -> str:
+def web_search_tool(query: str) -> str:
    """Search the web.

    Args:
        query: The query to search for.
    """
-    client = _get_infoquest_client(resolve_context(runtime).app_config)
+
+    client = _get_infoquest_client()
    return client.web_search(query)


@tool("web_fetch", parse_docstring=True)
-def web_fetch_tool(url: str, runtime: ToolRuntime) -> str:
+def web_fetch_tool(url: str) -> str:
    """Fetch the contents of a web page at a given URL.
    Only fetch EXACT URLs that have been provided directly by the user or have been returned in results from the web_search and web_fetch tools.
    This tool can NOT access content that requires authentication, such as private Google Docs or pages behind login walls.
@@ -66,7 +66,7 @@ def web_fetch_tool(url: str, runtime: ToolRuntime) -> str:
    Args:
        url: The URL to fetch the contents of.
    """
-    client = _get_infoquest_client(resolve_context(runtime).app_config)
+    client = _get_infoquest_client()
    result = client.fetch(url)
    if result.startswith("Error: "):
        return result
@@ -75,7 +75,7 @@ def web_fetch_tool(url: str, runtime: ToolRuntime) -> str:


@tool("image_search", parse_docstring=True)
-def image_search_tool(query: str, runtime: ToolRuntime) -> str:
+def image_search_tool(query: str) -> str:
    """Search for images online. Use this tool BEFORE image generation to find reference images for characters, portraits, objects, scenes, or any content requiring visual accuracy.

    **When to use:**
@@ -89,5 +89,5 @@ def image_search_tool(query: str, runtime: ToolRuntime) -> str:
    Args:
        query: The query to search for images.
    """
-    client = _get_infoquest_client(resolve_context(runtime).app_config)
+    client = _get_infoquest_client()
    return client.image_search(query)
@@ -1,16 +1,16 @@
 import asyncio

-from langchain.tools import ToolRuntime, tool
+from langchain.tools import tool

 from deerflow.community.jina_ai.jina_client import JinaClient
-from deerflow.config.deer_flow_context import resolve_context
+from deerflow.config import get_app_config
 from deerflow.utils.readability import ReadabilityExtractor

 readability_extractor = ReadabilityExtractor()


@tool("web_fetch", parse_docstring=True)
-async def web_fetch_tool(url: str, runtime: ToolRuntime) -> str:
+async def web_fetch_tool(url: str) -> str:
    """Fetch the contents of a web page at a given URL.
    Only fetch EXACT URLs that have been provided directly by the user or have been returned in results from the web_search and web_fetch tools.
    This tool can NOT access content that requires authentication, such as private Google Docs or pages behind login walls.
@@ -22,9 +22,9 @@ async def web_fetch_tool(url: str, runtime: ToolRuntime) -> str:
    """
    jina_client = JinaClient()
    timeout = 10
-    tool_config = resolve_context(runtime).app_config.get_tool_config("web_fetch")
-    if tool_config is not None and "timeout" in tool_config.model_extra:
-        timeout = tool_config.model_extra.get("timeout")
+    config = get_app_config().get_tool_config("web_fetch")
+    if config is not None and "timeout" in config.model_extra:
+        timeout = config.model_extra.get("timeout")
    html_content = await jina_client.crawl(url, return_format="html", timeout=timeout)
    if isinstance(html_content, str) and html_content.startswith("Error:"):
        return html_content
@@ -0,0 +1,3 @@
+from .tools import web_search_tool
+
+__all__ = ["web_search_tool"]
@@ -0,0 +1,95 @@
+"""
+Web Search Tool - Search the web using Serper (Google Search API).
+
+Serper provides real-time Google Search results via a JSON API.
+An API key is required. Sign up at https://serper.dev to get one.
+"""
+
+import json
+import logging
+import os
+
+import httpx
+from langchain.tools import tool
+
+from deerflow.config import get_app_config
+
+logger = logging.getLogger(__name__)
+
+_SERPER_ENDPOINT = "https://google.serper.dev/search"
+_api_key_warned = False
+
+
+def _get_api_key() -> str | None:
+    """Resolve the Serper API key.
+
+    The ``api_key`` extra field of the ``web_search`` tool config wins when it
+    is a non-blank string (it is returned as configured, without stripping).
+    Otherwise the ``SERPER_API_KEY`` environment variable is used.
+
+    Returns:
+        The API key, or None when neither source provides one.
+    """
+    config = get_app_config().get_tool_config("web_search")
+    if config is not None:
+        api_key = config.model_extra.get("api_key")
+        if isinstance(api_key, str) and api_key.strip():
+            return api_key
+    return os.getenv("SERPER_API_KEY")
+
+
+@tool("web_search", parse_docstring=True)
+def web_search_tool(query: str, max_results: int = 5) -> str:
+    """Search the web for information using Google Search via Serper.
+
+    Args:
+        query: Search keywords describing what you want to find. Be specific for better results.
+        max_results: Maximum number of search results to return. Default is 5.
+    """
+    # Module-level flag so the missing-key warning is logged only once.
+    global _api_key_warned
+
+    # A ``max_results`` value in the tool config overrides the caller's argument.
+    config = get_app_config().get_tool_config("web_search")
+    if config is not None and "max_results" in config.model_extra:
+        max_results = config.model_extra.get("max_results", max_results)
+
+    api_key = _get_api_key()
+    if not api_key:
+        if not _api_key_warned:
+            _api_key_warned = True
+            logger.warning("Serper API key is not set. Set SERPER_API_KEY in your environment or provide api_key in config.yaml. Sign up at https://serper.dev")
+        # Errors are reported as a JSON string instead of being raised.
+        return json.dumps(
+            {"error": "SERPER_API_KEY is not configured", "query": query},
+            ensure_ascii=False,
+        )
+
+    headers = {
+        "X-API-KEY": api_key,
+        "Content-Type": "application/json",
+    }
+    payload = {"q": query, "num": max_results}
+
+    try:
+        with httpx.Client(timeout=30) as client:
+            response = client.post(_SERPER_ENDPOINT, headers=headers, json=payload)
+        response.raise_for_status()
+        data = response.json()
+    except httpx.HTTPStatusError as e:
+        # The response body is logged, but only the status code is returned to the caller.
+        logger.error(f"Serper API returned HTTP {e.response.status_code}: {e.response.text}")
+        return json.dumps(
+            {"error": f"Serper API error: HTTP {e.response.status_code}", "query": query},
+            ensure_ascii=False,
+        )
+    except Exception as e:
+        logger.error(f"Serper search failed: {type(e).__name__}: {e}")
+        return json.dumps({"error": str(e), "query": query}, ensure_ascii=False)
+
+    # NOTE(review): this runs outside the try block above, so a JSON payload
+    # that is not an object would raise here rather than return an error JSON.
+    organic = data.get("organic", [])
+    if not organic:
+        return json.dumps({"error": "No results found", "query": query}, ensure_ascii=False)
+
+    # Map Serper's ``link``/``snippet`` fields to ``url``/``content`` and cap
+    # the list at ``max_results`` entries.
+    normalized_results = [
+        {
+            "title": r.get("title", ""),
+            "url": r.get("link", ""),
+            "content": r.get("snippet", ""),
+        }
+        for r in organic[:max_results]
+    ]
+
+    output = {
+        "query": query,
+        "total_results": len(normalized_results),
+        "results": normalized_results,
+    }
+    return json.dumps(output, indent=2, ensure_ascii=False)
@@ -1,34 +1,32 @@
 import json

-from langchain.tools import ToolRuntime, tool
+from langchain.tools import tool
 from tavily import TavilyClient

-from deerflow.config.app_config import AppConfig
-from deerflow.config.deer_flow_context import resolve_context
+from deerflow.config import get_app_config


-def _get_tavily_client(app_config: AppConfig) -> TavilyClient:
-    tool_config = app_config.get_tool_config("web_search")
+def _get_tavily_client() -> TavilyClient:
+    """Build a ``TavilyClient`` from the ``web_search`` tool config.
+
+    The ``api_key`` is read from the extra fields of the tool config returned
+    by ``get_app_config()``; ``api_key=None`` is passed when the tool config
+    or the key is absent.
+    """
+    config = get_app_config().get_tool_config("web_search")
    api_key = None
-    if tool_config is not None and "api_key" in tool_config.model_extra:
-        api_key = tool_config.model_extra.get("api_key")
+    if config is not None and "api_key" in config.model_extra:
+        api_key = config.model_extra.get("api_key")
    return TavilyClient(api_key=api_key)


@tool("web_search", parse_docstring=True)
-def web_search_tool(query: str, runtime: ToolRuntime) -> str:
+def web_search_tool(query: str) -> str:
    """Search the web.

    Args:
        query: The query to search for.
    """
-    app_config = resolve_context(runtime).app_config
-    tool_config = app_config.get_tool_config("web_search")
+    config = get_app_config().get_tool_config("web_search")
    max_results = 5
-    if tool_config is not None and "max_results" in tool_config.model_extra:
-        max_results = tool_config.model_extra.get("max_results")
+    if config is not None and "max_results" in config.model_extra:
+        max_results = config.model_extra.get("max_results")

-    client = _get_tavily_client(app_config)
+    client = _get_tavily_client()
    res = client.search(query, max_results=max_results)
    normalized_results = [
        {
@@ -43,7 +41,7 @@ def web_search_tool(query: str, runtime: ToolRuntime) -> str:


@tool("web_fetch", parse_docstring=True)
-def web_fetch_tool(url: str, runtime: ToolRuntime) -> str:
+def web_fetch_tool(url: str) -> str:
    """Fetch the contents of a web page at a given URL.
    Only fetch EXACT URLs that have been provided directly by the user or have been returned in results from the web_search and web_fetch tools.
    This tool can NOT access content that requires authentication, such as private Google Docs or pages behind login walls.
@@ -53,8 +51,7 @@ def web_fetch_tool(url: str, runtime: ToolRuntime) -> str:
    Args:
        url: The URL to fetch the contents of.
    """
-    app_config = resolve_context(runtime).app_config
-    client = _get_tavily_client(app_config)
+    client = _get_tavily_client()
    res = client.extract([url])
    if "failed_results" in res and len(res["failed_results"]) > 0:
        return f"Error: {res['failed_results'][0]['error']}"
@@ -1,6 +1,7 @@
-from .app_config import AppConfig
-from .extensions_config import ExtensionsConfig
-from .memory_config import MemoryConfig
+from .app_config import get_app_config
+from .extensions_config import ExtensionsConfig, get_extensions_config
+from .loop_detection_config import LoopDetectionConfig
+from .memory_config import MemoryConfig, get_memory_config
 from .paths import Paths, get_paths
 from .skill_evolution_config import SkillEvolutionConfig
 from .skills_config import SkillsConfig
@@ -13,16 +14,19 @@ from .tracing_config import (
 )

 __all__ = [
-    "AppConfig",
-    "ExtensionsConfig",
-    "MemoryConfig",
-    "Paths",
+    "get_app_config",
    "SkillEvolutionConfig",
-    "SkillsConfig",
-    "get_enabled_tracing_providers",
-    "get_explicitly_enabled_tracing_providers",
+    "Paths",
    "get_paths",
+    "SkillsConfig",
+    "ExtensionsConfig",
+    "get_extensions_config",
+    "LoopDetectionConfig",
+    "MemoryConfig",
+    "get_memory_config",
    "get_tracing_config",
+    "get_explicitly_enabled_tracing_providers",
+    "get_enabled_tracing_providers",
    "is_tracing_enabled",
    "validate_enabled_tracing_providers",
 ]
@@ -1,13 +1,16 @@
 """ACP (Agent Client Protocol) agent configuration loaded from config.yaml."""

-from pydantic import BaseModel, ConfigDict, Field
+import logging
+from collections.abc import Mapping
+
+from pydantic import BaseModel, Field
+
+logger = logging.getLogger(__name__)


 class ACPAgentConfig(BaseModel):
    """Configuration for a single ACP-compatible agent."""

-    model_config = ConfigDict(frozen=True)
-
    command: str = Field(description="Command to launch the ACP agent subprocess")
    args: list[str] = Field(default_factory=list, description="Additional command arguments")
    env: dict[str, str] = Field(default_factory=dict, description="Environment variables to inject into the agent subprocess. Values starting with $ are resolved from host environment variables.")
@@ -21,3 +24,28 @@ class ACPAgentConfig(BaseModel):
            "are denied — the agent must be configured to operate without requesting permissions."
        ),
    )
+
+
+_acp_agents: dict[str, ACPAgentConfig] = {}
+
+
+def get_acp_agents() -> dict[str, ACPAgentConfig]:
+    """Get the currently configured ACP agents.
+
+    The module-level registry dict itself is returned (not a copy), so
+    mutating the result mutates the registry.
+
+    Returns:
+        Mapping of agent name -> ACPAgentConfig.  Empty dict if no ACP agents are configured.
+    """
+    return _acp_agents
+
+
+def load_acp_config_from_dict(config_dict: Mapping[str, Mapping[str, object]] | None) -> None:
+    """Load ACP agent configuration from a dictionary (typically from config.yaml).
+
+    The module-level registry is rebuilt from scratch on every call, so agents
+    from an earlier load that are absent from ``config_dict`` are dropped.
+
+    Args:
+        config_dict: Mapping of agent name -> config fields. ``None`` is
+            treated as an empty mapping.
+    """
+    global _acp_agents
+    if config_dict is None:
+        config_dict = {}
+    _acp_agents = {name: ACPAgentConfig(**cfg) for name, cfg in config_dict.items()}
+    logger.info("ACP config loaded: %d agent(s): %s", len(_acp_agents), list(_acp_agents.keys()))
@@ -1,14 +1,32 @@
 """Configuration for the custom agents management API."""

-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, Field


 class AgentsApiConfig(BaseModel):
    """Configuration for custom-agent and user-profile management routes."""

-    model_config = ConfigDict(frozen=True)
-
    enabled: bool = Field(
        default=False,
        description=("Whether to expose the custom-agent management API over HTTP. When disabled, the gateway rejects read/write access to custom agent SOUL.md, config, and USER.md prompt-management routes."),
    )
+
+
+_agents_api_config: AgentsApiConfig = AgentsApiConfig()
+
+
+def get_agents_api_config() -> AgentsApiConfig:
+    """Get the current agents API configuration."""
+    return _agents_api_config
+
+
+def set_agents_api_config(config: AgentsApiConfig) -> None:
+    """Set the agents API configuration."""
+    global _agents_api_config
+    _agents_api_config = config
+
+
+def load_agents_api_config_from_dict(config_dict: dict) -> None:
+    """Load agents API configuration from a dictionary."""
+    global _agents_api_config
+    _agents_api_config = AgentsApiConfig(**config_dict)
@@ -1,13 +1,22 @@
-"""Configuration and loaders for custom agents."""
+"""Configuration and loaders for custom agents.
+
+Custom agents are stored per-user under ``{base_dir}/users/{user_id}/agents/{name}/``.
+A legacy shared layout at ``{base_dir}/agents/{name}/`` is still readable so that
+installations that pre-date user isolation continue to work until they run the
+``scripts/migrate_user_isolation.py`` migration. New writes always target the
+per-user layout.
+"""

 import logging
 import re
+from pathlib import Path
 from typing import Any

 import yaml
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel

 from deerflow.config.paths import get_paths
+from deerflow.runtime.user_context import get_effective_user_id

 logger = logging.getLogger(__name__)

@@ -29,8 +38,6 @@ def validate_agent_name(name: str | None) -> str | None:
 class AgentConfig(BaseModel):
    """Configuration for a custom agent."""

-    model_config = ConfigDict(frozen=True)
-
    name: str
    description: str = ""
    model: str | None = None
@@ -42,14 +49,47 @@ class AgentConfig(BaseModel):
    skills: list[str] | None = None


-def load_agent_config(name: str | None) -> AgentConfig | None:
+def resolve_agent_dir(name: str, *, user_id: str | None = None) -> Path:
+    """Return the on-disk directory for an agent, preferring the per-user layout.
+
+    Resolution order:
+    1. ``{base_dir}/users/{user_id}/agents/{name}/`` (per-user, current layout).
+    2. ``{base_dir}/agents/{name}/`` (legacy shared layout — read-only fallback).
+
+    If neither exists, the per-user path is returned so callers that intend to
+    create the agent write into the new layout.
+
+    Args:
+        name: Validated agent name.
+        user_id: Owner of the agent. Defaults to the effective user from the
+            request context (or ``"default"`` in no-auth mode).
+    """
+    paths = get_paths()
+    effective_user = user_id or get_effective_user_id()
+    user_path = paths.user_agent_dir(effective_user, name)
+    if user_path.exists():
+        return user_path
+
+    legacy_path = paths.agent_dir(name)
+    if legacy_path.exists():
+        return legacy_path
+
+    return user_path
+
+
+def load_agent_config(name: str | None, *, user_id: str | None = None) -> AgentConfig | None:
    """Load the custom or default agent's config from its directory.

+    Reads from the per-user layout first; falls back to the legacy shared layout
+    for installations that have not yet been migrated.
+
    Args:
        name: The agent name.
+        user_id: Owner of the agent. Defaults to the effective user from the
+            current request context.

    Returns:
-        AgentConfig instance.
+        AgentConfig instance, or ``None`` if ``name`` is ``None``.

    Raises:
        FileNotFoundError: If the agent directory or config.yaml does not exist.
@@ -60,7 +100,7 @@ def load_agent_config(name: str | None) -> AgentConfig | None:
        return None

    name = validate_agent_name(name)
-    agent_dir = get_paths().agent_dir(name)
+    agent_dir = resolve_agent_dir(name, user_id=user_id)
    config_file = agent_dir / "config.yaml"

    if not agent_dir.exists():
@@ -86,7 +126,7 @@ def load_agent_config(name: str | None) -> AgentConfig | None:
    return AgentConfig(**data)


-def load_agent_soul(agent_name: str | None) -> str | None:
+def load_agent_soul(agent_name: str | None, *, user_id: str | None = None) -> str | None:
    """Read the SOUL.md file for a custom agent, if it exists.

    SOUL.md defines the agent's personality, values, and behavioral guardrails.
@@ -94,11 +134,16 @@ def load_agent_soul(agent_name: str | None) -> str | None:

    Args:
        agent_name: The name of the agent or None for the default agent.
+        user_id: Owner of the agent. Defaults to the effective user from the
+            current request context.

    Returns:
        The SOUL.md content as a string, or None if the file does not exist.
    """
-    agent_dir = get_paths().agent_dir(agent_name) if agent_name else get_paths().base_dir
+    if agent_name:
+        agent_dir = resolve_agent_dir(agent_name, user_id=user_id)
+    else:
+        agent_dir = get_paths().base_dir
    soul_path = agent_dir / SOUL_FILENAME
    if not soul_path.exists():
        return None
@@ -106,32 +151,50 @@ def load_agent_soul(agent_name: str | None) -> str | None:
    return content or None


-def list_custom_agents() -> list[AgentConfig]:
+def list_custom_agents(*, user_id: str | None = None) -> list[AgentConfig]:
    """Scan the agents directory and return all valid custom agents.

+    Returns the union of agents in the per-user layout and the legacy shared
+    layout, so that pre-migration installations remain visible until they are
+    migrated. Per-user entries shadow legacy entries with the same name.
+
+    Args:
+        user_id: Owner whose agents to list. Defaults to the effective user
+            from the current request context.
+
    Returns:
        List of AgentConfig for each valid agent directory found.
    """
-    agents_dir = get_paths().agents_dir
-
-    if not agents_dir.exists():
-        return []
+    paths = get_paths()
+    effective_user = user_id or get_effective_user_id()

+    seen: set[str] = set()
    agents: list[AgentConfig] = []

-    for entry in sorted(agents_dir.iterdir()):
-        if not entry.is_dir():
+    user_root = paths.user_agents_dir(effective_user)
+    legacy_root = paths.agents_dir
+
+    for root in (user_root, legacy_root):
+        if not root.exists():
            continue
+        for entry in sorted(root.iterdir()):
+            if not entry.is_dir():
+                continue
+            if entry.name in seen:
+                continue
+            config_file = entry / "config.yaml"
+            if not config_file.exists():
+                logger.debug(f"Skipping {entry.name}: no config.yaml")
+                continue

-        config_file = entry / "config.yaml"
-        if not config_file.exists():
-            logger.debug(f"Skipping {entry.name}: no config.yaml")
-            continue
-
-        try:
-            agent_cfg = load_agent_config(entry.name)
-            agents.append(agent_cfg)
-        except Exception as e:
-            logger.warning(f"Skipping agent '{entry.name}': {e}")
+            try:
+                agent_cfg = load_agent_config(entry.name, user_id=effective_user)
+                if agent_cfg is None:
+                    continue
+                agents.append(agent_cfg)
+                seen.add(entry.name)
+            except Exception as e:
+                logger.warning(f"Skipping agent '{entry.name}': {e}")

+    agents.sort(key=lambda a: a.name)
    return agents
@@ -1,7 +1,7 @@
-from __future__ import annotations
-
 import logging
 import os
+from collections.abc import Mapping
+from contextvars import ContextVar
 from pathlib import Path
 from typing import Any, Self

@@ -9,25 +9,27 @@ import yaml
 from dotenv import load_dotenv
 from pydantic import BaseModel, ConfigDict, Field

-from deerflow.config.acp_config import ACPAgentConfig
-from deerflow.config.agents_api_config import AgentsApiConfig
-from deerflow.config.checkpointer_config import CheckpointerConfig
+from deerflow.config.acp_config import ACPAgentConfig, load_acp_config_from_dict
+from deerflow.config.agents_api_config import AgentsApiConfig, load_agents_api_config_from_dict
+from deerflow.config.checkpointer_config import CheckpointerConfig, load_checkpointer_config_from_dict
 from deerflow.config.database_config import DatabaseConfig
 from deerflow.config.extensions_config import ExtensionsConfig
-from deerflow.config.guardrails_config import GuardrailsConfig
-from deerflow.config.memory_config import MemoryConfig
+from deerflow.config.guardrails_config import GuardrailsConfig, load_guardrails_config_from_dict
+from deerflow.config.loop_detection_config import LoopDetectionConfig
+from deerflow.config.memory_config import MemoryConfig, load_memory_config_from_dict
 from deerflow.config.model_config import ModelConfig
 from deerflow.config.run_events_config import RunEventsConfig
+from deerflow.config.runtime_paths import existing_project_file
 from deerflow.config.sandbox_config import SandboxConfig
 from deerflow.config.skill_evolution_config import SkillEvolutionConfig
 from deerflow.config.skills_config import SkillsConfig
-from deerflow.config.stream_bridge_config import StreamBridgeConfig
-from deerflow.config.subagents_config import SubagentsAppConfig
-from deerflow.config.summarization_config import SummarizationConfig
-from deerflow.config.title_config import TitleConfig
+from deerflow.config.stream_bridge_config import StreamBridgeConfig, load_stream_bridge_config_from_dict
+from deerflow.config.subagents_config import SubagentsAppConfig, load_subagents_config_from_dict
+from deerflow.config.summarization_config import SummarizationConfig, load_summarization_config_from_dict
+from deerflow.config.title_config import TitleConfig, load_title_config_from_dict
 from deerflow.config.token_usage_config import TokenUsageConfig
 from deerflow.config.tool_config import ToolConfig, ToolGroupConfig
-from deerflow.config.tool_search_config import ToolSearchConfig
+from deerflow.config.tool_search_config import ToolSearchConfig, load_tool_search_config_from_dict

 load_dotenv()

@@ -47,17 +49,41 @@ class CircuitBreakerConfig(BaseModel):
    recovery_timeout_sec: int = Field(default=60, description="Time in seconds before attempting to recover the circuit")


-def _default_config_candidates() -> tuple[Path, ...]:
-    """Return deterministic config.yaml locations without relying on cwd."""
+def _legacy_config_candidates() -> tuple[Path, ...]:
+    """Return source-tree config.yaml locations for monorepo compatibility."""
    backend_dir = Path(__file__).resolve().parents[4]
    repo_root = backend_dir.parent
    return (backend_dir / "config.yaml", repo_root / "config.yaml")


+def logging_level_from_config(name: str | None) -> int:
+    """Map ``config.yaml`` ``log_level`` string to a :mod:`logging` level constant.
+
+    The lookup ignores surrounding whitespace and letter case. ``None``, an
+    empty string, and any name missing from
+    :func:`logging.getLevelNamesMapping` all resolve to ``logging.INFO``.
+
+    Args:
+        name: Level name such as ``"debug"`` or ``"WARNING"``, or ``None``.
+
+    Returns:
+        The numeric logging level.
+    """
+    mapping = logging.getLevelNamesMapping()
+    return mapping.get((name or "info").strip().upper(), logging.INFO)
+
+
+def apply_logging_level(name: str | None) -> None:
+    """Resolve *name* to a logging level and apply it to the ``deerflow``/``app`` logger hierarchies.
+
+    Only the ``deerflow`` and ``app`` logger levels are changed so that
+    third-party library verbosity (e.g. uvicorn, sqlalchemy) is not
+    affected. Root handler levels are lowered (never raised) so that
+    messages from the configured loggers can propagate through without
+    being filtered, while preserving handler thresholds that may be
+    intentionally restrictive for third-party log output.
+    """
+    level = logging_level_from_config(name)
+    for logger_name in ("deerflow", "app"):
+        logging.getLogger(logger_name).setLevel(level)
+    for handler in logging.root.handlers:
+        if level < handler.level:
+            handler.setLevel(level)
+
+
 class AppConfig(BaseModel):
    """Config for the DeerFlow application"""

-    log_level: str = Field(default="info", description="Logging level for deerflow modules (debug/info/warning/error)")
+    log_level: str = Field(default="info", description="Logging level for deerflow and app modules (debug/info/warning/error); third-party libraries are not affected")
    token_usage: TokenUsageConfig = Field(default_factory=TokenUsageConfig, description="Token usage tracking configuration")
    models: list[ModelConfig] = Field(default_factory=list, description="Available models")
    sandbox: SandboxConfig = Field(description="Sandbox configuration")
@@ -71,15 +97,16 @@ class AppConfig(BaseModel):
    summarization: SummarizationConfig = Field(default_factory=SummarizationConfig, description="Conversation summarization configuration")
    memory: MemoryConfig = Field(default_factory=MemoryConfig, description="Memory subsystem configuration")
    agents_api: AgentsApiConfig = Field(default_factory=AgentsApiConfig, description="Custom-agent management API configuration")
+    acp_agents: dict[str, ACPAgentConfig] = Field(default_factory=dict, description="ACP-compatible agent configuration")
    subagents: SubagentsAppConfig = Field(default_factory=SubagentsAppConfig, description="Subagent runtime configuration")
    guardrails: GuardrailsConfig = Field(default_factory=GuardrailsConfig, description="Guardrail middleware configuration")
    circuit_breaker: CircuitBreakerConfig = Field(default_factory=CircuitBreakerConfig, description="LLM circuit breaker configuration")
+    loop_detection: LoopDetectionConfig = Field(default_factory=LoopDetectionConfig, description="Loop detection middleware configuration")
+    model_config = ConfigDict(extra="allow")
    database: DatabaseConfig = Field(default_factory=DatabaseConfig, description="Unified database backend configuration")
    run_events: RunEventsConfig = Field(default_factory=RunEventsConfig, description="Run event storage configuration")
-    model_config = ConfigDict(extra="allow", frozen=True)
    checkpointer: CheckpointerConfig | None = Field(default=None, description="Checkpointer configuration")
    stream_bridge: StreamBridgeConfig | None = Field(default=None, description="Stream bridge configuration")
-    acp_agents: dict[str, ACPAgentConfig] = Field(default_factory=dict, description="ACP agent configurations keyed by agent name")

    @classmethod
    def resolve_config_path(cls, config_path: str | None = None) -> Path:
@@ -88,7 +115,8 @@ class AppConfig(BaseModel):
        Priority:
        1. If provided `config_path` argument, use it.
        2. If provided `DEER_FLOW_CONFIG_PATH` environment variable, use it.
-        3. Otherwise, search deterministic backend/repository-root defaults from `_default_config_candidates()`.
+        3. Otherwise, search the caller project root.
+        4. Finally, search legacy backend/repository-root defaults for monorepo compatibility.
        """
        if config_path:
            path = Path(config_path)
@@ -101,10 +129,14 @@ class AppConfig(BaseModel):
                raise FileNotFoundError(f"Config file specified by environment variable `DEER_FLOW_CONFIG_PATH` not found at {path}")
            return path
        else:
-            for path in _default_config_candidates():
+            project_config = existing_project_file(("config.yaml",))
+            if project_config is not None:
+                return project_config
+
+            for path in _legacy_config_candidates():
                if path.exists():
                    return path
-            raise FileNotFoundError("`config.yaml` file not found at the default backend or repository root locations")
+            raise FileNotFoundError("`config.yaml` file not found in the project root or legacy backend/repository root locations")

    @classmethod
    def from_file(cls, config_path: str | None = None) -> Self:
@@ -128,13 +160,54 @@ class AppConfig(BaseModel):
        config_data = cls.resolve_env_variables(config_data)
        cls._apply_database_defaults(config_data)

+        # Load circuit_breaker config if present
+        # NOTE(review): the assignment below writes the value back under the same
+        # key, so it has no effect; the section reaches model_validate() unchanged.
+        # Candidate for removal.
+        if "circuit_breaker" in config_data:
+            config_data["circuit_breaker"] = config_data["circuit_breaker"]
+
        # Load extensions config separately (it's in a different file)
        extensions_config = ExtensionsConfig.from_file()
        config_data["extensions"] = extensions_config.model_dump()

        result = cls.model_validate(config_data)
+        acp_agents = cls._validate_acp_agents(config_data.get("acp_agents", {}))
+        cls._apply_singleton_configs(result, acp_agents)
        return result

+    @classmethod
+    def _validate_acp_agents(
+        cls,
+        config_data: Mapping[str, Mapping[str, object]] | None,
+    ) -> dict[str, ACPAgentConfig]:
+        """Build ``ACPAgentConfig`` objects from the raw ``acp_agents`` mapping.
+
+        Args:
+            config_data: Mapping of agent name -> config fields. ``None`` is
+                treated as an empty mapping.
+
+        Returns:
+            Dict of agent name -> ACPAgentConfig.
+        """
+        if config_data is None:
+            config_data = {}
+        return {name: ACPAgentConfig(**cfg) for name, cfg in config_data.items()}
+
+    @classmethod
+    def _apply_singleton_configs(cls, config: Self, acp_agents: dict[str, ACPAgentConfig]) -> None:
+        """Propagate a validated config into the per-module singleton configs.
+
+        Each section is dumped to a plain dict and handed to the matching
+        ``load_*_from_dict`` loader. When the checkpointer section differs from
+        the one that was loaded before this call, the runtime checkpointer and
+        store are reset.
+
+        Args:
+            config: The validated AppConfig whose sections are propagated.
+            acp_agents: ACP agent configs keyed by agent name.
+        """
+        from deerflow.config.checkpointer_config import get_checkpointer_config
+
+        # Capture the current value now: load_checkpointer_config_from_dict()
+        # below overwrites it, and the comparison further down needs the old one.
+        previous_checkpointer_config = get_checkpointer_config()
+
+        load_title_config_from_dict(config.title.model_dump())
+        load_summarization_config_from_dict(config.summarization.model_dump())
+        load_memory_config_from_dict(config.memory.model_dump())
+        load_agents_api_config_from_dict(config.agents_api.model_dump())
+        load_subagents_config_from_dict(config.subagents.model_dump())
+        load_tool_search_config_from_dict(config.tool_search.model_dump())
+        load_guardrails_config_from_dict(config.guardrails.model_dump())
+        load_checkpointer_config_from_dict(config.checkpointer.model_dump() if config.checkpointer is not None else None)
+        load_stream_bridge_config_from_dict(config.stream_bridge.model_dump() if config.stream_bridge is not None else None)
+        load_acp_config_from_dict({name: agent.model_dump() for name, agent in acp_agents.items()})
+
+        if previous_checkpointer_config != config.checkpointer:
+            # These runtime singletons derive their backend from checkpointer config.
+            # Keep imports local to avoid cycles: both providers import get_app_config.
+            from deerflow.runtime.checkpointer import reset_checkpointer
+            from deerflow.runtime.store import reset_store
+
+            reset_checkpointer()
+            reset_store()
+
    @classmethod
    def _apply_database_defaults(cls, config_data: dict[str, Any]) -> None:
        """Apply config.yaml defaults for persistence when the section is absent."""
@@ -250,8 +323,133 @@ class AppConfig(BaseModel):
        """
        return next((group for group in self.tool_groups if group.name == name), None)

-    # AppConfig is a pure value object: construct with ``from_file()``, pass around.
-    # Composition roots that hold the resolved instance:
-    #   - Gateway:   ``app.state.config`` via ``Depends(get_config)``
-    #   - Client:    ``DeerFlowClient._app_config``
-    #   - Agent run: ``Runtime[DeerFlowContext].context.app_config``
+
+# Compatibility singleton layer for code paths that have not yet been
+# migrated to explicit ``AppConfig`` threading. New composition roots should
+# prefer constructing ``AppConfig`` once and passing it down directly.
+_app_config: AppConfig | None = None
+_app_config_path: Path | None = None
+_app_config_mtime: float | None = None
+_app_config_is_custom = False
+_current_app_config: ContextVar[AppConfig | None] = ContextVar("deerflow_current_app_config", default=None)
+_current_app_config_stack: ContextVar[tuple[AppConfig | None, ...]] = ContextVar("deerflow_current_app_config_stack", default=())
+
+
+def _get_config_mtime(config_path: Path) -> float | None:
+    """Get the modification time of a config file if it exists."""
+    try:
+        return config_path.stat().st_mtime
+    except OSError:
+        return None
+
+
+def _load_and_cache_app_config(config_path: str | None = None) -> AppConfig:
+    """Load config from disk and refresh cache metadata.
+
+    Besides the instance itself, the resolved path and its modification time
+    are stored, and the "custom instance" flag is cleared so that a config
+    previously injected with ``set_app_config()`` no longer takes precedence.
+
+    Args:
+        config_path: Optional explicit path; when omitted,
+            ``AppConfig.resolve_config_path()`` picks the location.
+
+    Returns:
+        The freshly loaded AppConfig.
+    """
+    global _app_config, _app_config_path, _app_config_mtime, _app_config_is_custom
+
+    resolved_path = AppConfig.resolve_config_path(config_path)
+    _app_config = AppConfig.from_file(str(resolved_path))
+    _app_config_path = resolved_path
+    _app_config_mtime = _get_config_mtime(resolved_path)
+    _app_config_is_custom = False
+    return _app_config
+
+
+def get_app_config() -> AppConfig:
+    """Get the DeerFlow config instance.
+
+    Resolution order:
+
+    1. The runtime-scoped override set by ``push_current_app_config()``, when
+       one is active in the current execution context.
+    2. An instance injected with ``set_app_config()``; it is returned as-is
+       and is not refreshed from disk.
+    3. The file-backed cached singleton, which is (re)loaded when nothing is
+       cached yet or when the resolved config path or its modification time
+       differs from the cached values.
+
+    Use `reload_app_config()` to force a reload, or `reset_app_config()` to clear
+    the cache.
+
+    Raises:
+        FileNotFoundError: Propagated from ``AppConfig.resolve_config_path()``
+            when step 3 is reached and no config file can be located.
+    """
+    global _app_config, _app_config_path, _app_config_mtime
+
+    runtime_override = _current_app_config.get()
+    if runtime_override is not None:
+        return runtime_override
+
+    if _app_config is not None and _app_config_is_custom:
+        return _app_config
+
+    # The path is re-resolved and stat'ed on every call that reaches this point,
+    # so both a moved config file and an in-place edit are detected.
+    resolved_path = AppConfig.resolve_config_path()
+    current_mtime = _get_config_mtime(resolved_path)
+
+    should_reload = _app_config is None or _app_config_path != resolved_path or _app_config_mtime != current_mtime
+    if should_reload:
+        # Log only for an in-place modification (same path, both mtimes known);
+        # the first load and a path switch reload without this message.
+        if _app_config_path == resolved_path and _app_config_mtime is not None and current_mtime is not None and _app_config_mtime != current_mtime:
+            logger.info(
+                "Config file has been modified (mtime: %s -> %s), reloading AppConfig",
+                _app_config_mtime,
+                current_mtime,
+            )
+        _load_and_cache_app_config(str(resolved_path))
+    return _app_config
+
+
+def reload_app_config(config_path: str | None = None) -> AppConfig:
+    """Reload the config from file and update the cached instance.
+
+    This is useful when the config file has been modified and you want
+    to pick up the changes without restarting the application.
+
+    Args:
+        config_path: Optional path to config file. If not provided,
+                     uses the default resolution strategy.
+
+    Returns:
+        The newly loaded AppConfig instance.
+    """
+    return _load_and_cache_app_config(config_path)
+
+
+def reset_app_config() -> None:
+    """Reset the cached config instance.
+
+    This clears the singleton cache, causing the next call to
+    `get_app_config()` to reload from file. Useful for testing
+    or when switching between different configurations.
+    """
+    global _app_config, _app_config_path, _app_config_mtime, _app_config_is_custom
+    _app_config = None
+    _app_config_path = None
+    _app_config_mtime = None
+    _app_config_is_custom = False
+
+
+def set_app_config(config: AppConfig) -> None:
+    """Set a custom config instance.
+
+    This allows injecting a custom or mock config for testing purposes.
+    The instance is flagged as custom and the cached path/mtime are cleared,
+    so ``get_app_config()`` returns it without consulting the config file
+    until ``reset_app_config()`` or ``reload_app_config()`` replaces it.
+
+    Args:
+        config: The AppConfig instance to use.
+    """
+    global _app_config, _app_config_path, _app_config_mtime, _app_config_is_custom
+    _app_config = config
+    _app_config_path = None
+    _app_config_mtime = None
+    _app_config_is_custom = True
+
+
+def peek_current_app_config() -> AppConfig | None:
+    """Return the runtime-scoped AppConfig override, if one is active."""
+    return _current_app_config.get()
+
+
+def push_current_app_config(config: AppConfig) -> None:
+    """Push a runtime-scoped AppConfig override for the current execution context.
+
+    The override that was active before this call (possibly ``None``) is
+    appended to ``_current_app_config_stack`` so that
+    ``pop_current_app_config()`` can restore it; pushes may therefore nest.
+
+    Args:
+        config: The AppConfig that ``get_app_config()`` returns while this
+            override is active.
+    """
+    stack = _current_app_config_stack.get()
+    _current_app_config_stack.set(stack + (_current_app_config.get(),))
+    _current_app_config.set(config)
+
+
+def pop_current_app_config() -> None:
+    """Pop the latest runtime-scoped AppConfig override for the current execution context.
+
+    Restores the override that was active before the matching
+    ``push_current_app_config()`` call. If the saved stack is empty (a pop
+    without a prior push), the override is simply cleared.
+    """
+    stack = _current_app_config_stack.get()
+    if not stack:
+        _current_app_config.set(None)
+        return
+    previous = stack[-1]
+    _current_app_config_stack.set(stack[:-1])
+    _current_app_config.set(previous)
@@ -2,7 +2,7 @@

 from typing import Literal

-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, Field

 CheckpointerType = Literal["memory", "sqlite", "postgres"]

@@ -10,18 +10,41 @@ CheckpointerType = Literal["memory", "sqlite", "postgres"]
 class CheckpointerConfig(BaseModel):
    """Configuration for LangGraph state persistence checkpointer."""

-    model_config = ConfigDict(frozen=True)
-
    type: CheckpointerType = Field(
        description="Checkpointer backend type. "
        "'memory' is in-process only (lost on restart). "
        "'sqlite' persists to a local file (requires langgraph-checkpoint-sqlite). "
-        "'postgres' persists to PostgreSQL (requires langgraph-checkpoint-postgres)."
+        "'postgres' persists to PostgreSQL (install with deerflow-harness[postgres])."
    )
    connection_string: str | None = Field(
        default=None,
        description="Connection string for sqlite (file path) or postgres (DSN). "
-        "Required for sqlite and postgres types. "
+        "Optional for sqlite and defaults to 'store.db' when omitted. "
+        "Required for postgres. "
        "For sqlite, use a file path like '.deer-flow/checkpoints.db' or ':memory:' for in-memory. "
        "For postgres, use a DSN like 'postgresql://user:pass@localhost:5432/db'.",
    )
+
+
+# Global configuration instance — None means no checkpointer is configured.
+_checkpointer_config: CheckpointerConfig | None = None
+
+
+def get_checkpointer_config() -> CheckpointerConfig | None:
+    """Get the current checkpointer configuration, or None if not configured."""
+    return _checkpointer_config
+
+
+def set_checkpointer_config(config: CheckpointerConfig | None) -> None:
+    """Set the checkpointer configuration."""
+    global _checkpointer_config
+    _checkpointer_config = config
+
+
+def load_checkpointer_config_from_dict(config_dict: dict | None) -> None:
+    """Load checkpointer configuration from a dictionary."""
+    global _checkpointer_config
+    if config_dict is None:
+        _checkpointer_config = None
+        return
+    _checkpointer_config = CheckpointerConfig(**config_dict)
@@ -34,11 +34,10 @@ from __future__ import annotations
 import os
 from typing import Literal

-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, Field


 class DatabaseConfig(BaseModel):
-    model_config = ConfigDict(frozen=True)
    backend: Literal["memory", "sqlite", "postgres"] = Field(
        default="memory",
        description=("Storage backend for both checkpointer and application data. 'memory' for development (no persistence across restarts), 'sqlite' for single-node deployment, 'postgres' for production multi-node deployment."),
@@ -1,55 +0,0 @@
-"""Per-invocation context for DeerFlow agent execution.
-
-Injected via LangGraph Runtime. Middleware and tools access this
-via Runtime[DeerFlowContext] parameters, through resolve_context().
-"""
-
-from __future__ import annotations
-
-import logging
-from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any
-
-if TYPE_CHECKING:
-    from deerflow.config.app_config import AppConfig
-
-logger = logging.getLogger(__name__)
-
-
-@dataclass(frozen=True)
-class DeerFlowContext:
-    """Typed, immutable, per-invocation context injected via LangGraph Runtime.
-
-    Fields are all known at run start and never change during execution.
-    Mutable runtime state (e.g. sandbox_id) flows through ThreadState, not here.
-    """
-
-    app_config: AppConfig
-    thread_id: str
-    agent_name: str | None = None
-
-
-def resolve_context(runtime: Any) -> DeerFlowContext:
-    """Return the typed DeerFlowContext that the runtime carries.
-
-    Gateway mode (``DeerFlowClient``, ``run_agent``) always attaches a typed
-    ``DeerFlowContext`` via ``agent.astream(context=...)``; the LangGraph
-    Server path uses ``langgraph.json`` registration where the top-level
-    ``make_lead_agent`` loads ``AppConfig`` from disk itself, so we still
-    arrive here with a typed context.
-
-    Only the dict/None shapes that legacy tests used to exercise would fall
-    through this function; we now reject them loudly instead of papering
-    over the missing context with an ambient ``AppConfig`` lookup.
-    """
-    ctx = getattr(runtime, "context", None)
-    if isinstance(ctx, DeerFlowContext):
-        return ctx
-
-    raise RuntimeError(
-        "resolve_context: runtime.context is not a DeerFlowContext "
-        "(got type %s). Every entry point must attach one at invoke time — "
-        "Gateway/Client via agent.astream(context=DeerFlowContext(...)), "
-        "LangGraph Server via the make_lead_agent boundary that loads "
-        "AppConfig.from_file()." % type(ctx).__name__
-    )
@@ -7,12 +7,12 @@ from typing import Any, Literal

 from pydantic import BaseModel, ConfigDict, Field

+from deerflow.config.runtime_paths import existing_project_file
+

 class McpOAuthConfig(BaseModel):
    """OAuth configuration for an MCP server (HTTP/SSE transports)."""

-    model_config = ConfigDict(extra="allow", frozen=True)
-
    enabled: bool = Field(default=True, description="Whether OAuth token injection is enabled")
    token_url: str = Field(description="OAuth token endpoint URL")
    grant_type: Literal["client_credentials", "refresh_token"] = Field(
@@ -30,13 +30,12 @@ class McpOAuthConfig(BaseModel):
    default_token_type: str = Field(default="Bearer", description="Default token type when missing in token response")
    refresh_skew_seconds: int = Field(default=60, description="Refresh token this many seconds before expiry")
    extra_token_params: dict[str, str] = Field(default_factory=dict, description="Additional form params sent to token endpoint")
+    model_config = ConfigDict(extra="allow")


 class McpServerConfig(BaseModel):
    """Configuration for a single MCP server."""

-    model_config = ConfigDict(extra="allow", frozen=True)
-
    enabled: bool = Field(default=True, description="Whether this MCP server is enabled")
    type: str = Field(default="stdio", description="Transport type: 'stdio', 'sse', or 'http'")
    command: str | None = Field(default=None, description="Command to execute to start the MCP server (for stdio type)")
@@ -46,13 +45,12 @@ class McpServerConfig(BaseModel):
    headers: dict[str, str] = Field(default_factory=dict, description="HTTP headers to send (for sse or http type)")
    oauth: McpOAuthConfig | None = Field(default=None, description="OAuth configuration (for sse or http type)")
    description: str = Field(default="", description="Human-readable description of what this MCP server provides")
+    model_config = ConfigDict(extra="allow")


 class SkillStateConfig(BaseModel):
    """Configuration for a single skill's state."""

-    model_config = ConfigDict(frozen=True)
-
    enabled: bool = Field(default=True, description="Whether this skill is enabled")


@@ -68,7 +66,7 @@ class ExtensionsConfig(BaseModel):
        default_factory=dict,
        description="Map of skill name to state configuration",
    )
-    model_config = ConfigDict(extra="allow", frozen=True, populate_by_name=True)
+    model_config = ConfigDict(extra="allow", populate_by_name=True)

    @classmethod
    def resolve_config_path(cls, config_path: str | None = None) -> Path | None:
@@ -77,8 +75,8 @@ class ExtensionsConfig(BaseModel):
        Priority:
        1. If provided `config_path` argument, use it.
        2. If provided `DEER_FLOW_EXTENSIONS_CONFIG_PATH` environment variable, use it.
-        3. Otherwise, check for `extensions_config.json` in the current directory, then in the parent directory.
-        4. For backward compatibility, also check for `mcp_config.json` if `extensions_config.json` is not found.
+        3. Otherwise, search the caller project root for `extensions_config.json`, then `mcp_config.json`.
+        4. For backward compatibility, also search legacy backend/repository-root defaults.
        5. If not found, return None (extensions are optional).

        Args:
@@ -87,8 +85,9 @@ class ExtensionsConfig(BaseModel):
        Resolution order:
            1. If provided `config_path` argument, use it.
            2. If provided `DEER_FLOW_EXTENSIONS_CONFIG_PATH` environment variable, use it.
-            3. Otherwise, search backend/repository-root defaults for
+            3. Otherwise, search the caller project root for
               `extensions_config.json`, then legacy `mcp_config.json`.
+            4. Finally, search backend/repository-root defaults for monorepo compatibility.

        Returns:
            Path to the extensions config file if found, otherwise None.
@@ -104,6 +103,10 @@ class ExtensionsConfig(BaseModel):
                raise FileNotFoundError(f"Extensions config file specified by environment variable `DEER_FLOW_EXTENSIONS_CONFIG_PATH` not found at {path}")
            return path
        else:
+            project_config = existing_project_file(("extensions_config.json", "mcp_config.json"))
+            if project_config is not None:
+                return project_config
+
            backend_dir = Path(__file__).resolve().parents[4]
            repo_root = backend_dir.parent
            for path in (
@@ -199,3 +202,62 @@ class ExtensionsConfig(BaseModel):
            # Default to enable for public & custom skill
            return skill_category in ("public", "custom")
        return skill_config.enabled
+
+
+_extensions_config: ExtensionsConfig | None = None
+
+
+def get_extensions_config() -> ExtensionsConfig:
+    """Get the extensions config instance.
+
+    Returns a cached singleton instance. Use `reload_extensions_config()` to reload
+    from file, or `reset_extensions_config()` to clear the cache.
+
+    Returns:
+        The cached ExtensionsConfig instance.
+    """
+    global _extensions_config
+    if _extensions_config is None:
+        _extensions_config = ExtensionsConfig.from_file()
+    return _extensions_config
+
+
+def reload_extensions_config(config_path: str | None = None) -> ExtensionsConfig:
+    """Reload the extensions config from file and update the cached instance.
+
+    This is useful when the config file has been modified and you want
+    to pick up the changes without restarting the application.
+
+    Args:
+        config_path: Optional path to extensions config file. If not provided,
+                     uses the default resolution strategy.
+
+    Returns:
+        The newly loaded ExtensionsConfig instance.
+    """
+    global _extensions_config
+    _extensions_config = ExtensionsConfig.from_file(config_path)
+    return _extensions_config
+
+
+def reset_extensions_config() -> None:
+    """Reset the cached extensions config instance.
+
+    This clears the singleton cache, causing the next call to
+    `get_extensions_config()` to reload from file. Useful for testing
+    or when switching between different configurations.
+    """
+    global _extensions_config
+    _extensions_config = None
+
+
+def set_extensions_config(config: ExtensionsConfig) -> None:
+    """Set a custom extensions config instance.
+
+    This allows injecting a custom or mock config for testing purposes.
+
+    Args:
+        config: The ExtensionsConfig instance to use.
+    """
+    global _extensions_config
+    _extensions_config = config
@@ -1,13 +1,11 @@
 """Configuration for pre-tool-call authorization."""

-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, Field


 class GuardrailProviderConfig(BaseModel):
    """Configuration for a guardrail provider."""

-    model_config = ConfigDict(frozen=True)
-
    use: str = Field(description="Class path (e.g. 'deerflow.guardrails.builtin:AllowlistProvider')")
    config: dict = Field(default_factory=dict, description="Provider-specific settings passed as kwargs")

@@ -20,9 +18,31 @@ class GuardrailsConfig(BaseModel):
    agent's passport reference, and returns an allow/deny decision.
    """

-    model_config = ConfigDict(frozen=True)
-
    enabled: bool = Field(default=False, description="Enable guardrail middleware")
    fail_closed: bool = Field(default=True, description="Block tool calls if provider errors")
    passport: str | None = Field(default=None, description="OAP passport path or hosted agent ID")
    provider: GuardrailProviderConfig | None = Field(default=None, description="Guardrail provider configuration")
+
+
+_guardrails_config: GuardrailsConfig | None = None
+
+
+def get_guardrails_config() -> GuardrailsConfig:
+    """Get the guardrails config, returning defaults if not loaded."""
+    global _guardrails_config
+    if _guardrails_config is None:
+        _guardrails_config = GuardrailsConfig()
+    return _guardrails_config
+
+
+def load_guardrails_config_from_dict(data: dict) -> GuardrailsConfig:
+    """Load guardrails config from a dict (called during AppConfig loading)."""
+    global _guardrails_config
+    _guardrails_config = GuardrailsConfig.model_validate(data)
+    return _guardrails_config
+
+
+def reset_guardrails_config() -> None:
+    """Reset the cached config instance. Used in tests to prevent singleton leaks."""
+    global _guardrails_config
+    _guardrails_config = None
@@ -0,0 +1,73 @@
+"""Configuration for loop detection middleware."""
+
+from pydantic import BaseModel, Field, model_validator
+
+
+class ToolFreqOverride(BaseModel):
+    """Per-tool frequency threshold override.
+
+    Can be higher or lower than the global defaults. Commonly used to raise
+    thresholds for high-frequency tools like bash in batch workflows (e.g.
+    RNA-seq pipelines) without weakening protection on every other tool.
+    """
+
+    warn: int = Field(ge=1)
+    hard_limit: int = Field(ge=1)
+
+    @model_validator(mode="after")
+    def _validate(self) -> "ToolFreqOverride":
+        if self.hard_limit < self.warn:
+            raise ValueError("hard_limit must be >= warn")
+        return self
+
+
+class LoopDetectionConfig(BaseModel):
+    """Configuration for repetitive tool-call loop detection."""
+
+    enabled: bool = Field(
+        default=True,
+        description="Whether to enable repetitive tool-call loop detection",
+    )
+    warn_threshold: int = Field(
+        default=3,
+        ge=1,
+        description="Number of identical tool-call sets before injecting a warning",
+    )
+    hard_limit: int = Field(
+        default=5,
+        ge=1,
+        description="Number of identical tool-call sets before forcing a stop",
+    )
+    window_size: int = Field(
+        default=20,
+        ge=1,
+        description="Number of recent tool-call sets to track per thread",
+    )
+    max_tracked_threads: int = Field(
+        default=100,
+        ge=1,
+        description="Maximum number of thread histories to keep in memory",
+    )
+    tool_freq_warn: int = Field(
+        default=30,
+        ge=1,
+        description="Number of calls to the same tool type before injecting a frequency warning",
+    )
+    tool_freq_hard_limit: int = Field(
+        default=50,
+        ge=1,
+        description="Number of calls to the same tool type before forcing a stop",
+    )
+    tool_freq_overrides: dict[str, ToolFreqOverride] = Field(
+        default_factory=dict,
+        description=("Per-tool overrides for tool_freq_warn / tool_freq_hard_limit, keyed by tool name. Values can be higher or lower than the global defaults. Commonly used to raise thresholds for high-frequency tools like bash."),
+    )
+
+    @model_validator(mode="after")
+    def validate_thresholds(self) -> "LoopDetectionConfig":
+        """Ensure hard stop cannot happen before the warning threshold."""
+        if self.hard_limit < self.warn_threshold:
+            raise ValueError("hard_limit must be greater than or equal to warn_threshold")
+        if self.tool_freq_hard_limit < self.tool_freq_warn:
+            raise ValueError("tool_freq_hard_limit must be greater than or equal to tool_freq_warn")
+        return self
@@ -1,13 +1,11 @@
 """Configuration for memory mechanism."""

-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, Field


 class MemoryConfig(BaseModel):
    """Configuration for global memory mechanism."""

-    model_config = ConfigDict(frozen=True)
-
    enabled: bool = Field(
        default=True,
        description="Whether to enable memory mechanism",
@@ -62,3 +60,24 @@ class MemoryConfig(BaseModel):
        le=8000,
        description="Maximum tokens to use for memory injection",
    )
+
+
+# Global configuration instance
+_memory_config: MemoryConfig = MemoryConfig()
+
+
+def get_memory_config() -> MemoryConfig:
+    """Get the current memory configuration."""
+    return _memory_config
+
+
+def set_memory_config(config: MemoryConfig) -> None:
+    """Set the memory configuration."""
+    global _memory_config
+    _memory_config = config
+
+
+def load_memory_config_from_dict(config_dict: dict) -> None:
+    """Load memory configuration from a dictionary."""
+    global _memory_config
+    _memory_config = MemoryConfig(**config_dict)
@@ -12,7 +12,7 @@ class ModelConfig(BaseModel):
        description="Class path of the model provider(e.g. langchain_openai.ChatOpenAI)",
    )
    model: str = Field(..., description="Model name")
-    model_config = ConfigDict(extra="allow", frozen=True)
+    model_config = ConfigDict(extra="allow")
    use_responses_api: bool | None = Field(
        default=None,
        description="Whether to route OpenAI ChatOpenAI calls through the /v1/responses API",
@@ -3,6 +3,8 @@ import re
 import shutil
 from pathlib import Path, PureWindowsPath

+from deerflow.config.runtime_paths import runtime_home
+
 # Virtual path prefix seen by agents inside the sandbox
 VIRTUAL_PATH_PREFIX = "/mnt/user-data"

@@ -11,9 +13,8 @@ _SAFE_USER_ID_RE = re.compile(r"^[A-Za-z0-9_\-]+$")


 def _default_local_base_dir() -> Path:
-    """Return the repo-local DeerFlow state directory without relying on cwd."""
-    backend_dir = Path(__file__).resolve().parents[4]
-    return backend_dir / ".deer-flow"
+    """Return the caller project's writable DeerFlow state directory."""
+    return runtime_home()


 def _validate_thread_id(thread_id: str) -> str:
@@ -81,7 +82,7 @@ class Paths:
    BaseDir resolution (in priority order):
        1. Constructor argument `base_dir`
        2. DEER_FLOW_HOME environment variable
-        3. Repo-local fallback derived from this module path: `{backend_dir}/.deer-flow`
+        3. Caller project fallback: `{project_root}/.deer-flow`
    """

    def __init__(self, base_dir: str | Path | None = None) -> None:
@@ -131,15 +132,20 @@ class Paths:

    @property
    def agents_dir(self) -> Path:
-        """Root directory for all custom agents: `{base_dir}/agents/`."""
+        """Legacy root for shared (pre user-isolation) custom agents: `{base_dir}/agents/`.
+
+        New code should use :meth:`user_agents_dir` instead. This property remains
+        only as a read-side fallback for installations that have not yet run the
+        ``migrate_user_isolation.py`` script.
+        """
        return self.base_dir / "agents"

    def agent_dir(self, name: str) -> Path:
-        """Directory for a specific agent: `{base_dir}/agents/{name}/`."""
+        """Legacy per-agent directory (no user isolation): `{base_dir}/agents/{name}/`."""
        return self.agents_dir / name.lower()

    def agent_memory_file(self, name: str) -> Path:
-        """Per-agent memory file: `{base_dir}/agents/{name}/memory.json`."""
+        """Legacy per-agent memory file: `{base_dir}/agents/{name}/memory.json`."""
        return self.agent_dir(name) / "memory.json"

    def user_dir(self, user_id: str) -> Path:
@@ -150,9 +156,17 @@ class Paths:
        """Per-user memory file: `{base_dir}/users/{user_id}/memory.json`."""
        return self.user_dir(user_id) / "memory.json"

+    def user_agents_dir(self, user_id: str) -> Path:
+        """Per-user root for that user's custom agents: `{base_dir}/users/{user_id}/agents/`."""
+        return self.user_dir(user_id) / "agents"
+
+    def user_agent_dir(self, user_id: str, agent_name: str) -> Path:
+        """Per-user per-agent directory: `{base_dir}/users/{user_id}/agents/{name}/`."""
+        return self.user_agents_dir(user_id) / agent_name.lower()
+
    def user_agent_memory_file(self, user_id: str, agent_name: str) -> Path:
        """Per-user per-agent memory: `{base_dir}/users/{user_id}/agents/{name}/memory.json`."""
-        return self.user_dir(user_id) / "agents" / agent_name.lower() / "memory.json"
+        return self.user_agent_dir(user_id, agent_name) / "memory.json"

    def thread_dir(self, thread_id: str, *, user_id: str | None = None) -> Path:
        """
@@ -15,11 +15,10 @@ from __future__ import annotations

 from typing import Literal

-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, Field


 class RunEventsConfig(BaseModel):
-    model_config = ConfigDict(frozen=True)
    backend: Literal["memory", "db", "jsonl"] = Field(
        default="memory",
        description="Storage backend for run events. 'memory' for development (no persistence), 'db' for production (SQL queries), 'jsonl' for lightweight single-node persistence.",
@@ -0,0 +1,41 @@
+"""Runtime path resolution for standalone harness usage."""
+
+import os
+from pathlib import Path
+
+
+def project_root() -> Path:
+    """Return the project root: ``DEER_FLOW_PROJECT_ROOT`` if set (must be an existing directory), else the current working directory."""
+    if env_root := os.getenv("DEER_FLOW_PROJECT_ROOT"):
+        root = Path(env_root).resolve()
+        if not root.exists():
+            raise ValueError(f"DEER_FLOW_PROJECT_ROOT is set to '{env_root}', but the resolved path '{root}' does not exist.")
+        if not root.is_dir():
+            raise ValueError(f"DEER_FLOW_PROJECT_ROOT is set to '{env_root}', but the resolved path '{root}' is not a directory.")
+        return root
+    return Path.cwd().resolve()
+
+
+def runtime_home() -> Path:
+    """Return the DeerFlow state directory: ``DEER_FLOW_HOME`` if set, else ``.deer-flow`` under the project root."""
+    if env_home := os.getenv("DEER_FLOW_HOME"):
+        return Path(env_home).resolve()
+    return project_root() / ".deer-flow"
+
+
+def resolve_path(value: str | os.PathLike[str], *, base: Path | None = None) -> Path:
+    """Resolve ``value`` to an absolute path; relative values are anchored at ``base`` (default: the project root)."""
+    path = Path(value)
+    if not path.is_absolute():
+        path = (base or project_root()) / path
+    return path.resolve()
+
+
+def existing_project_file(names: tuple[str, ...]) -> Path | None:
+    """Return the first existing named file under the project root."""
+    root = project_root()
+    for name in names:
+        candidate = root / name
+        if candidate.is_file():
+            return candidate
+    return None
@@ -4,8 +4,6 @@ from pydantic import BaseModel, ConfigDict, Field
 class VolumeMountConfig(BaseModel):
    """Configuration for a volume mount."""

-    model_config = ConfigDict(frozen=True)
-
    host_path: str = Field(..., description="Path on the host machine")
    container_path: str = Field(..., description="Path inside the container")
    read_only: bool = Field(default=False, description="Whether the mount is read-only")
@@ -25,6 +23,9 @@ class SandboxConfig(BaseModel):
        replicas: Maximum number of concurrent sandbox containers (default: 3). When the limit is reached the least-recently-used sandbox is evicted to make room.
        container_prefix: Prefix for container names (default: deer-flow-sandbox)
        idle_timeout: Idle timeout in seconds before sandbox is released (default: 600 = 10 minutes). Set to 0 to disable.
+        auto_restart: Automatically restart sandbox containers that have crashed (default: true). When a tool call
+            detects the container is no longer alive, the sandbox is evicted from cache and transparently recreated
+            on the next acquire. Set to false to disable.
        mounts: List of volume mounts to share directories with the container
        environment: Environment variables to inject into the container (values starting with $ are resolved from host env)
    """
@@ -57,6 +58,10 @@ class SandboxConfig(BaseModel):
        default=None,
        description="Idle timeout in seconds before sandbox is released (default: 600 = 10 minutes). Set to 0 to disable.",
    )
+    auto_restart: bool = Field(
+        default=True,
+        description="Automatically restart sandbox containers that have crashed. When a tool call detects the container is no longer alive, the sandbox is evicted from cache and transparently recreated on the next acquire.",
+    )
    mounts: list[VolumeMountConfig] = Field(
        default_factory=list,
        description="List of volume mounts to share directories between host and container",
@@ -82,4 +87,4 @@ class SandboxConfig(BaseModel):
        description="Maximum characters to keep from ls tool output. Output exceeding this limit is head-truncated. Set to 0 to disable truncation.",
    )

-    model_config = ConfigDict(extra="allow", frozen=True)
+    model_config = ConfigDict(extra="allow")
@@ -1,11 +1,9 @@
-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, Field


 class SkillEvolutionConfig(BaseModel):
    """Configuration for agent-managed skill evolution."""

-    model_config = ConfigDict(frozen=True)
-
    enabled: bool = Field(
        default=False,
        description="Whether the agent can create and modify skills under skills/custom.",
@@ -1,21 +1,28 @@
+import os
 from pathlib import Path

-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, Field
+
+from deerflow.config.runtime_paths import project_root, resolve_path


-def _default_repo_root() -> Path:
-    """Resolve the repo root without relying on the current working directory."""
-    return Path(__file__).resolve().parents[5]
+def _legacy_skills_candidates() -> tuple[Path, ...]:
+    """Return source-tree skills locations for monorepo compatibility."""
+    backend_dir = Path(__file__).resolve().parents[4]
+    repo_root = backend_dir.parent
+    return (repo_root / "skills",)


 class SkillsConfig(BaseModel):
    """Configuration for skills system"""

-    model_config = ConfigDict(frozen=True)
-
+    use: str = Field(
+        default="deerflow.skills.storage.local_skill_storage:LocalSkillStorage",
+        description="Class path of the SkillStorage implementation.",
+    )
    path: str | None = Field(
        default=None,
-        description="Path to skills directory. If not specified, defaults to ../skills relative to backend directory",
+        description=("Path to skills directory. If not specified, defaults to `skills` under the caller project root, falling back to the legacy repo-root location for monorepo compatibility."),
    )
    container_path: str = Field(
        default="/mnt/skills",
@@ -26,21 +33,30 @@ class SkillsConfig(BaseModel):
        """
        Get the resolved skills directory path.

-        Returns:
-            Path to the skills directory
+        Resolution order:
+            1. Explicit ``path`` field
+            2. ``DEER_FLOW_SKILLS_PATH`` environment variable
+            3. ``skills`` under the caller project root (``project_root()``)
+            4. Legacy repo-root candidates for monorepo compatibility (``_legacy_skills_candidates``)
+
+        When neither (3) nor (4) exists on disk, the project-root default is returned so callers
+        can still surface a stable "no skills" location without raising.
        """
        if self.path:
-            # Use configured path (can be absolute or relative)
-            path = Path(self.path)
-            if not path.is_absolute():
-                # If relative, resolve from the repo root for deterministic behavior.
-                path = _default_repo_root() / path
-            return path.resolve()
-        else:
-            # Default: ../skills relative to backend directory
-            from deerflow.skills.loader import get_skills_root_path
+            # Use configured path (can be absolute or relative to project root)
+            return resolve_path(self.path)
+        if env_path := os.getenv("DEER_FLOW_SKILLS_PATH"):
+            return resolve_path(env_path)

-            return get_skills_root_path()
+        project_default = project_root() / "skills"
+        if project_default.is_dir():
+            return project_default
+
+        for candidate in _legacy_skills_candidates():
+            if candidate.is_dir():
+                return candidate
+
+        return project_default

    def get_skill_container_path(self, skill_name: str, category: str = "public") -> str:
        """
@@ -2,7 +2,7 @@

 from typing import Literal

-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, Field

 StreamBridgeType = Literal["memory", "redis"]

@@ -10,8 +10,6 @@ StreamBridgeType = Literal["memory", "redis"]
 class StreamBridgeConfig(BaseModel):
    """Configuration for the stream bridge that connects agent workers to SSE endpoints."""

-    model_config = ConfigDict(frozen=True)
-
    type: StreamBridgeType = Field(
        default="memory",
        description="Stream bridge backend type. 'memory' uses in-process asyncio.Queue (single-process only). 'redis' uses Redis Streams (planned for Phase 2, not yet implemented).",
@@ -24,3 +22,28 @@ class StreamBridgeConfig(BaseModel):
        default=256,
        description="Maximum number of events buffered per run in the memory bridge.",
    )
+
+
+# Global configuration instance — None means no stream bridge is configured
+# (falls back to memory with defaults).
+_stream_bridge_config: StreamBridgeConfig | None = None
+
+
+def get_stream_bridge_config() -> StreamBridgeConfig | None:
+    """Get the current stream bridge configuration, or None if not configured."""
+    return _stream_bridge_config
+
+
+def set_stream_bridge_config(config: StreamBridgeConfig | None) -> None:
+    """Set the stream bridge configuration."""
+    global _stream_bridge_config
+    _stream_bridge_config = config
+
+
+def load_stream_bridge_config_from_dict(config_dict: dict | None) -> None:
+    """Load stream bridge configuration from a dictionary."""
+    global _stream_bridge_config
+    if config_dict is None:
+        _stream_bridge_config = None
+        return
+    _stream_bridge_config = StreamBridgeConfig(**config_dict)
@@ -1,13 +1,15 @@
 """Configuration for the subagent system loaded from config.yaml."""

-from pydantic import BaseModel, ConfigDict, Field
+import logging
+
+from pydantic import BaseModel, Field
+
+logger = logging.getLogger(__name__)


 class SubagentOverrideConfig(BaseModel):
    """Per-agent configuration overrides."""

-    model_config = ConfigDict(frozen=True)
-
    timeout_seconds: int | None = Field(
        default=None,
        ge=1,
@@ -69,8 +71,6 @@ class CustomSubagentConfig(BaseModel):
 class SubagentsAppConfig(BaseModel):
    """Configuration for the subagent system."""

-    model_config = ConfigDict(frozen=True)
-
    timeout_seconds: int = Field(
        default=900,
        ge=1,
@@ -140,3 +140,42 @@ class SubagentsAppConfig(BaseModel):
        if override is not None and override.skills is not None:
            return override.skills
        return None
+
+
+_subagents_config: SubagentsAppConfig = SubagentsAppConfig()
+
+
+def get_subagents_app_config() -> SubagentsAppConfig:
+    """Get the current subagents configuration."""
+    return _subagents_config
+
+
+def load_subagents_config_from_dict(config_dict: dict) -> None:
+    """Load subagents configuration from a dictionary."""
+    global _subagents_config
+    _subagents_config = SubagentsAppConfig(**config_dict)
+
+    overrides_summary = {}
+    for name, override in _subagents_config.agents.items():
+        parts = []
+        if override.timeout_seconds is not None:
+            parts.append(f"timeout={override.timeout_seconds}s")
+        if override.max_turns is not None:
+            parts.append(f"max_turns={override.max_turns}")
+        if override.model is not None:
+            parts.append(f"model={override.model}")
+        if override.skills is not None:
+            parts.append(f"skills={override.skills}")
+        if parts:
+            overrides_summary[name] = ", ".join(parts)
+
+    custom_agents_names = list(_subagents_config.custom_agents.keys())
+
+    if overrides_summary or custom_agents_names:
+        logger.info(
+            "Subagents config loaded: default timeout=%ss, default max_turns=%s, per-agent overrides=%s, custom_agents=%s",
+            _subagents_config.timeout_seconds,
+            _subagents_config.max_turns,
+            overrides_summary or "none",
+            custom_agents_names or "none",
+        )
--- a/Show More
+++ b/Show More