fix(mcp): add auth interceptor with channel user_id and keep header propagation to mcp tools (#3294)

* 修复channel中的user_id传递到interceptor中的bug, mcp可通过header传递user_id到mcp工具
  (Fix the bug in passing the channel user_id to the interceptor; MCP can now pass user_id to MCP tools via a header.)

  Co-authored-by: Cursor <cursoragent@cursor.com>

* fix(channel,mcp,gateway): normalize channel user_id and add regression tests

  Normalize external channel user ids into a filesystem-safe runtime context while preserving the raw channel_user_id, and document gateway user_id propagation semantics. Add regression coverage for channel user_id context mapping, gateway user_id precedence/internal-role behavior, and MCP interceptor header forwarding via meta.headers.

  Co-authored-by: Cursor <cursoragent@cursor.com>

* fix(auth,mcp): harden user id normalization and header handling

  Increase the sanitized user-id digest suffix to 16 hex chars, replace the internal system role magic string with a shared constant, and harden MCP header forwarding with Mapping type checks. Add regression tests for empty channel user_id handling, unsupported header types, and the updated digest length behavior.

  Co-authored-by: Cursor <cursoragent@cursor.com>

---------

Co-authored-by: zhongli <335302680@qq.com>
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-06-10 09:25:57 +00:00 · 2026-06-03 15:48:19 +08:00
parent 5dc2d6cbf5
commit 3ae82dc663
9 changed files with 309 additions and 4 deletions
@@ -1,3 +1,4 @@
+import hashlib
 import os
 import re
 import shutil
@@ -10,6 +11,8 @@ VIRTUAL_PATH_PREFIX = "/mnt/user-data"

 _SAFE_THREAD_ID_RE = re.compile(r"^[A-Za-z0-9_\-]+$")
 _SAFE_USER_ID_RE = re.compile(r"^[A-Za-z0-9_\-]+$")
+_UNSAFE_USER_ID_CHAR_RE = re.compile(r"[^A-Za-z0-9_\-]")
+_SAFE_USER_ID_DIGEST_HEX_LEN = 16


 def _default_local_base_dir() -> Path:
@@ -31,6 +34,23 @@ def _validate_user_id(user_id: str) -> str:
    return user_id


+def make_safe_user_id(raw: str) -> str:
+    """Normalize an external identity into the user-id charset (``[A-Za-z0-9_-]``).
+
+    IM channel ids (Feishu/Slack/Telegram) may contain characters that
+    :func:`_validate_user_id` rejects. Already-safe ids pass through unchanged;
+    lossy ones get a truncated SHA-1 suffix, so distinct raw ids that sanitize
+    alike are very unlikely (though not guaranteed) to share a storage bucket.
+    """
+    if not raw:
+        raise ValueError("user_id must be a non-empty string.")
+    sanitized = _UNSAFE_USER_ID_CHAR_RE.sub("-", raw)
+    if sanitized == raw:
+        return raw
+    digest = hashlib.sha1(raw.encode("utf-8")).hexdigest()[:_SAFE_USER_ID_DIGEST_HEX_LEN]
+    return f"{sanitized}-{digest}"
+
+
 def _join_host_path(base: str, *parts: str) -> str:
    """Join host filesystem path segments while preserving native style.

@@ -3,6 +3,7 @@
 from __future__ import annotations

 import logging
+from collections.abc import Mapping
 from typing import Any

 from langchain_core.tools import BaseTool, StructuredTool
@@ -137,7 +138,15 @@ def _make_session_pool_tool(
            from langchain_mcp_adapters.interceptors import MCPToolCallRequest

            async def base_handler(request: MCPToolCallRequest) -> Any:
-                return await session.call_tool(request.name, request.args)
+                # Preserve interceptor-injected headers for stdio MCP calls by
+                # forwarding them through MCP call meta.
+                call_kwargs: dict[str, Any] = {}
+                if request.headers:
+                    if isinstance(request.headers, Mapping):
+                        call_kwargs["meta"] = {"headers": dict(request.headers)}
+                    else:
+                        logger.warning("Ignoring MCP interceptor headers with unsupported type: %s", type(request.headers).__name__)
+                return await session.call_tool(request.name, request.args, **call_kwargs)

            handler = base_handler
            for interceptor in reversed(tool_interceptors):