feat(isolation): wire user_id through all Paths and memory callsites

Pass user_id=get_effective_user_id() at every callsite that invokes
Paths methods or memory functions, enabling per-user filesystem isolation
throughout the harness and app layers.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
rayhpeng
2026-04-12 15:16:23 +08:00
parent 9af2f3e73c
commit 7ce9333200
24 changed files with 137 additions and 70 deletions
+4 -2
View File
@@ -13,6 +13,7 @@ from app.channels.base import Channel
from app.channels.commands import KNOWN_CHANNEL_COMMANDS
from app.channels.message_bus import InboundMessage, InboundMessageType, MessageBus, OutboundMessage, ResolvedAttachment
from deerflow.config.paths import VIRTUAL_PATH_PREFIX, get_paths
from deerflow.runtime.user_context import get_effective_user_id
from deerflow.sandbox.sandbox_provider import get_sandbox_provider
logger = logging.getLogger(__name__)
@@ -344,8 +345,9 @@ class FeishuChannel(Channel):
return f"Failed to obtain the [{type}]"
paths = get_paths()
paths.ensure_thread_dirs(thread_id)
uploads_dir = paths.sandbox_uploads_dir(thread_id).resolve()
user_id = get_effective_user_id()
paths.ensure_thread_dirs(thread_id, user_id=user_id)
uploads_dir = paths.sandbox_uploads_dir(thread_id, user_id=user_id).resolve()
ext = "png" if type == "image" else "bin"
raw_filename = getattr(response, "file_name", "") or f"feishu_{file_key[-12:]}.{ext}"
+4 -2
View File
@@ -17,6 +17,7 @@ from langgraph_sdk.errors import ConflictError
from app.channels.commands import KNOWN_CHANNEL_COMMANDS
from app.channels.message_bus import InboundMessage, InboundMessageType, MessageBus, OutboundMessage, ResolvedAttachment
from app.channels.store import ChannelStore
from deerflow.runtime.user_context import get_effective_user_id
logger = logging.getLogger(__name__)
@@ -341,14 +342,15 @@ def _resolve_attachments(thread_id: str, artifacts: list[str]) -> list[ResolvedA
attachments: list[ResolvedAttachment] = []
paths = get_paths()
outputs_dir = paths.sandbox_outputs_dir(thread_id).resolve()
user_id = get_effective_user_id()
outputs_dir = paths.sandbox_outputs_dir(thread_id, user_id=user_id).resolve()
for virtual_path in artifacts:
# Security: only allow files from the agent outputs directory
if not virtual_path.startswith(_OUTPUTS_VIRTUAL_PREFIX):
logger.warning("[Manager] rejected non-outputs artifact path: %s", virtual_path)
continue
try:
actual = paths.resolve_virtual_path(thread_id, virtual_path)
actual = paths.resolve_virtual_path(thread_id, virtual_path, user_id=user_id)
# Verify the resolved path is actually under the outputs directory
# (guards against path-traversal even after prefix check)
try:
+2 -1
View File
@@ -5,6 +5,7 @@ from pathlib import Path
from fastapi import HTTPException
from deerflow.config.paths import get_paths
from deerflow.runtime.user_context import get_effective_user_id
def resolve_thread_virtual_path(thread_id: str, virtual_path: str) -> Path:
@@ -22,7 +23,7 @@ def resolve_thread_virtual_path(thread_id: str, virtual_path: str) -> Path:
HTTPException: If the path is invalid or outside allowed directories.
"""
try:
return get_paths().resolve_virtual_path(thread_id, virtual_path)
return get_paths().resolve_virtual_path(thread_id, virtual_path, user_id=get_effective_user_id())
except ValueError as e:
status = 403 if "traversal" in str(e) else 400
raise HTTPException(status_code=status, detail=str(e))
+10 -7
View File
@@ -13,6 +13,7 @@ from deerflow.agents.memory.updater import (
update_memory_fact,
)
from deerflow.config.memory_config import get_memory_config
from deerflow.runtime.user_context import get_effective_user_id
router = APIRouter(prefix="/api", tags=["memory"])
@@ -147,7 +148,7 @@ async def get_memory() -> MemoryResponse:
}
```
"""
memory_data = get_memory_data()
memory_data = get_memory_data(user_id=get_effective_user_id())
return MemoryResponse(**memory_data)
@@ -167,7 +168,7 @@ async def reload_memory() -> MemoryResponse:
Returns:
The reloaded memory data.
"""
memory_data = reload_memory_data()
memory_data = reload_memory_data(user_id=get_effective_user_id())
return MemoryResponse(**memory_data)
@@ -181,7 +182,7 @@ async def reload_memory() -> MemoryResponse:
async def clear_memory() -> MemoryResponse:
"""Clear all persisted memory data."""
try:
memory_data = clear_memory_data()
memory_data = clear_memory_data(user_id=get_effective_user_id())
except OSError as exc:
raise HTTPException(status_code=500, detail="Failed to clear memory data.") from exc
@@ -202,6 +203,7 @@ async def create_memory_fact_endpoint(request: FactCreateRequest) -> MemoryRespo
content=request.content,
category=request.category,
confidence=request.confidence,
user_id=get_effective_user_id(),
)
except ValueError as exc:
raise _map_memory_fact_value_error(exc) from exc
@@ -221,7 +223,7 @@ async def create_memory_fact_endpoint(request: FactCreateRequest) -> MemoryRespo
async def delete_memory_fact_endpoint(fact_id: str) -> MemoryResponse:
"""Delete a single fact from memory by fact id."""
try:
memory_data = delete_memory_fact(fact_id)
memory_data = delete_memory_fact(fact_id, user_id=get_effective_user_id())
except KeyError as exc:
raise HTTPException(status_code=404, detail=f"Memory fact '{fact_id}' not found.") from exc
except OSError as exc:
@@ -245,6 +247,7 @@ async def update_memory_fact_endpoint(fact_id: str, request: FactPatchRequest) -
content=request.content,
category=request.category,
confidence=request.confidence,
user_id=get_effective_user_id(),
)
except ValueError as exc:
raise _map_memory_fact_value_error(exc) from exc
@@ -265,7 +268,7 @@ async def update_memory_fact_endpoint(fact_id: str, request: FactPatchRequest) -
)
async def export_memory() -> MemoryResponse:
"""Export the current memory data."""
memory_data = get_memory_data()
memory_data = get_memory_data(user_id=get_effective_user_id())
return MemoryResponse(**memory_data)
@@ -279,7 +282,7 @@ async def export_memory() -> MemoryResponse:
async def import_memory(request: MemoryResponse) -> MemoryResponse:
"""Import and persist memory data."""
try:
memory_data = import_memory_data(request.model_dump())
memory_data = import_memory_data(request.model_dump(), user_id=get_effective_user_id())
except OSError as exc:
raise HTTPException(status_code=500, detail="Failed to import memory data.") from exc
@@ -337,7 +340,7 @@ async def get_memory_status() -> MemoryStatusResponse:
Combined memory configuration and current data.
"""
config = get_memory_config()
memory_data = get_memory_data()
memory_data = get_memory_data(user_id=get_effective_user_id())
return MemoryStatusResponse(
config=MemoryConfigResponse(
+4 -3
View File
@@ -26,6 +26,7 @@ from app.gateway.deps import get_checkpointer, get_current_user, get_feedback_re
from app.gateway.utils import sanitize_log_param
from deerflow.config.paths import Paths, get_paths
from deerflow.runtime import serialize_channel_values
from deerflow.runtime.user_context import get_effective_user_id
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api/threads", tags=["threads"])
@@ -143,11 +144,11 @@ class ThreadHistoryRequest(BaseModel):
# ---------------------------------------------------------------------------
def _delete_thread_data(thread_id: str, paths: Paths | None = None) -> ThreadDeleteResponse:
def _delete_thread_data(thread_id: str, paths: Paths | None = None, *, user_id: str | None = None) -> ThreadDeleteResponse:
"""Delete local persisted filesystem data for a thread."""
path_manager = paths or get_paths()
try:
path_manager.delete_thread_dir(thread_id)
path_manager.delete_thread_dir(thread_id, user_id=user_id)
except ValueError as exc:
raise HTTPException(status_code=422, detail=str(exc)) from exc
except FileNotFoundError:
@@ -198,7 +199,7 @@ async def delete_thread_data(thread_id: str, request: Request) -> ThreadDeleteRe
from app.gateway.deps import get_thread_store
# Clean local filesystem
response = _delete_thread_data(thread_id)
response = _delete_thread_data(thread_id, user_id=get_effective_user_id())
# Remove checkpoints (best-effort)
checkpointer = getattr(request.app.state, "checkpointer", None)
+3 -2
View File
@@ -9,6 +9,7 @@ from pydantic import BaseModel
from app.gateway.authz import require_permission
from deerflow.config.paths import get_paths
from deerflow.runtime.user_context import get_effective_user_id
from deerflow.sandbox.sandbox_provider import get_sandbox_provider
from deerflow.uploads.manager import (
PathTraversalError,
@@ -69,7 +70,7 @@ async def upload_files(
uploads_dir = ensure_uploads_dir(thread_id)
except ValueError as e:
raise HTTPException(status_code=400, detail=str(e))
sandbox_uploads = get_paths().sandbox_uploads_dir(thread_id)
sandbox_uploads = get_paths().sandbox_uploads_dir(thread_id, user_id=get_effective_user_id())
uploaded_files = []
sandbox_provider = get_sandbox_provider()
@@ -147,7 +148,7 @@ async def list_uploaded_files(thread_id: str, request: Request) -> dict:
enrich_file_listing(result, thread_id)
# Gateway additionally includes the sandbox-relative path.
sandbox_uploads = get_paths().sandbox_uploads_dir(thread_id)
sandbox_uploads = get_paths().sandbox_uploads_dir(thread_id, user_id=get_effective_user_id())
for f in result["files"]:
f["path"] = str(sandbox_uploads / f["filename"])