mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-19 06:05:58 +00:00
fix(channels): scope IM files and helper commands to owner (#3579)
* fix(channels): scope IM files and helper commands to owner * fix(memory): honor bound IM owner for /memory gateway endpoints The channel manager already attaches X-DeerFlow-Owner-User-Id for /memory and /models, but the memory router resolved user_id solely from get_effective_user_id(), which returns the synthetic internal user (DEFAULT_USER_ID) for channel workers. A bound IM /memory therefore read the default/internal memory instead of the connection owner's. Resolve the owner via _resolve_memory_user_id(request) across all /api/memory* endpoints: trusted internal callers act for the owner header, browser/API callers fall back to get_effective_user_id(). Mirrors the threads router's get_trusted_internal_owner_user_id pattern, completing acceptance criterion #3 of #3539. Add end-to-end tests asserting the resolved user_id (not just that the header is sent) and that a spoofed owner header from a browser user is ignored. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> * fix(channels): align memory bucket and reuse cached storage owner Address PR #3579 review feedback: - Memory router now sanitizes the trusted owner header via make_safe_user_id before routing, matching the channel file pipeline (_safe_user_id_for_run/prepare_user_dir_for_raw_id). A bound owner id needing sanitization now resolves to the same bucket as its files/uploads instead of 500ing in _validate_user_id. - _handle_chat reuses the storage_user_id cached at the top of the method for artifact delivery instead of re-deriving _channel_storage_user_id(msg), so uploads and outputs cannot drift to different buckets if a channel rewrites the InboundMessage in receive_file. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> * fix(channels): stage unbound IM files under the run's user bucket Address PR #3579 review feedback (#5): _channel_storage_user_id now mirrors _resolve_run_params' identity policy, falling back to safe(msg.user_id) instead of returning None for unbound auth-enabled channels. Previously an unbound msg ran under safe(platform_user_id) but staged uploads under get_effective_user_id() in the dispatcher task (unset contextvar -> "default"), so files landed in users/default/... while the agent read from users/{safe_platform_user_id}/.... Bound and unbound channels now write where the agent reads. Returns None only when no identity is available. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> * fix(channels): reuse cached storage owner in streaming artifact delivery Address PR #3579 review feedback (#6): thread the storage_user_id resolved in _handle_chat into _handle_streaming_chat instead of re-deriving _channel_storage_user_id(msg) in the finally block. Avoids re-running _safe_user_id_for_run (and its possible filesystem touch) on the streaming-error path and guarantees artifact delivery targets the same bucket as the uploads. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> * docs(channels): document owner-scoped IM file storage Address PR #3579 review feedback (#4): the IM Channels and File Upload sections still described pre-PR default-bucket behaviour. Document that receive_file, _ingest_inbound_files/ensure_uploads_dir/get_uploads_dir, and _resolve_attachments/_prepare_artifact_delivery are owner-scoped via the user_id kwarg, and that the bucket matches the memory bucket from _resolve_memory_user_id. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> * refactor(channels): unify run identity and storage bucket resolution Address PR #3579 review feedback (#3): _resolve_run_params no longer duplicates the owner-resolution rule inline. After the #5 fix the inline block and _channel_storage_user_id computed the identical sanitized-with-platform-fallback value, so the run identity now calls the same helper, making it the single source of truth for run_context["user_id"] and the file/artifact storage bucket. _owner_headers stays deliberately separate: it sends the raw owner id over HTTP for the gateway to re-resolve (no sanitize, no platform fallback), documented on both helpers. test_run_identity_matches_storage_bucket pins the two together so they cannot drift again. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -1,8 +1,9 @@
|
||||
"""Memory API router for retrieving and managing global memory data."""
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from fastapi import APIRouter, HTTPException, Request
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from app.gateway.internal_auth import get_trusted_internal_owner_user_id
|
||||
from deerflow.agents.memory.updater import (
|
||||
clear_memory_data,
|
||||
create_memory_fact,
|
||||
@@ -13,11 +14,34 @@ from deerflow.agents.memory.updater import (
|
||||
update_memory_fact,
|
||||
)
|
||||
from deerflow.config.memory_config import get_memory_config
|
||||
from deerflow.config.paths import make_safe_user_id
|
||||
from deerflow.runtime.user_context import get_effective_user_id
|
||||
|
||||
router = APIRouter(prefix="/api", tags=["memory"])
|
||||
|
||||
|
||||
def _resolve_memory_user_id(request: Request) -> str:
|
||||
"""Resolve the memory owner for this request.
|
||||
|
||||
Honors the trusted internal owner header that channel workers attach when
|
||||
acting for a connection owner, so an IM ``/memory`` command reads the bound
|
||||
owner's memory instead of the synthetic internal user. The header is only
|
||||
honored after ``AuthMiddleware`` validated the internal token (see
|
||||
``get_trusted_internal_owner_user_id``). Browser/API callers are never
|
||||
internal, so this falls back to the normal contextvar-based effective user.
|
||||
|
||||
The trusted owner header carries the *raw* owner id, so sanitize it through
|
||||
``make_safe_user_id`` (the same normalization the channel file pipeline applies
|
||||
via ``_safe_user_id_for_run``/``prepare_user_dir_for_raw_id``). This keeps the
|
||||
memory bucket aligned with the owner's file/upload bucket and avoids a 500 when
|
||||
the raw id contains characters ``_validate_user_id`` would reject.
|
||||
"""
|
||||
raw_owner = get_trusted_internal_owner_user_id(request)
|
||||
if raw_owner:
|
||||
return make_safe_user_id(raw_owner)
|
||||
return get_effective_user_id()
|
||||
|
||||
|
||||
class ContextSection(BaseModel):
|
||||
"""Model for context sections (user and history)."""
|
||||
|
||||
@@ -115,7 +139,7 @@ class MemoryStatusResponse(BaseModel):
|
||||
summary="Get Memory Data",
|
||||
description="Retrieve the current global memory data including user context, history, and facts.",
|
||||
)
|
||||
async def get_memory() -> MemoryResponse:
|
||||
async def get_memory(http_request: Request) -> MemoryResponse:
|
||||
"""Get the current global memory data.
|
||||
|
||||
Returns:
|
||||
@@ -149,7 +173,7 @@ async def get_memory() -> MemoryResponse:
|
||||
}
|
||||
```
|
||||
"""
|
||||
memory_data = get_memory_data(user_id=get_effective_user_id())
|
||||
memory_data = get_memory_data(user_id=_resolve_memory_user_id(http_request))
|
||||
return MemoryResponse(**memory_data)
|
||||
|
||||
|
||||
@@ -160,7 +184,7 @@ async def get_memory() -> MemoryResponse:
|
||||
summary="Reload Memory Data",
|
||||
description="Reload memory data from the storage file, refreshing the in-memory cache.",
|
||||
)
|
||||
async def reload_memory() -> MemoryResponse:
|
||||
async def reload_memory(http_request: Request) -> MemoryResponse:
|
||||
"""Reload memory data from file.
|
||||
|
||||
This forces a reload of the memory data from the storage file,
|
||||
@@ -169,7 +193,7 @@ async def reload_memory() -> MemoryResponse:
|
||||
Returns:
|
||||
The reloaded memory data.
|
||||
"""
|
||||
memory_data = reload_memory_data(user_id=get_effective_user_id())
|
||||
memory_data = reload_memory_data(user_id=_resolve_memory_user_id(http_request))
|
||||
return MemoryResponse(**memory_data)
|
||||
|
||||
|
||||
@@ -180,10 +204,10 @@ async def reload_memory() -> MemoryResponse:
|
||||
summary="Clear All Memory Data",
|
||||
description="Delete all saved memory data and reset the memory structure to an empty state.",
|
||||
)
|
||||
async def clear_memory() -> MemoryResponse:
|
||||
async def clear_memory(http_request: Request) -> MemoryResponse:
|
||||
"""Clear all persisted memory data."""
|
||||
try:
|
||||
memory_data = clear_memory_data(user_id=get_effective_user_id())
|
||||
memory_data = clear_memory_data(user_id=_resolve_memory_user_id(http_request))
|
||||
except OSError as exc:
|
||||
raise HTTPException(status_code=500, detail="Failed to clear memory data.") from exc
|
||||
|
||||
@@ -197,14 +221,14 @@ async def clear_memory() -> MemoryResponse:
|
||||
summary="Create Memory Fact",
|
||||
description="Create a single saved memory fact manually.",
|
||||
)
|
||||
async def create_memory_fact_endpoint(request: FactCreateRequest) -> MemoryResponse:
|
||||
async def create_memory_fact_endpoint(request: FactCreateRequest, http_request: Request) -> MemoryResponse:
|
||||
"""Create a single fact manually."""
|
||||
try:
|
||||
memory_data = create_memory_fact(
|
||||
content=request.content,
|
||||
category=request.category,
|
||||
confidence=request.confidence,
|
||||
user_id=get_effective_user_id(),
|
||||
user_id=_resolve_memory_user_id(http_request),
|
||||
)
|
||||
except ValueError as exc:
|
||||
raise _map_memory_fact_value_error(exc) from exc
|
||||
@@ -221,10 +245,10 @@ async def create_memory_fact_endpoint(request: FactCreateRequest) -> MemoryRespo
|
||||
summary="Delete Memory Fact",
|
||||
description="Delete a single saved memory fact by its fact id.",
|
||||
)
|
||||
async def delete_memory_fact_endpoint(fact_id: str) -> MemoryResponse:
|
||||
async def delete_memory_fact_endpoint(fact_id: str, http_request: Request) -> MemoryResponse:
|
||||
"""Delete a single fact from memory by fact id."""
|
||||
try:
|
||||
memory_data = delete_memory_fact(fact_id, user_id=get_effective_user_id())
|
||||
memory_data = delete_memory_fact(fact_id, user_id=_resolve_memory_user_id(http_request))
|
||||
except KeyError as exc:
|
||||
raise HTTPException(status_code=404, detail=f"Memory fact '{fact_id}' not found.") from exc
|
||||
except OSError as exc:
|
||||
@@ -240,7 +264,7 @@ async def delete_memory_fact_endpoint(fact_id: str) -> MemoryResponse:
|
||||
summary="Patch Memory Fact",
|
||||
description="Partially update a single saved memory fact by its fact id while preserving omitted fields.",
|
||||
)
|
||||
async def update_memory_fact_endpoint(fact_id: str, request: FactPatchRequest) -> MemoryResponse:
|
||||
async def update_memory_fact_endpoint(fact_id: str, request: FactPatchRequest, http_request: Request) -> MemoryResponse:
|
||||
"""Partially update a single fact manually."""
|
||||
try:
|
||||
memory_data = update_memory_fact(
|
||||
@@ -248,7 +272,7 @@ async def update_memory_fact_endpoint(fact_id: str, request: FactPatchRequest) -
|
||||
content=request.content,
|
||||
category=request.category,
|
||||
confidence=request.confidence,
|
||||
user_id=get_effective_user_id(),
|
||||
user_id=_resolve_memory_user_id(http_request),
|
||||
)
|
||||
except ValueError as exc:
|
||||
raise _map_memory_fact_value_error(exc) from exc
|
||||
@@ -267,9 +291,9 @@ async def update_memory_fact_endpoint(fact_id: str, request: FactPatchRequest) -
|
||||
summary="Export Memory Data",
|
||||
description="Export the current global memory data as JSON for backup or transfer.",
|
||||
)
|
||||
async def export_memory() -> MemoryResponse:
|
||||
async def export_memory(http_request: Request) -> MemoryResponse:
|
||||
"""Export the current memory data."""
|
||||
memory_data = get_memory_data(user_id=get_effective_user_id())
|
||||
memory_data = get_memory_data(user_id=_resolve_memory_user_id(http_request))
|
||||
return MemoryResponse(**memory_data)
|
||||
|
||||
|
||||
@@ -280,10 +304,10 @@ async def export_memory() -> MemoryResponse:
|
||||
summary="Import Memory Data",
|
||||
description="Import and overwrite the current global memory data from a JSON payload.",
|
||||
)
|
||||
async def import_memory(request: MemoryResponse) -> MemoryResponse:
|
||||
async def import_memory(request: MemoryResponse, http_request: Request) -> MemoryResponse:
|
||||
"""Import and persist memory data."""
|
||||
try:
|
||||
memory_data = import_memory_data(request.model_dump(), user_id=get_effective_user_id())
|
||||
memory_data = import_memory_data(request.model_dump(), user_id=_resolve_memory_user_id(http_request))
|
||||
except OSError as exc:
|
||||
raise HTTPException(status_code=500, detail="Failed to import memory data.") from exc
|
||||
|
||||
@@ -336,14 +360,14 @@ async def get_memory_config_endpoint() -> MemoryConfigResponse:
|
||||
summary="Get Memory Status",
|
||||
description="Retrieve both memory configuration and current data in a single request.",
|
||||
)
|
||||
async def get_memory_status() -> MemoryStatusResponse:
|
||||
async def get_memory_status(http_request: Request) -> MemoryStatusResponse:
|
||||
"""Get the memory system status including configuration and data.
|
||||
|
||||
Returns:
|
||||
Combined memory configuration and current data.
|
||||
"""
|
||||
config = get_memory_config()
|
||||
memory_data = get_memory_data(user_id=get_effective_user_id())
|
||||
memory_data = get_memory_data(user_id=_resolve_memory_user_id(http_request))
|
||||
|
||||
return MemoryStatusResponse(
|
||||
config=MemoryConfigResponse(
|
||||
|
||||
Reference in New Issue
Block a user