mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-18 13:46:02 +00:00
fix(channels): scope IM files and helper commands to owner (#3579)
* fix(channels): scope IM files and helper commands to owner

* fix(memory): honor bound IM owner for /memory gateway endpoints

The channel manager already attaches X-DeerFlow-Owner-User-Id for /memory and /models, but the memory router resolved user_id solely from get_effective_user_id(), which returns the synthetic internal user (DEFAULT_USER_ID) for channel workers. A bound IM /memory therefore read the default/internal memory instead of the connection owner's.

Resolve the owner via _resolve_memory_user_id(request) across all /api/memory* endpoints: trusted internal callers act for the owner header, browser/API callers fall back to get_effective_user_id(). Mirrors the threads router's get_trusted_internal_owner_user_id pattern, completing acceptance criterion #3 of #3539.

Add end-to-end tests asserting the resolved user_id (not just that the header is sent) and that a spoofed owner header from a browser user is ignored.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

* fix(channels): align memory bucket and reuse cached storage owner

Address PR #3579 review feedback:

- Memory router now sanitizes the trusted owner header via make_safe_user_id before routing, matching the channel file pipeline (_safe_user_id_for_run/prepare_user_dir_for_raw_id). A bound owner id needing sanitization now resolves to the same bucket as its files/uploads instead of 500ing in _validate_user_id.
- _handle_chat reuses the storage_user_id cached at the top of the method for artifact delivery instead of re-deriving _channel_storage_user_id(msg), so uploads and outputs cannot drift to different buckets if a channel rewrites the InboundMessage in receive_file.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

* fix(channels): stage unbound IM files under the run's user bucket

Address PR #3579 review feedback (#5): _channel_storage_user_id now mirrors _resolve_run_params' identity policy, falling back to safe(msg.user_id) instead of returning None for unbound auth-enabled channels. Previously an unbound msg ran under safe(platform_user_id) but staged uploads under get_effective_user_id() in the dispatcher task (unset contextvar -> "default"), so files landed in users/default/... while the agent read from users/{safe_platform_user_id}/.... Bound and unbound channels now write where the agent reads. Returns None only when no identity is available.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

* fix(channels): reuse cached storage owner in streaming artifact delivery

Address PR #3579 review feedback (#6): thread the storage_user_id resolved in _handle_chat into _handle_streaming_chat instead of re-deriving _channel_storage_user_id(msg) in the finally block. Avoids re-running _safe_user_id_for_run (and its possible filesystem touch) on the streaming-error path and guarantees artifact delivery targets the same bucket as the uploads.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

* docs(channels): document owner-scoped IM file storage

Address PR #3579 review feedback (#4): the IM Channels and File Upload sections still described pre-PR default-bucket behaviour. Document that receive_file, _ingest_inbound_files/ensure_uploads_dir/get_uploads_dir, and _resolve_attachments/_prepare_artifact_delivery are owner-scoped via the user_id kwarg, and that the bucket matches the memory bucket from _resolve_memory_user_id.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

* refactor(channels): unify run identity and storage bucket resolution

Address PR #3579 review feedback (#3): _resolve_run_params no longer duplicates the owner-resolution rule inline.
After the #5 fix the inline block and _channel_storage_user_id computed the identical sanitized-with-platform-fallback value, so the run identity now calls the same helper, making it the single source of truth for run_context["user_id"] and the file/artifact storage bucket.

_owner_headers stays deliberately separate: it sends the raw owner id over HTTP for the gateway to re-resolve (no sanitize, no platform fallback), documented on both helpers.

test_run_identity_matches_storage_bucket pins the two together so they cannot drift again.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -679,6 +679,55 @@ class TestChannelManager:
|
||||
|
||||
_run(go())
|
||||
|
||||
def test_fetch_gateway_uses_bound_owner_headers(self, monkeypatch):
    """A bound ``/memory`` command sends the connection owner's id in the owner header.

    ``httpx.AsyncClient`` (as referenced from ``app.channels.manager``) is
    swapped for a recording stub so the keyword arguments that
    ``_fetch_gateway`` hands to ``get`` can be inspected afterwards.
    """
    from app.channels.manager import ChannelManager
    from app.gateway.internal_auth import INTERNAL_OWNER_USER_ID_HEADER_NAME

    # Every stubbed GET is appended here as {"url": ..., **kwargs}.
    recorded_gets = []

    class StubResponse:
        def raise_for_status(self):
            return None

        def json(self):
            return {"facts": [{"text": "owner fact"}]}

    class RecordingAsyncClient:
        def __init__(self, *args, **kwargs):
            # Constructor arguments are accepted and ignored.
            pass

        async def __aenter__(self):
            return self

        async def __aexit__(self, exc_type, exc, tb):
            return None

        async def get(self, url, **kwargs):
            recorded_gets.append({"url": url, **kwargs})
            return StubResponse()

    monkeypatch.setattr("app.channels.manager.httpx.AsyncClient", RecordingAsyncClient)

    async def scenario():
        bus = MessageBus()
        store = ChannelStore(path=Path(tempfile.mkdtemp()) / "store.json")
        manager = ChannelManager(bus=bus, store=store, gateway_url="http://gateway:8001")
        memory_command = InboundMessage(
            channel_name="slack",
            chat_id="C123",
            user_id="U-platform",
            owner_user_id="deerflow-user-1",
            connection_id="connection-1",
            text="/memory",
            msg_type=InboundMessageType.COMMAND,
        )

        reply = await manager._fetch_gateway("/api/memory", "memory", msg=memory_command)

        assert reply == "Memory contains 1 fact(s)."
        first_get = recorded_gets[0]
        assert first_get["headers"][INTERNAL_OWNER_USER_ID_HEADER_NAME] == "deerflow-user-1"

    _run(scenario())
|
||||
|
||||
def test_handle_chat_calls_channel_receive_file_for_inbound_files(self, monkeypatch):
|
||||
from app.channels.manager import ChannelManager
|
||||
|
||||
@@ -716,7 +765,9 @@ class TestChannelManager:
|
||||
inbound = InboundMessage(
|
||||
channel_name="test",
|
||||
chat_id="chat1",
|
||||
user_id="user1",
|
||||
user_id="platform-user",
|
||||
owner_user_id="owner-1",
|
||||
connection_id="connection-1",
|
||||
text="hi [image]",
|
||||
files=[{"image_key": "img_1"}],
|
||||
)
|
||||
@@ -729,6 +780,7 @@ class TestChannelManager:
|
||||
assert called_msg.text == "hi [image]"
|
||||
assert isinstance(called_thread_id, str)
|
||||
assert called_thread_id
|
||||
assert mock_channel.receive_file.await_args.kwargs["user_id"] == "owner-1"
|
||||
|
||||
mock_client.runs.wait.assert_called_once()
|
||||
run_call_args = mock_client.runs.wait.call_args
|
||||
@@ -736,6 +788,70 @@ class TestChannelManager:
|
||||
|
||||
_run(go())
|
||||
|
||||
def test_ingest_inbound_files_uses_explicit_owner_bucket(self, tmp_path, monkeypatch):
    """``_ingest_inbound_files`` stores the file in the bucket named by ``user_id``.

    A throwaway reader is registered for the ``"owner-test"`` channel; the
    test then checks both the returned metadata and that the bytes landed in
    the ``owner-1`` uploads directory rather than the directory resolved
    without a ``user_id``.
    """
    from app.channels.manager import INBOUND_FILE_READERS, _ingest_inbound_files
    from deerflow.config.paths import Paths

    paths = Paths(tmp_path)
    monkeypatch.setattr("deerflow.uploads.manager.get_paths", lambda: paths)

    payload = b"owner data"

    async def read_file(file_info, client):
        del file_info, client
        return payload

    INBOUND_FILE_READERS["owner-test"] = read_file

    async def scenario():
        try:
            inbound = InboundMessage(
                channel_name="owner-test",
                chat_id="C123",
                user_id="U-platform",
                text="file",
                files=[{"filename": "report.txt", "type": "file"}],
            )
            created = await _ingest_inbound_files("thread-owner", inbound, user_id="owner-1")
        finally:
            # Always unregister the temporary reader so other tests are unaffected.
            INBOUND_FILE_READERS.pop("owner-test", None)

        expected_entry = {
            "filename": "report.txt",
            "size": len(payload),
            "path": "/mnt/user-data/uploads/report.txt",
            "is_image": False,
        }
        assert created == [expected_entry]

        owner_file = paths.sandbox_uploads_dir("thread-owner", user_id="owner-1") / "report.txt"
        assert owner_file.read_bytes() == payload
        assert not paths.sandbox_uploads_dir("thread-owner").exists()

    _run(scenario())
|
||||
|
||||
def test_channel_storage_user_id_falls_back_to_platform_user(self, monkeypatch):
    """The storage bucket for an unbound message is the sanitized platform user id.

    Three cases are pinned, each compared against ``_safe_user_id_for_run``:

    * unbound message -> bucket derived from ``msg.user_id``;
    * bound message -> bucket derived from ``owner_user_id``;
    * message with an empty ``user_id`` and no owner -> ``None``.

    Rationale carried over from the change under test: the run is expected to
    execute under the same sanitized id, so returning ``None`` for the unbound
    case would let uploads land in a different bucket than the agent reads.
    """
    from app.channels.manager import _channel_storage_user_id, _safe_user_id_for_run

    # Simulate auth being enabled: no auth-disabled owner is available.
    monkeypatch.setattr("app.channels.manager._auth_disabled_owner_user_id", lambda: None)

    def slack_message(**fields):
        return InboundMessage(channel_name="slack", chat_id="C1", text="hi", **fields)

    unbound_msg = slack_message(user_id="U-platform")
    assert _channel_storage_user_id(unbound_msg) == _safe_user_id_for_run("U-platform")

    bound_msg = slack_message(user_id="U-platform", owner_user_id="owner-1")
    assert _channel_storage_user_id(bound_msg) == _safe_user_id_for_run("owner-1")

    anonymous_msg = slack_message(user_id="")
    assert _channel_storage_user_id(anonymous_msg) is None
|
||||
|
||||
def test_handle_chat_creates_thread(self):
|
||||
from app.channels.manager import ChannelManager
|
||||
|
||||
@@ -1862,7 +1978,8 @@ class TestChannelManager:
|
||||
def test_handle_command_slash_skill_with_attachment_preserves_original_content(self, monkeypatch, tmp_path):
|
||||
from app.channels.manager import ChannelManager
|
||||
|
||||
async def fake_ingest(thread_id, msg):
|
||||
async def fake_ingest(thread_id, msg, *, user_id=None):
|
||||
del user_id
|
||||
return [
|
||||
{
|
||||
"filename": "report.pdf",
|
||||
@@ -1916,7 +2033,8 @@ class TestChannelManager:
|
||||
def test_streaming_slash_skill_with_attachment_preserves_original_content(self, monkeypatch, tmp_path):
|
||||
from app.channels.manager import ChannelManager
|
||||
|
||||
async def fake_ingest(thread_id, msg):
|
||||
async def fake_ingest(thread_id, msg, *, user_id=None):
|
||||
del user_id
|
||||
return [
|
||||
{
|
||||
"filename": "report.pdf",
|
||||
@@ -2658,6 +2776,31 @@ class TestResolveRunParamsUserId:
|
||||
assert run_context["user_id"] == "123456"
|
||||
assert run_context["channel_user_id"] == "123456"
|
||||
|
||||
@pytest.mark.parametrize(
    "kwargs",
    [
        {"user_id": "U-platform", "owner_user_id": "deerflow-user-1"},  # bound
        {"user_id": "U-platform"},  # unbound auth-enabled
        {"user_id": "feishu|ou_AbC/123"},  # unbound needing sanitization
    ],
)
def test_run_identity_matches_storage_bucket(self, kwargs, monkeypatch):
    """``run_context["user_id"]`` equals the storage bucket for the same message.

    For each identity shape (bound, unbound, unbound with characters that need
    sanitizing) the id that ``_resolve_run_params`` places in the run context
    must be exactly what ``_channel_storage_user_id`` returns, so the run
    identity and the upload/artifact bucket cannot silently diverge.
    """
    from app.channels.manager import _channel_storage_user_id

    manager = self._manager()
    monkeypatch.delenv("DEER_FLOW_AUTH_DISABLED", raising=False)
    message = InboundMessage(channel_name="slack", chat_id="C123", text="hi", **kwargs)

    _, _, run_context = manager._resolve_run_params(message, "thread-1")
    storage_bucket = _channel_storage_user_id(message)

    assert run_context["user_id"] == storage_bucket
|
||||
|
||||
def test_connection_owner_user_id_takes_precedence_over_platform_user_id(self, monkeypatch):
|
||||
manager = self._manager()
|
||||
monkeypatch.delenv("DEER_FLOW_AUTH_DISABLED", raising=False)
|
||||
@@ -3429,6 +3572,60 @@ class TestFormatArtifactText:
|
||||
|
||||
|
||||
class TestHandleChatWithArtifacts:
|
||||
def test_bound_owner_artifacts_resolve_from_owner_outputs_bucket(self, tmp_path, monkeypatch):
    """A bound-owner run attaches presented files from the owner's outputs directory.

    A report is pre-created under the ``owner-1`` outputs directory for the
    thread; the mocked run "presents" it via ``present_files`` and the single
    outbound message must carry an attachment whose ``actual_path`` is that file.
    """
    from app.channels.manager import ChannelManager
    from deerflow.config.paths import Paths

    paths = Paths(tmp_path)
    monkeypatch.setattr("deerflow.config.paths.get_paths", lambda: paths)

    report_path = paths.sandbox_outputs_dir("test-thread-123", user_id="owner-1") / "report.md"
    report_path.parent.mkdir(parents=True)
    report_path.write_text("owner report", encoding="utf-8")

    async def scenario():
        bus = MessageBus()
        store = ChannelStore(path=tmp_path / "store.json")
        manager = ChannelManager(bus=bus, store=store)

        present_files_call = {"name": "present_files", "args": {"filepaths": ["/mnt/user-data/outputs/report.md"]}}
        run_result = {
            "messages": [
                {"type": "human", "content": "generate report"},
                {
                    "type": "ai",
                    "content": "Here is your report.",
                    "tool_calls": [present_files_call],
                },
                {"type": "tool", "name": "present_files", "content": "ok"},
            ],
        }
        manager._client = _make_mock_langgraph_client(run_result=run_result)

        delivered = []

        def collect(msg):
            delivered.append(msg)

        bus.subscribe_outbound(collect)
        await manager.start()

        request = InboundMessage(
            channel_name="test",
            chat_id="c1",
            user_id="U-platform",
            owner_user_id="owner-1",
            connection_id="connection-1",
            text="generate report",
        )
        await bus.publish_inbound(request)
        await _wait_for(lambda: len(delivered) >= 1)
        await manager.stop()

        assert len(delivered) == 1
        attachments = delivered[0].attachments
        assert len(attachments) == 1
        assert attachments[0].actual_path == report_path

    _run(scenario())
|
||||
|
||||
def test_artifacts_appended_to_text(self):
|
||||
from app.channels.manager import ChannelManager
|
||||
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
import asyncio
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import patch
|
||||
|
||||
from fastapi import FastAPI
|
||||
@@ -303,3 +305,128 @@ def test_update_memory_fact_route_returns_specific_error_for_invalid_confidence(
|
||||
|
||||
assert response.status_code == 400
|
||||
assert response.json()["detail"] == "Invalid confidence value; must be between 0 and 1."
|
||||
|
||||
|
||||
def _internal_owner_request(owner_user_id: str) -> SimpleNamespace:
    """Return a stand-in request from a trusted internal caller acting for an owner.

    The object exposes only what these tests touch: ``headers`` carrying the
    internal owner header set to ``owner_user_id``, and ``state.user`` set to a
    user with the default id and the internal system role. Per the change under
    test, this is meant to resemble what ``AuthMiddleware`` stamps for a
    channel worker holding the internal token, combined with the header
    ``ChannelManager._fetch_gateway`` attaches via ``_owner_headers``.
    """
    from app.gateway.internal_auth import INTERNAL_OWNER_USER_ID_HEADER_NAME, INTERNAL_SYSTEM_ROLE
    from deerflow.runtime.user_context import DEFAULT_USER_ID

    internal_user = SimpleNamespace(id=DEFAULT_USER_ID, system_role=INTERNAL_SYSTEM_ROLE)
    owner_headers = {INTERNAL_OWNER_USER_ID_HEADER_NAME: owner_user_id}
    return SimpleNamespace(headers=owner_headers, state=SimpleNamespace(user=internal_user))
|
||||
|
||||
|
||||
def test_get_memory_honors_bound_owner_header() -> None:
    """``get_memory`` looks up the owner named in the header for an internal caller."""
    captured: dict[str, str] = {}
    owner_fact = {
        "id": "f",
        "content": "owner fact",
        "category": "context",
        "confidence": 0.9,
        "createdAt": "",
        "source": "owner",
    }

    def fake_get_memory_data(*, user_id: str) -> dict:
        # Record which bucket the router asked for.
        captured["user_id"] = user_id
        return _sample_memory(facts=[owner_fact])

    request = _internal_owner_request("owner-1")
    with patch("app.gateway.routers.memory.get_memory_data", side_effect=fake_get_memory_data):
        response = asyncio.run(memory.get_memory(request))

    assert captured["user_id"] == "owner-1"
    assert response.facts[0].content == "owner fact"
|
||||
|
||||
|
||||
def test_get_memory_sanitizes_unsafe_owner_header() -> None:
    """An owner id with unsafe characters is looked up under its sanitized form.

    The header carries the raw owner id. The router is expected to pass it
    through ``make_safe_user_id`` before the lookup, so the id handed to
    ``get_memory_data`` equals the sanitized value and differs from the raw one.
    Per the change under test, this keeps the memory bucket aligned with the
    owner's file/upload bucket and avoids a failure in ``_validate_user_id``.
    """
    from deerflow.config.paths import make_safe_user_id

    raw_owner = "feishu|ou_AbC/123"
    captured: dict[str, str] = {}

    def fake_get_memory_data(*, user_id: str) -> dict:
        captured["user_id"] = user_id
        return _sample_memory()

    request = _internal_owner_request(raw_owner)
    with patch("app.gateway.routers.memory.get_memory_data", side_effect=fake_get_memory_data):
        asyncio.run(memory.get_memory(request))

    sanitized_owner = make_safe_user_id(raw_owner)
    assert captured["user_id"] == sanitized_owner
    assert captured["user_id"] != raw_owner
|
||||
|
||||
|
||||
def test_get_memory_falls_back_to_effective_user_for_browser_requests() -> None:
    """Non-internal callers ignore the owner header and use the effective user.

    The request is built by ``_browser_request_with_spoofed_owner_header`` so
    this test exercises exactly the same spoofed browser request as the
    clear/import tests instead of keeping a hand-written duplicate of it.
    """
    seen: dict[str, str] = {}

    def fake_get_memory_data(*, user_id: str) -> dict:
        seen["user_id"] = user_id
        return _sample_memory()

    # A real browser user (system_role "user") must never be overridden even if
    # a spoofed owner header is present — the header is only honored for the
    # synthetic internal caller after the internal token is validated.
    browser_request = _browser_request_with_spoofed_owner_header()

    with patch("app.gateway.routers.memory.get_effective_user_id", return_value="real-user"), patch("app.gateway.routers.memory.get_memory_data", side_effect=fake_get_memory_data):
        asyncio.run(memory.get_memory(browser_request))

    assert seen["user_id"] == "real-user"
|
||||
|
||||
|
||||
def _browser_request_with_spoofed_owner_header() -> SimpleNamespace:
    """Return a stand-in request from an ordinary user that also sets the owner header.

    ``state.user`` is ``real-user`` with system role ``"user"``, while the
    internal owner header claims ``owner-1``; tests use it to check that the
    header is not honored for such callers.
    """
    from app.gateway.internal_auth import INTERNAL_OWNER_USER_ID_HEADER_NAME

    browser_user = SimpleNamespace(id="real-user", system_role="user")
    spoofed_headers = {INTERNAL_OWNER_USER_ID_HEADER_NAME: "owner-1"}
    return SimpleNamespace(headers=spoofed_headers, state=SimpleNamespace(user=browser_user))
|
||||
|
||||
|
||||
def test_clear_memory_scopes_destructive_write_to_bound_owner() -> None:
    """``clear_memory`` targets the header owner for internal callers only.

    First an internal request for ``owner-1`` must clear ``owner-1``; then a
    browser request carrying the same header must clear the effective user
    (``real-user``) instead.
    """
    captured: dict[str, str] = {}

    def fake_clear(*, user_id: str) -> dict:
        # Remember the most recent bucket that was asked to be cleared.
        captured["user_id"] = user_id
        return _sample_memory()

    with patch("app.gateway.routers.memory.clear_memory_data", side_effect=fake_clear):
        internal_request = _internal_owner_request("owner-1")
        asyncio.run(memory.clear_memory(internal_request))
        assert captured["user_id"] == "owner-1"

        with patch("app.gateway.routers.memory.get_effective_user_id", return_value="real-user"):
            spoofed_request = _browser_request_with_spoofed_owner_header()
            asyncio.run(memory.clear_memory(spoofed_request))
        assert captured["user_id"] == "real-user"
|
||||
|
||||
|
||||
def test_import_memory_scopes_overwrite_to_bound_owner() -> None:
    """``import_memory`` overwrites the header owner's bucket for internal callers only.

    An internal request for ``owner-1`` must import into ``owner-1``; a browser
    request carrying the same header must import into the effective user
    (``real-user``).
    """
    captured: dict[str, str] = {}
    payload = memory.MemoryResponse(**_sample_memory())

    def fake_import(_data: dict, *, user_id: str) -> dict:
        # Remember the most recent bucket that was asked to be overwritten.
        captured["user_id"] = user_id
        return _sample_memory()

    with patch("app.gateway.routers.memory.import_memory_data", side_effect=fake_import):
        internal_request = _internal_owner_request("owner-1")
        asyncio.run(memory.import_memory(payload, internal_request))
        assert captured["user_id"] == "owner-1"

        with patch("app.gateway.routers.memory.get_effective_user_id", return_value="real-user"):
            spoofed_request = _browser_request_with_spoofed_owner_header()
            asyncio.run(memory.import_memory(payload, spoofed_request))
        assert captured["user_id"] == "real-user"
|
||||
|
||||
Reference in New Issue
Block a user