feat(isolation): wire user_id through all Paths and memory callsites

Pass user_id=get_effective_user_id() at every callsite that invokes
Paths methods or memory functions, enabling per-user filesystem isolation
throughout the harness and app layers.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
rayhpeng
2026-04-12 15:16:23 +08:00
parent 9af2f3e73c
commit 7ce9333200
24 changed files with 137 additions and 70 deletions
@@ -57,6 +57,7 @@ def test_get_thread_mounts_includes_acp_workspace(tmp_path, monkeypatch):
"""_get_thread_mounts must include /mnt/acp-workspace (read-only) for docker sandbox."""
aio_mod = importlib.import_module("deerflow.community.aio_sandbox.aio_sandbox_provider")
monkeypatch.setattr(aio_mod, "get_paths", lambda: Paths(base_dir=tmp_path))
monkeypatch.setattr(aio_mod, "get_effective_user_id", lambda: None)
mounts = aio_mod.AioSandboxProvider._get_thread_mounts("thread-3")
@@ -95,6 +96,7 @@ def test_get_thread_mounts_preserves_windows_host_path_style(tmp_path, monkeypat
aio_mod = importlib.import_module("deerflow.community.aio_sandbox.aio_sandbox_provider")
monkeypatch.setenv("DEER_FLOW_HOST_BASE_DIR", r"C:\Users\demo\deer-flow\backend\.deer-flow")
monkeypatch.setattr(aio_mod, "get_paths", lambda: Paths(base_dir=tmp_path))
monkeypatch.setattr(aio_mod, "get_effective_user_id", lambda: None)
mounts = aio_mod.AioSandboxProvider._get_thread_mounts("thread-10")
@@ -231,7 +231,7 @@ class TestResolveAttachments:
mock_paths = MagicMock()
mock_paths.sandbox_outputs_dir.return_value = outputs_dir
def resolve_side_effect(tid, vpath, *, user_id=None):
    """Fake path resolver: map a virtual path to a file for the test.

    NOTE(review): presumably installed as the ``side_effect`` of a mocked
    path-resolution method on ``mock_paths``; the hookup is outside this
    hunk, so confirm against the enclosing test.

    Args:
        tid: Thread id. Accepted only to match the call shape; unused.
        vpath: Virtual path being resolved.
        user_id: Keyword-only user id. Accepted so callers that pass
            ``user_id=...`` do not fail; unused.

    Returns:
        ``good_file`` when ``vpath`` contains ``"data.csv"``, otherwise
        ``tmp_path / "missing.txt"``.
    """
    # `good_file` and `tmp_path` come from the enclosing test's scope.
    if "data.csv" in vpath:
        return good_file
    return tmp_path / "missing.txt"
+37 -10
View File
@@ -1241,7 +1241,10 @@ class TestMemoryManagement:
with patch("deerflow.agents.memory.updater.import_memory_data", return_value=imported) as mock_import:
result = client.import_memory(imported)
mock_import.assert_called_once_with(imported)
assert mock_import.call_count == 1
call_args = mock_import.call_args
assert call_args.args == (imported,)
assert "user_id" in call_args.kwargs
assert result == imported
def test_reload_memory(self, client):
@@ -1487,9 +1490,12 @@ class TestUploads:
class TestArtifacts:
def test_get_artifact(self, client):
from deerflow.runtime.user_context import get_effective_user_id
with tempfile.TemporaryDirectory() as tmp:
paths = Paths(base_dir=tmp)
outputs = paths.sandbox_outputs_dir("t1")
user_id = get_effective_user_id()
outputs = paths.sandbox_outputs_dir("t1", user_id=user_id)
outputs.mkdir(parents=True)
(outputs / "result.txt").write_text("artifact content")
@@ -1500,9 +1506,12 @@ class TestArtifacts:
assert "text" in mime
def test_get_artifact_not_found(self, client):
from deerflow.runtime.user_context import get_effective_user_id
with tempfile.TemporaryDirectory() as tmp:
paths = Paths(base_dir=tmp)
paths.sandbox_user_data_dir("t1").mkdir(parents=True)
user_id = get_effective_user_id()
paths.sandbox_outputs_dir("t1", user_id=user_id).mkdir(parents=True)
with patch("deerflow.client.get_paths", return_value=paths):
with pytest.raises(FileNotFoundError):
@@ -1513,9 +1522,12 @@ class TestArtifacts:
client.get_artifact("t1", "bad/path/file.txt")
def test_get_artifact_path_traversal(self, client):
from deerflow.runtime.user_context import get_effective_user_id
with tempfile.TemporaryDirectory() as tmp:
paths = Paths(base_dir=tmp)
paths.sandbox_user_data_dir("t1").mkdir(parents=True)
user_id = get_effective_user_id()
paths.sandbox_outputs_dir("t1", user_id=user_id).mkdir(parents=True)
with patch("deerflow.client.get_paths", return_value=paths):
with pytest.raises(PathTraversalError):
@@ -1699,13 +1711,16 @@ class TestScenarioFileLifecycle:
def test_upload_then_read_artifact(self, client):
"""Upload a file, simulate agent producing artifact, read it back."""
from deerflow.runtime.user_context import get_effective_user_id
with tempfile.TemporaryDirectory() as tmp:
tmp_path = Path(tmp)
uploads_dir = tmp_path / "uploads"
uploads_dir.mkdir()
paths = Paths(base_dir=tmp_path)
outputs_dir = paths.sandbox_outputs_dir("t-artifact")
user_id = get_effective_user_id()
outputs_dir = paths.sandbox_outputs_dir("t-artifact", user_id=user_id)
outputs_dir.mkdir(parents=True)
# Upload phase
@@ -1955,11 +1970,14 @@ class TestScenarioThreadIsolation:
def test_artifacts_isolated_per_thread(self, client):
"""Artifacts in thread-A are not accessible from thread-B."""
from deerflow.runtime.user_context import get_effective_user_id
with tempfile.TemporaryDirectory() as tmp:
paths = Paths(base_dir=tmp)
outputs_a = paths.sandbox_outputs_dir("thread-a")
user_id = get_effective_user_id()
outputs_a = paths.sandbox_outputs_dir("thread-a", user_id=user_id)
outputs_a.mkdir(parents=True)
paths.sandbox_user_data_dir("thread-b").mkdir(parents=True)
paths.sandbox_outputs_dir("thread-b", user_id=user_id).mkdir(parents=True)
(outputs_a / "result.txt").write_text("thread-a artifact")
with patch("deerflow.client.get_paths", return_value=paths):
@@ -2864,9 +2882,12 @@ class TestUploadDeleteSymlink:
class TestArtifactHardening:
def test_artifact_directory_rejected(self, client):
"""get_artifact rejects paths that resolve to a directory."""
from deerflow.runtime.user_context import get_effective_user_id
with tempfile.TemporaryDirectory() as tmp:
paths = Paths(base_dir=tmp)
subdir = paths.sandbox_outputs_dir("t1") / "subdir"
user_id = get_effective_user_id()
subdir = paths.sandbox_outputs_dir("t1", user_id=user_id) / "subdir"
subdir.mkdir(parents=True)
with patch("deerflow.client.get_paths", return_value=paths):
@@ -2875,9 +2896,12 @@ class TestArtifactHardening:
def test_artifact_leading_slash_stripped(self, client):
"""Paths with leading slash are handled correctly."""
from deerflow.runtime.user_context import get_effective_user_id
with tempfile.TemporaryDirectory() as tmp:
paths = Paths(base_dir=tmp)
outputs = paths.sandbox_outputs_dir("t1")
user_id = get_effective_user_id()
outputs = paths.sandbox_outputs_dir("t1", user_id=user_id)
outputs.mkdir(parents=True)
(outputs / "file.txt").write_text("content")
@@ -2991,9 +3015,12 @@ class TestBugArtifactPrefixMatchTooLoose:
def test_exact_prefix_without_subpath_accepted(self, client):
"""Bare 'mnt/user-data' is accepted (will later fail as directory, not at prefix)."""
from deerflow.runtime.user_context import get_effective_user_id
with tempfile.TemporaryDirectory() as tmp:
paths = Paths(base_dir=tmp)
paths.sandbox_user_data_dir("t1").mkdir(parents=True)
user_id = get_effective_user_id()
paths.sandbox_outputs_dir("t1", user_id=user_id).mkdir(parents=True)
with patch("deerflow.client.get_paths", return_value=paths):
# Accepted at prefix check, but fails because it's a directory.
+6 -3
View File
@@ -262,8 +262,9 @@ class TestFileUploadIntegration:
# Physically exists
from deerflow.config.paths import get_paths
from deerflow.runtime.user_context import get_effective_user_id
assert (get_paths().sandbox_uploads_dir(tid) / "readme.txt").exists()
assert (get_paths().sandbox_uploads_dir(tid, user_id=get_effective_user_id()) / "readme.txt").exists()
def test_upload_duplicate_rename(self, e2e_env, tmp_path):
"""Uploading two files with the same name auto-renames the second."""
@@ -472,12 +473,13 @@ class TestArtifactAccess:
def test_get_artifact_happy_path(self, e2e_env):
"""Write a file to outputs, then read it back via get_artifact()."""
from deerflow.config.paths import get_paths
from deerflow.runtime.user_context import get_effective_user_id
c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
tid = str(uuid.uuid4())
# Create an output file in the thread's outputs directory
outputs_dir = get_paths().sandbox_outputs_dir(tid)
outputs_dir = get_paths().sandbox_outputs_dir(tid, user_id=get_effective_user_id())
outputs_dir.mkdir(parents=True, exist_ok=True)
(outputs_dir / "result.txt").write_text("hello artifact")
@@ -488,11 +490,12 @@ class TestArtifactAccess:
def test_get_artifact_nested_path(self, e2e_env):
"""Artifacts in subdirectories are accessible."""
from deerflow.config.paths import get_paths
from deerflow.runtime.user_context import get_effective_user_id
c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
tid = str(uuid.uuid4())
outputs_dir = get_paths().sandbox_outputs_dir(tid)
outputs_dir = get_paths().sandbox_outputs_dir(tid, user_id=get_effective_user_id())
sub = outputs_dir / "charts"
sub.mkdir(parents=True, exist_ok=True)
(sub / "data.json").write_text('{"x": 1}')
@@ -152,8 +152,10 @@ def test_get_work_dir_uses_base_dir_when_no_thread_id(monkeypatch, tmp_path):
def test_get_work_dir_uses_per_thread_path_when_thread_id_given(monkeypatch, tmp_path):
"""P1.1: _get_work_dir(thread_id) uses {base_dir}/threads/{thread_id}/acp-workspace/."""
from deerflow.config import paths as paths_module
from deerflow.runtime import user_context as uc_module
monkeypatch.setattr(paths_module, "get_paths", lambda: paths_module.Paths(base_dir=tmp_path))
monkeypatch.setattr(uc_module, "get_effective_user_id", lambda: None)
result = _get_work_dir("thread-abc-123")
expected = tmp_path / "threads" / "thread-abc-123" / "acp-workspace"
assert result == str(expected)
@@ -310,8 +312,10 @@ async def test_invoke_acp_agent_uses_fixed_acp_workspace(monkeypatch, tmp_path):
async def test_invoke_acp_agent_uses_per_thread_workspace_when_thread_id_in_config(monkeypatch, tmp_path):
"""P1.1: When thread_id is in the RunnableConfig, ACP agent uses per-thread workspace."""
from deerflow.config import paths as paths_module
from deerflow.runtime import user_context as uc_module
monkeypatch.setattr(paths_module, "get_paths", lambda: paths_module.Paths(base_dir=tmp_path))
monkeypatch.setattr(uc_module, "get_effective_user_id", lambda: None)
monkeypatch.setattr(
"deerflow.config.extensions_config.ExtensionsConfig.from_file",
+7 -6
View File
@@ -258,12 +258,13 @@ def test_update_memory_fact_route_preserves_omitted_fields() -> None:
)
assert response.status_code == 200
update_fact.assert_called_once_with(
fact_id="fact_edit",
content="User prefers spaces",
category=None,
confidence=None,
)
assert update_fact.call_count == 1
call_kwargs = update_fact.call_args.kwargs
assert call_kwargs.get("fact_id") == "fact_edit"
assert call_kwargs.get("content") == "User prefers spaces"
assert call_kwargs.get("category") is None
assert call_kwargs.get("confidence") is None
assert "user_id" in call_kwargs
assert response.json()["facts"] == updated_memory["facts"]
@@ -38,7 +38,7 @@ def test_present_files_keeps_virtual_outputs_path(tmp_path, monkeypatch):
monkeypatch.setattr(
present_file_tool_module,
"get_paths",
lambda: SimpleNamespace(resolve_virtual_path=lambda thread_id, path: artifact_path),
lambda: SimpleNamespace(resolve_virtual_path=lambda thread_id, path, *, user_id=None: artifact_path),
)
result = present_file_tool_module.present_file_tool.func(
+6 -3
View File
@@ -50,10 +50,13 @@ def test_delete_thread_data_rejects_invalid_thread_id(tmp_path):
def test_delete_thread_route_cleans_thread_directory(tmp_path):
from deerflow.runtime.user_context import get_effective_user_id
paths = Paths(tmp_path)
thread_dir = paths.thread_dir("thread-route")
paths.sandbox_work_dir("thread-route").mkdir(parents=True, exist_ok=True)
(paths.sandbox_work_dir("thread-route") / "notes.txt").write_text("hello", encoding="utf-8")
user_id = get_effective_user_id()
thread_dir = paths.thread_dir("thread-route", user_id=user_id)
paths.sandbox_work_dir("thread-route", user_id=user_id).mkdir(parents=True, exist_ok=True)
(paths.sandbox_work_dir("thread-route", user_id=user_id) / "notes.txt").write_text("hello", encoding="utf-8")
app = make_authed_test_app()
app.include_router(threads.router)
@@ -34,7 +34,9 @@ def _runtime(thread_id: str | None = THREAD_ID) -> MagicMock:
def _uploads_dir(tmp_path: Path, thread_id: str = THREAD_ID) -> Path:
    """Create and return the uploads directory for a thread under ``tmp_path``.

    The directory is resolved through ``Paths.sandbox_uploads_dir`` with the
    value of ``get_effective_user_id()`` passed as ``user_id``.

    Args:
        tmp_path: Base directory handed to ``Paths``.
        thread_id: Thread whose uploads directory is wanted.

    Returns:
        The uploads directory, created with any missing parents.
    """
    from deerflow.runtime.user_context import get_effective_user_id

    uploads = Paths(str(tmp_path)).sandbox_uploads_dir(
        thread_id,
        user_id=get_effective_user_id(),
    )
    # exist_ok=True makes repeated calls within one test safe.
    uploads.mkdir(parents=True, exist_ok=True)
    return uploads