mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-21 07:26:50 +00:00
[security] fix(upload): reject symlinked upload destinations (#2623)
* fix: reject symlinked upload destinations
* test: harden upload destination checks
* fix: address PR feedback for #2623
* test: cover safe upload re-uploads
* fix: preserve upload limit checks after rebase
* fix(upload): stream safe HTTP upload writes
This commit is contained in:
@@ -3,11 +3,12 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from app.channels.base import Channel
|
||||
from app.channels.message_bus import MessageBus, OutboundMessage, ResolvedAttachment
|
||||
from app.channels.message_bus import InboundMessage, MessageBus, OutboundMessage, ResolvedAttachment
|
||||
|
||||
|
||||
def _run(coro):
|
||||
@@ -248,6 +249,109 @@ class TestResolveAttachments:
|
||||
assert result[0].filename == "data.csv"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Inbound file ingestion tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestInboundFileIngestion:
    """Inbound attachments must not be written through a linked destination.

    Every scenario plants something at ``uploads/victim.txt`` and then ingests
    a message whose single attachment is also named ``victim.txt``.
    """

    @staticmethod
    def _victim_message():
        """Build the inbound message shared by all scenarios in this class."""
        attachment = {"filename": "victim.txt", "url": "https://example.invalid/victim.txt"}
        return InboundMessage(
            channel_name="test-channel",
            chat_id="chat-1",
            user_id="user-1",
            text="see attachment",
            files=[attachment],
        )

    @staticmethod
    def _ingest(manager, uploads_root, inbound, payload):
        """Run ``_ingest_inbound_files`` with a stub reader that returns *payload*.

        ``ensure_uploads_dir`` is redirected to *uploads_root* and the
        ``test-channel`` reader is registered only for the duration of the call.
        """

        async def fake_reader(file_info, client):
            return payload

        with (
            patch("deerflow.uploads.manager.ensure_uploads_dir", return_value=uploads_root),
            patch.dict(manager.INBOUND_FILE_READERS, {"test-channel": fake_reader}, clear=False),
        ):
            return _run(manager._ingest_inbound_files("thread-1", inbound))

    def test_rejects_preexisting_symlink_destination(self, tmp_path):
        """A symlink already sitting at the destination name yields no ingested files."""
        from app.channels import manager

        uploads_root = tmp_path / "uploads"
        uploads_root.mkdir()
        link_target = tmp_path / "outside-created.txt"
        planted_link = uploads_root / "victim.txt"
        # NOTE(review): the target is never created before linking, so this link
        # is dangling as well; following it would create the outside file.
        planted_link.symlink_to(link_target)

        ingested = self._ingest(manager, uploads_root, self._victim_message(), b"attacker data")

        assert ingested == []
        assert not link_target.exists()
        assert planted_link.is_symlink()

    def test_rejects_dangling_symlink_destination(self, tmp_path):
        """A dangling symlink at the destination name is neither followed nor replaced."""
        from app.channels import manager

        uploads_root = tmp_path / "uploads"
        uploads_root.mkdir()
        absent_target = tmp_path / "missing-created.txt"
        planted_link = uploads_root / "victim.txt"
        planted_link.symlink_to(absent_target)

        ingested = self._ingest(manager, uploads_root, self._victim_message(), b"attacker data")

        assert ingested == []
        assert not absent_target.exists()
        assert planted_link.is_symlink()

    def test_hardlinked_existing_file_is_not_overwritten(self, tmp_path):
        """A hard-linked destination keeps its content; the attachment lands under a new name."""
        from app.channels import manager

        payload = b"new attachment data"
        uploads_root = tmp_path / "uploads"
        uploads_root.mkdir()
        protected = tmp_path / "outside-created.txt"
        protected.write_text("protected", encoding="utf-8")
        hardlinked = uploads_root / "victim.txt"
        os.link(protected, hardlinked)

        ingested = self._ingest(manager, uploads_root, self._victim_message(), payload)

        expected_entry = {
            "filename": "victim_1.txt",
            "size": len(payload),
            "path": "/mnt/user-data/uploads/victim_1.txt",
            "is_image": False,
        }
        assert ingested == [expected_entry]
        # Both names of the shared inode still hold the original content.
        assert protected.read_text(encoding="utf-8") == "protected"
        assert hardlinked.read_text(encoding="utf-8") == "protected"
        assert (uploads_root / "victim_1.txt").read_bytes() == payload
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Channel base class _on_outbound with attachments
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -1,14 +1,20 @@
|
||||
"""Tests for deerflow.uploads.manager — shared upload management logic."""
|
||||
|
||||
import errno
|
||||
import os
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from deerflow.uploads.manager import (
|
||||
PathTraversalError,
|
||||
UnsafeUploadPathError,
|
||||
claim_unique_filename,
|
||||
delete_file_safe,
|
||||
list_files_in_dir,
|
||||
normalize_filename,
|
||||
validate_path_traversal,
|
||||
write_upload_file_no_symlink,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -97,6 +103,54 @@ class TestValidatePathTraversal:
|
||||
validate_path_traversal(link, tmp_path)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# write_upload_file_no_symlink
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestWriteUploadFileNoSymlink:
    """Unit tests for ``write_upload_file_no_symlink``."""

    def test_writes_new_file(self, tmp_path):
        """Writing to an unused name returns the destination path holding the payload."""
        written = write_upload_file_no_symlink(tmp_path, "notes.txt", b"hello")

        assert written == tmp_path / "notes.txt"
        assert written.read_bytes() == b"hello"

    def test_overwrites_existing_regular_file_with_single_link(self, tmp_path):
        """A regular file with link count 1 is replaced and still has link count 1."""
        target = tmp_path / "notes.txt"
        target.write_bytes(b"old contents")
        assert target.stat().st_nlink == 1

        returned = write_upload_file_no_symlink(tmp_path, "notes.txt", b"new contents")

        assert returned == target
        assert target.read_bytes() == b"new contents"
        assert target.stat().st_nlink == 1

    def test_fails_closed_without_no_follow_support(self, tmp_path, monkeypatch):
        """With ``os.O_NOFOLLOW`` removed, the write is refused and no file appears."""
        monkeypatch.delattr(os, "O_NOFOLLOW", raising=False)

        with pytest.raises(UnsafeUploadPathError, match="O_NOFOLLOW"):
            write_upload_file_no_symlink(tmp_path, "notes.txt", b"hello")

        assert not (tmp_path / "notes.txt").exists()

    def test_open_uses_nonblocking_flag_when_available(self, tmp_path):
        """The flags handed to ``os.open`` include ``O_NONBLOCK``.

        NOTE(review): ``os.O_NONBLOCK`` is read unconditionally below, so this
        test assumes a platform that defines it.
        """
        enxio = OSError(errno.ENXIO, "no reader")
        with (
            patch("deerflow.uploads.manager.os.open", side_effect=enxio) as open_mock,
            pytest.raises(UnsafeUploadPathError, match="Unsafe upload destination"),
        ):
            write_upload_file_no_symlink(tmp_path, "pipe.txt", b"hello")

        # Second positional argument of os.open is the flag bitmask.
        open_flags = open_mock.call_args.args[1]
        assert open_flags & os.O_NONBLOCK

    @pytest.mark.parametrize("open_errno", [errno.ENXIO, errno.EAGAIN])
    def test_nonblocking_special_file_open_errors_are_unsafe(self, tmp_path, open_errno):
        """ENXIO/EAGAIN from ``os.open`` surface as ``UnsafeUploadPathError``."""
        open_failure = OSError(open_errno, "would block")
        with (
            patch("deerflow.uploads.manager.os.open", side_effect=open_failure),
            pytest.raises(UnsafeUploadPathError, match="Unsafe upload destination"),
        ):
            write_upload_file_no_symlink(tmp_path, "pipe.txt", b"hello")

        assert not (tmp_path / "pipe.txt").exists()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# list_files_in_dir
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import asyncio
|
||||
import os
|
||||
import stat
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
@@ -428,6 +429,105 @@ def test_upload_files_rejects_dotdot_and_dot_filenames(tmp_path):
|
||||
assert [f.name for f in thread_uploads_dir.iterdir()] == ["passwd"]
|
||||
|
||||
|
||||
def test_upload_files_rejects_preexisting_symlink_destination(tmp_path):
    """An upload whose destination is a symlink to an existing file is skipped, not followed."""
    uploads_root = tmp_path / "uploads"
    uploads_root.mkdir(parents=True)
    protected = tmp_path / "outside.txt"
    protected.write_text("protected", encoding="utf-8")
    planted_link = uploads_root / "victim.txt"
    planted_link.symlink_to(protected)

    sandbox_provider = MagicMock(uses_thread_data_mounts=True)
    incoming = UploadFile(filename="victim.txt", file=BytesIO(b"attacker upload"))

    with (
        patch.object(uploads, "get_uploads_dir", return_value=uploads_root),
        patch.object(uploads, "ensure_uploads_dir", return_value=uploads_root),
        patch.object(uploads, "get_sandbox_provider", return_value=sandbox_provider),
    ):
        outcome = asyncio.run(uploads.upload_files("thread-local", files=[incoming]))

    # The request is reported as failed with the file listed as skipped.
    assert outcome.success is False
    assert outcome.files == []
    assert outcome.skipped_files == ["victim.txt"]
    assert "skipped 1 unsafe file" in outcome.message
    # Neither the link nor the file it points to was touched.
    assert protected.read_text(encoding="utf-8") == "protected"
    assert planted_link.is_symlink()
|
||||
|
||||
|
||||
def test_upload_files_rejects_dangling_symlink_destination(tmp_path):
    """An upload whose destination is a dangling symlink is skipped and creates nothing."""
    uploads_root = tmp_path / "uploads"
    uploads_root.mkdir(parents=True)
    absent_target = tmp_path / "missing-target.txt"
    planted_link = uploads_root / "victim.txt"
    planted_link.symlink_to(absent_target)

    sandbox_provider = MagicMock(uses_thread_data_mounts=True)
    incoming = UploadFile(filename="victim.txt", file=BytesIO(b"attacker upload"))

    with (
        patch.object(uploads, "get_uploads_dir", return_value=uploads_root),
        patch.object(uploads, "ensure_uploads_dir", return_value=uploads_root),
        patch.object(uploads, "get_sandbox_provider", return_value=sandbox_provider),
    ):
        outcome = asyncio.run(uploads.upload_files("thread-local", files=[incoming]))

    assert outcome.success is False
    assert outcome.files == []
    assert outcome.skipped_files == ["victim.txt"]
    # Following the link would have created the missing target.
    assert not absent_target.exists()
    assert planted_link.is_symlink()
|
||||
|
||||
|
||||
def test_upload_files_rejects_hardlinked_destination_without_truncating(tmp_path):
    """An upload onto a hard-linked destination is skipped and the shared content survives."""
    uploads_root = tmp_path / "uploads"
    uploads_root.mkdir(parents=True)
    protected = tmp_path / "outside.txt"
    protected.write_text("protected", encoding="utf-8")
    hardlinked = uploads_root / "victim.txt"
    os.link(protected, hardlinked)

    sandbox_provider = MagicMock(uses_thread_data_mounts=True)
    incoming = UploadFile(filename="victim.txt", file=BytesIO(b"attacker upload"))

    with (
        patch.object(uploads, "get_uploads_dir", return_value=uploads_root),
        patch.object(uploads, "ensure_uploads_dir", return_value=uploads_root),
        patch.object(uploads, "get_sandbox_provider", return_value=sandbox_provider),
    ):
        outcome = asyncio.run(uploads.upload_files("thread-local", files=[incoming]))

    assert outcome.success is False
    assert outcome.files == []
    assert outcome.skipped_files == ["victim.txt"]
    # Both names of the shared inode must still read the original text.
    assert protected.read_text(encoding="utf-8") == "protected"
    assert hardlinked.read_text(encoding="utf-8") == "protected"
|
||||
|
||||
|
||||
def test_upload_files_overwrites_existing_regular_file(tmp_path):
    """Re-uploading over an ordinary single-link file succeeds and replaces its content."""
    uploads_root = tmp_path / "uploads"
    uploads_root.mkdir(parents=True)
    previous_upload = uploads_root / "notes.txt"
    previous_upload.write_bytes(b"old upload")
    assert previous_upload.stat().st_nlink == 1

    sandbox_provider = MagicMock(uses_thread_data_mounts=True)
    incoming = UploadFile(filename="notes.txt", file=BytesIO(b"new upload"))

    with (
        patch.object(uploads, "get_uploads_dir", return_value=uploads_root),
        patch.object(uploads, "ensure_uploads_dir", return_value=uploads_root),
        patch.object(uploads, "get_sandbox_provider", return_value=sandbox_provider),
    ):
        outcome = asyncio.run(uploads.upload_files("thread-local", files=[incoming]))

    assert outcome.success is True
    reported_names = [entry["filename"] for entry in outcome.files]
    assert reported_names == ["notes.txt"]
    assert previous_upload.read_bytes() == b"new upload"
    # The file still has exactly one link after the overwrite.
    assert previous_upload.stat().st_nlink == 1
|
||||
|
||||
|
||||
def test_delete_uploaded_file_removes_generated_markdown_companion(tmp_path):
|
||||
thread_uploads_dir = tmp_path / "uploads"
|
||||
thread_uploads_dir.mkdir(parents=True)
|
||||
|
||||
Reference in New Issue
Block a user