[security] fix(upload): reject symlinked upload destinations (#2623)

* fix: reject symlinked upload destinations

* test: harden upload destination checks

* fix: address PR feedback for #2623

* test: cover safe upload re-uploads

* fix: preserve upload limit checks after rebase

* fix(upload): stream safe HTTP upload writes
This commit is contained in:
Hinotobi
2026-05-02 15:19:28 +08:00
committed by GitHub
parent ca3332f8bf
commit e543bbf5d6
6 changed files with 369 additions and 16 deletions
+105 -1
View File
@@ -3,11 +3,12 @@
from __future__ import annotations
import asyncio
import os
from pathlib import Path
from unittest.mock import MagicMock, patch
from app.channels.base import Channel
from app.channels.message_bus import MessageBus, OutboundMessage, ResolvedAttachment
from app.channels.message_bus import InboundMessage, MessageBus, OutboundMessage, ResolvedAttachment
def _run(coro):
@@ -248,6 +249,109 @@ class TestResolveAttachments:
assert result[0].filename == "data.csv"
# ---------------------------------------------------------------------------
# Inbound file ingestion tests
# ---------------------------------------------------------------------------
class TestInboundFileIngestion:
def test_rejects_preexisting_symlink_destination(self, tmp_path):
from app.channels import manager
uploads_dir = tmp_path / "uploads"
uploads_dir.mkdir()
outside_file = tmp_path / "outside-created.txt"
(uploads_dir / "victim.txt").symlink_to(outside_file)
msg = InboundMessage(
channel_name="test-channel",
chat_id="chat-1",
user_id="user-1",
text="see attachment",
files=[{"filename": "victim.txt", "url": "https://example.invalid/victim.txt"}],
)
async def fake_reader(file_info, client):
return b"attacker data"
with (
patch("deerflow.uploads.manager.ensure_uploads_dir", return_value=uploads_dir),
patch.dict(manager.INBOUND_FILE_READERS, {"test-channel": fake_reader}, clear=False),
):
result = _run(manager._ingest_inbound_files("thread-1", msg))
assert result == []
assert not outside_file.exists()
assert (uploads_dir / "victim.txt").is_symlink()
def test_rejects_dangling_symlink_destination(self, tmp_path):
from app.channels import manager
uploads_dir = tmp_path / "uploads"
uploads_dir.mkdir()
missing_target = tmp_path / "missing-created.txt"
(uploads_dir / "victim.txt").symlink_to(missing_target)
msg = InboundMessage(
channel_name="test-channel",
chat_id="chat-1",
user_id="user-1",
text="see attachment",
files=[{"filename": "victim.txt", "url": "https://example.invalid/victim.txt"}],
)
async def fake_reader(file_info, client):
return b"attacker data"
with (
patch("deerflow.uploads.manager.ensure_uploads_dir", return_value=uploads_dir),
patch.dict(manager.INBOUND_FILE_READERS, {"test-channel": fake_reader}, clear=False),
):
result = _run(manager._ingest_inbound_files("thread-1", msg))
assert result == []
assert not missing_target.exists()
assert (uploads_dir / "victim.txt").is_symlink()
def test_hardlinked_existing_file_is_not_overwritten(self, tmp_path):
from app.channels import manager
uploads_dir = tmp_path / "uploads"
uploads_dir.mkdir()
outside_file = tmp_path / "outside-created.txt"
outside_file.write_text("protected", encoding="utf-8")
os.link(outside_file, uploads_dir / "victim.txt")
msg = InboundMessage(
channel_name="test-channel",
chat_id="chat-1",
user_id="user-1",
text="see attachment",
files=[{"filename": "victim.txt", "url": "https://example.invalid/victim.txt"}],
)
async def fake_reader(file_info, client):
return b"new attachment data"
with (
patch("deerflow.uploads.manager.ensure_uploads_dir", return_value=uploads_dir),
patch.dict(manager.INBOUND_FILE_READERS, {"test-channel": fake_reader}, clear=False),
):
result = _run(manager._ingest_inbound_files("thread-1", msg))
assert result == [
{
"filename": "victim_1.txt",
"size": len(b"new attachment data"),
"path": "/mnt/user-data/uploads/victim_1.txt",
"is_image": False,
}
]
assert outside_file.read_text(encoding="utf-8") == "protected"
assert (uploads_dir / "victim.txt").read_text(encoding="utf-8") == "protected"
assert (uploads_dir / "victim_1.txt").read_bytes() == b"new attachment data"
# ---------------------------------------------------------------------------
# Channel base class _on_outbound with attachments
# ---------------------------------------------------------------------------
+54
View File
@@ -1,14 +1,20 @@
"""Tests for deerflow.uploads.manager — shared upload management logic."""
import errno
import os
from unittest.mock import patch
import pytest
from deerflow.uploads.manager import (
PathTraversalError,
UnsafeUploadPathError,
claim_unique_filename,
delete_file_safe,
list_files_in_dir,
normalize_filename,
validate_path_traversal,
write_upload_file_no_symlink,
)
# ---------------------------------------------------------------------------
@@ -97,6 +103,54 @@ class TestValidatePathTraversal:
validate_path_traversal(link, tmp_path)
# ---------------------------------------------------------------------------
# write_upload_file_no_symlink
# ---------------------------------------------------------------------------
class TestWriteUploadFileNoSymlink:
def test_writes_new_file(self, tmp_path):
dest = write_upload_file_no_symlink(tmp_path, "notes.txt", b"hello")
assert dest == tmp_path / "notes.txt"
assert dest.read_bytes() == b"hello"
def test_overwrites_existing_regular_file_with_single_link(self, tmp_path):
dest = tmp_path / "notes.txt"
dest.write_bytes(b"old contents")
assert os.stat(dest).st_nlink == 1
result = write_upload_file_no_symlink(tmp_path, "notes.txt", b"new contents")
assert result == dest
assert dest.read_bytes() == b"new contents"
assert os.stat(dest).st_nlink == 1
def test_fails_closed_without_no_follow_support(self, tmp_path, monkeypatch):
monkeypatch.delattr(os, "O_NOFOLLOW", raising=False)
with pytest.raises(UnsafeUploadPathError, match="O_NOFOLLOW"):
write_upload_file_no_symlink(tmp_path, "notes.txt", b"hello")
assert not (tmp_path / "notes.txt").exists()
def test_open_uses_nonblocking_flag_when_available(self, tmp_path):
with patch("deerflow.uploads.manager.os.open", side_effect=OSError(errno.ENXIO, "no reader")) as open_mock:
with pytest.raises(UnsafeUploadPathError, match="Unsafe upload destination"):
write_upload_file_no_symlink(tmp_path, "pipe.txt", b"hello")
flags = open_mock.call_args.args[1]
assert flags & os.O_NONBLOCK
@pytest.mark.parametrize("open_errno", [errno.ENXIO, errno.EAGAIN])
def test_nonblocking_special_file_open_errors_are_unsafe(self, tmp_path, open_errno):
with patch("deerflow.uploads.manager.os.open", side_effect=OSError(open_errno, "would block")):
with pytest.raises(UnsafeUploadPathError, match="Unsafe upload destination"):
write_upload_file_no_symlink(tmp_path, "pipe.txt", b"hello")
assert not (tmp_path / "pipe.txt").exists()
# ---------------------------------------------------------------------------
# list_files_in_dir
# ---------------------------------------------------------------------------
+100
View File
@@ -1,4 +1,5 @@
import asyncio
import os
import stat
from io import BytesIO
from pathlib import Path
@@ -428,6 +429,105 @@ def test_upload_files_rejects_dotdot_and_dot_filenames(tmp_path):
assert [f.name for f in thread_uploads_dir.iterdir()] == ["passwd"]
def test_upload_files_rejects_preexisting_symlink_destination(tmp_path):
thread_uploads_dir = tmp_path / "uploads"
thread_uploads_dir.mkdir(parents=True)
outside_file = tmp_path / "outside.txt"
outside_file.write_text("protected", encoding="utf-8")
(thread_uploads_dir / "victim.txt").symlink_to(outside_file)
provider = MagicMock()
provider.uses_thread_data_mounts = True
with (
patch.object(uploads, "get_uploads_dir", return_value=thread_uploads_dir),
patch.object(uploads, "ensure_uploads_dir", return_value=thread_uploads_dir),
patch.object(uploads, "get_sandbox_provider", return_value=provider),
):
file = UploadFile(filename="victim.txt", file=BytesIO(b"attacker upload"))
result = asyncio.run(uploads.upload_files("thread-local", files=[file]))
assert result.success is False
assert result.files == []
assert result.skipped_files == ["victim.txt"]
assert "skipped 1 unsafe file" in result.message
assert outside_file.read_text(encoding="utf-8") == "protected"
assert (thread_uploads_dir / "victim.txt").is_symlink()
def test_upload_files_rejects_dangling_symlink_destination(tmp_path):
thread_uploads_dir = tmp_path / "uploads"
thread_uploads_dir.mkdir(parents=True)
missing_target = tmp_path / "missing-target.txt"
(thread_uploads_dir / "victim.txt").symlink_to(missing_target)
provider = MagicMock()
provider.uses_thread_data_mounts = True
with (
patch.object(uploads, "get_uploads_dir", return_value=thread_uploads_dir),
patch.object(uploads, "ensure_uploads_dir", return_value=thread_uploads_dir),
patch.object(uploads, "get_sandbox_provider", return_value=provider),
):
file = UploadFile(filename="victim.txt", file=BytesIO(b"attacker upload"))
result = asyncio.run(uploads.upload_files("thread-local", files=[file]))
assert result.success is False
assert result.files == []
assert result.skipped_files == ["victim.txt"]
assert not missing_target.exists()
assert (thread_uploads_dir / "victim.txt").is_symlink()
def test_upload_files_rejects_hardlinked_destination_without_truncating(tmp_path):
thread_uploads_dir = tmp_path / "uploads"
thread_uploads_dir.mkdir(parents=True)
outside_file = tmp_path / "outside.txt"
outside_file.write_text("protected", encoding="utf-8")
os.link(outside_file, thread_uploads_dir / "victim.txt")
provider = MagicMock()
provider.uses_thread_data_mounts = True
with (
patch.object(uploads, "get_uploads_dir", return_value=thread_uploads_dir),
patch.object(uploads, "ensure_uploads_dir", return_value=thread_uploads_dir),
patch.object(uploads, "get_sandbox_provider", return_value=provider),
):
file = UploadFile(filename="victim.txt", file=BytesIO(b"attacker upload"))
result = asyncio.run(uploads.upload_files("thread-local", files=[file]))
assert result.success is False
assert result.files == []
assert result.skipped_files == ["victim.txt"]
assert outside_file.read_text(encoding="utf-8") == "protected"
assert (thread_uploads_dir / "victim.txt").read_text(encoding="utf-8") == "protected"
def test_upload_files_overwrites_existing_regular_file(tmp_path):
thread_uploads_dir = tmp_path / "uploads"
thread_uploads_dir.mkdir(parents=True)
existing_file = thread_uploads_dir / "notes.txt"
existing_file.write_bytes(b"old upload")
assert existing_file.stat().st_nlink == 1
provider = MagicMock()
provider.uses_thread_data_mounts = True
with (
patch.object(uploads, "get_uploads_dir", return_value=thread_uploads_dir),
patch.object(uploads, "ensure_uploads_dir", return_value=thread_uploads_dir),
patch.object(uploads, "get_sandbox_provider", return_value=provider),
):
file = UploadFile(filename="notes.txt", file=BytesIO(b"new upload"))
result = asyncio.run(uploads.upload_files("thread-local", files=[file]))
assert result.success is True
assert [file_info["filename"] for file_info in result.files] == ["notes.txt"]
assert existing_file.read_bytes() == b"new upload"
assert existing_file.stat().st_nlink == 1
def test_delete_uploaded_file_removes_generated_markdown_companion(tmp_path):
thread_uploads_dir = tmp_path / "uploads"
thread_uploads_dir.mkdir(parents=True)