mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-22 07:56:48 +00:00
feat(sandbox) Adds download file interface in Sandbox (#3038)
* Add download interface in Sandbox * fix * fix * del invalidate test * fix * safe download * improve
This commit is contained in:
@@ -1,4 +1,5 @@
|
|||||||
import base64
|
import base64
|
||||||
|
import errno
|
||||||
import logging
|
import logging
|
||||||
import shlex
|
import shlex
|
||||||
import threading
|
import threading
|
||||||
@@ -6,11 +7,14 @@ import uuid
|
|||||||
|
|
||||||
from agent_sandbox import Sandbox as AioSandboxClient
|
from agent_sandbox import Sandbox as AioSandboxClient
|
||||||
|
|
||||||
|
from deerflow.config.paths import VIRTUAL_PATH_PREFIX
|
||||||
from deerflow.sandbox.sandbox import Sandbox
|
from deerflow.sandbox.sandbox import Sandbox
|
||||||
from deerflow.sandbox.search import GrepMatch, path_matches, should_ignore_path, truncate_line
|
from deerflow.sandbox.search import GrepMatch, path_matches, should_ignore_path, truncate_line
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_MAX_DOWNLOAD_SIZE = 100 * 1024 * 1024 # 100 MB
|
||||||
|
|
||||||
_ERROR_OBSERVATION_SIGNATURE = "'ErrorObservation' object has no attribute 'exit_code'"
|
_ERROR_OBSERVATION_SIGNATURE = "'ErrorObservation' object has no attribute 'exit_code'"
|
||||||
|
|
||||||
|
|
||||||
@@ -102,6 +106,49 @@ class AioSandbox(Sandbox):
|
|||||||
logger.error(f"Failed to read file in sandbox: {e}")
|
logger.error(f"Failed to read file in sandbox: {e}")
|
||||||
return f"Error: {e}"
|
return f"Error: {e}"
|
||||||
|
|
||||||
|
def download_file(self, path: str) -> bytes:
|
||||||
|
"""Download file bytes from the sandbox.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
PermissionError: If the path contains '..' traversal segments or is
|
||||||
|
outside ``VIRTUAL_PATH_PREFIX``.
|
||||||
|
OSError: If the file cannot be retrieved from the sandbox.
|
||||||
|
"""
|
||||||
|
# Reject path traversal before sending to the container API.
|
||||||
|
# LocalSandbox gets this implicitly via _resolve_path;
|
||||||
|
# here the path is forwarded verbatim so we must check explicitly.
|
||||||
|
normalised = path.replace("\\", "/")
|
||||||
|
for segment in normalised.split("/"):
|
||||||
|
if segment == "..":
|
||||||
|
logger.error(f"Refused download due to path traversal: {path}")
|
||||||
|
raise PermissionError(f"Access denied: path traversal detected in '{path}'")
|
||||||
|
|
||||||
|
stripped_path = normalised.lstrip("/")
|
||||||
|
allowed_prefix = VIRTUAL_PATH_PREFIX.lstrip("/")
|
||||||
|
if stripped_path != allowed_prefix and not stripped_path.startswith(f"{allowed_prefix}/"):
|
||||||
|
logger.error("Refused download outside allowed directory: path=%s, allowed_prefix=%s", path, VIRTUAL_PATH_PREFIX)
|
||||||
|
raise PermissionError(f"Access denied: path must be under '{VIRTUAL_PATH_PREFIX}': '{path}'")
|
||||||
|
|
||||||
|
with self._lock:
|
||||||
|
try:
|
||||||
|
chunks: list[bytes] = []
|
||||||
|
total = 0
|
||||||
|
for chunk in self._client.file.download_file(path=path):
|
||||||
|
total += len(chunk)
|
||||||
|
if total > _MAX_DOWNLOAD_SIZE:
|
||||||
|
raise OSError(
|
||||||
|
errno.EFBIG,
|
||||||
|
f"File exceeds maximum download size of {_MAX_DOWNLOAD_SIZE} bytes",
|
||||||
|
path,
|
||||||
|
)
|
||||||
|
chunks.append(chunk)
|
||||||
|
return b"".join(chunks)
|
||||||
|
except OSError:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to download file in sandbox: {e}")
|
||||||
|
raise OSError(f"Failed to download file '{path}' from sandbox: {e}") from e
|
||||||
|
|
||||||
def list_dir(self, path: str, max_depth: int = 2) -> list[str]:
|
def list_dir(self, path: str, max_depth: int = 2) -> list[str]:
|
||||||
"""List the contents of a directory in the sandbox.
|
"""List the contents of a directory in the sandbox.
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import errno
|
import errno
|
||||||
|
import logging
|
||||||
import ntpath
|
import ntpath
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
@@ -7,10 +8,13 @@ from dataclasses import dataclass
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import NamedTuple
|
from typing import NamedTuple
|
||||||
|
|
||||||
|
from deerflow.config.paths import VIRTUAL_PATH_PREFIX
|
||||||
from deerflow.sandbox.local.list_dir import list_dir
|
from deerflow.sandbox.local.list_dir import list_dir
|
||||||
from deerflow.sandbox.sandbox import Sandbox
|
from deerflow.sandbox.sandbox import Sandbox
|
||||||
from deerflow.sandbox.search import GrepMatch, find_glob_matches, find_grep_matches
|
from deerflow.sandbox.search import GrepMatch, find_glob_matches, find_grep_matches
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class PathMapping:
|
class PathMapping:
|
||||||
@@ -379,6 +383,28 @@ class LocalSandbox(Sandbox):
|
|||||||
# Re-raise with the original path for clearer error messages, hiding internal resolved paths
|
# Re-raise with the original path for clearer error messages, hiding internal resolved paths
|
||||||
raise type(e)(e.errno, e.strerror, path) from None
|
raise type(e)(e.errno, e.strerror, path) from None
|
||||||
|
|
||||||
|
def download_file(self, path: str) -> bytes:
|
||||||
|
normalised = path.replace("\\", "/")
|
||||||
|
stripped_path = normalised.lstrip("/")
|
||||||
|
allowed_prefix = VIRTUAL_PATH_PREFIX.lstrip("/")
|
||||||
|
if stripped_path != allowed_prefix and not stripped_path.startswith(f"{allowed_prefix}/"):
|
||||||
|
logger.error("Refused download outside allowed directory: path=%s, allowed_prefix=%s", path, VIRTUAL_PATH_PREFIX)
|
||||||
|
raise PermissionError(errno.EACCES, f"Access denied: path must be under '{VIRTUAL_PATH_PREFIX}'", path)
|
||||||
|
|
||||||
|
resolved_path = self._resolve_path(path)
|
||||||
|
max_download_size = 100 * 1024 * 1024
|
||||||
|
try:
|
||||||
|
file_size = os.path.getsize(resolved_path)
|
||||||
|
if file_size > max_download_size:
|
||||||
|
raise OSError(errno.EFBIG, f"File exceeds maximum download size of {max_download_size} bytes", path)
|
||||||
|
# TOCTOU note: the file could grow between getsize() and read(); accepted
|
||||||
|
# tradeoff since this is a controlled sandbox environment.
|
||||||
|
with open(resolved_path, "rb") as f:
|
||||||
|
return f.read()
|
||||||
|
except OSError as e:
|
||||||
|
# Re-raise with the original path for clearer error messages, hiding internal resolved paths
|
||||||
|
raise type(e)(e.errno, e.strerror, path) from None
|
||||||
|
|
||||||
def write_file(self, path: str, content: str, append: bool = False) -> None:
|
def write_file(self, path: str, content: str, append: bool = False) -> None:
|
||||||
resolved = self._resolve_path_with_mapping(path)
|
resolved = self._resolve_path_with_mapping(path)
|
||||||
resolved_path = resolved.path
|
resolved_path = resolved.path
|
||||||
|
|||||||
@@ -39,6 +39,25 @@ class Sandbox(ABC):
|
|||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def download_file(self, path: str) -> bytes:
|
||||||
|
"""Download the binary content of a file.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
path: The absolute path of the file to download.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Raw file bytes.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
PermissionError: If path traversal is detected or the path is outside
|
||||||
|
the allowed virtual prefix.
|
||||||
|
OSError: If the file cannot be read or does not exist. Both local
|
||||||
|
and remote implementations must raise ``OSError`` so callers
|
||||||
|
have a single exception type to handle.
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def list_dir(self, path: str, max_depth=2) -> list[str]:
|
def list_dir(self, path: str, max_depth=2) -> list[str]:
|
||||||
"""List the contents of a directory.
|
"""List the contents of a directory.
|
||||||
|
|||||||
@@ -233,3 +233,88 @@ class TestConcurrentFileWrites:
|
|||||||
thread.join()
|
thread.join()
|
||||||
|
|
||||||
assert storage["content"] in {"seed\nA\nB\n", "seed\nB\nA\n"}
|
assert storage["content"] in {"seed\nA\nB\n", "seed\nB\nA\n"}
|
||||||
|
|
||||||
|
|
||||||
|
class TestDownloadFile:
|
||||||
|
"""Tests for AioSandbox.download_file."""
|
||||||
|
|
||||||
|
def test_returns_concatenated_bytes(self, sandbox):
|
||||||
|
"""download_file should join chunks from the client iterator into bytes."""
|
||||||
|
sandbox._client.file.download_file = MagicMock(return_value=[b"hel", b"lo"])
|
||||||
|
|
||||||
|
result = sandbox.download_file("/mnt/user-data/outputs/file.bin")
|
||||||
|
|
||||||
|
assert result == b"hello"
|
||||||
|
sandbox._client.file.download_file.assert_called_once_with(path="/mnt/user-data/outputs/file.bin")
|
||||||
|
|
||||||
|
def test_returns_empty_bytes_for_empty_file(self, sandbox):
|
||||||
|
"""download_file should return b'' when the iterator yields nothing."""
|
||||||
|
sandbox._client.file.download_file = MagicMock(return_value=iter([]))
|
||||||
|
|
||||||
|
result = sandbox.download_file("/mnt/user-data/outputs/empty.bin")
|
||||||
|
|
||||||
|
assert result == b""
|
||||||
|
|
||||||
|
def test_uses_lock_during_download(self, sandbox):
|
||||||
|
"""download_file should hold the lock while calling the client."""
|
||||||
|
lock_was_held = []
|
||||||
|
|
||||||
|
def tracking_download(path):
|
||||||
|
lock_was_held.append(sandbox._lock.locked())
|
||||||
|
return iter([b"data"])
|
||||||
|
|
||||||
|
sandbox._client.file.download_file = tracking_download
|
||||||
|
|
||||||
|
sandbox.download_file("/mnt/user-data/outputs/file.bin")
|
||||||
|
|
||||||
|
assert lock_was_held == [True], "download_file must hold the lock during client call"
|
||||||
|
|
||||||
|
def test_raises_oserror_on_client_error(self, sandbox):
|
||||||
|
"""download_file should wrap client exceptions as OSError."""
|
||||||
|
sandbox._client.file.download_file = MagicMock(side_effect=RuntimeError("network error"))
|
||||||
|
|
||||||
|
with pytest.raises(OSError, match="network error"):
|
||||||
|
sandbox.download_file("/mnt/user-data/outputs/file.bin")
|
||||||
|
|
||||||
|
def test_preserves_oserror_from_client(self, sandbox):
|
||||||
|
"""OSError raised by the client should propagate without re-wrapping."""
|
||||||
|
sandbox._client.file.download_file = MagicMock(side_effect=OSError("disk error"))
|
||||||
|
|
||||||
|
with pytest.raises(OSError, match="disk error"):
|
||||||
|
sandbox.download_file("/mnt/user-data/outputs/file.bin")
|
||||||
|
|
||||||
|
def test_rejects_path_outside_virtual_prefix_and_logs_error(self, sandbox, caplog):
|
||||||
|
"""download_file must reject downloads outside /mnt/user-data and log the reason."""
|
||||||
|
sandbox._client.file.download_file = MagicMock()
|
||||||
|
|
||||||
|
with caplog.at_level("ERROR"):
|
||||||
|
with pytest.raises(PermissionError, match="must be under"):
|
||||||
|
sandbox.download_file("/etc/passwd")
|
||||||
|
|
||||||
|
assert "outside allowed directory" in caplog.text
|
||||||
|
sandbox._client.file.download_file.assert_not_called()
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"path",
|
||||||
|
[
|
||||||
|
"/mnt/workspace/../../etc/passwd",
|
||||||
|
"../secret",
|
||||||
|
"/a/b/../../../etc/shadow",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_rejects_path_traversal(self, sandbox, path):
|
||||||
|
"""download_file must reject paths containing '..' before calling the client."""
|
||||||
|
sandbox._client.file.download_file = MagicMock()
|
||||||
|
|
||||||
|
with pytest.raises(PermissionError, match="path traversal"):
|
||||||
|
sandbox.download_file(path)
|
||||||
|
|
||||||
|
sandbox._client.file.download_file.assert_not_called()
|
||||||
|
|
||||||
|
def test_single_chunk(self, sandbox):
|
||||||
|
"""download_file should work correctly with a single-chunk response."""
|
||||||
|
sandbox._client.file.download_file = MagicMock(return_value=[b"single-chunk"])
|
||||||
|
|
||||||
|
result = sandbox.download_file("/mnt/user-data/outputs/single.bin")
|
||||||
|
|
||||||
|
assert result == b"single-chunk"
|
||||||
|
|||||||
@@ -204,6 +204,26 @@ class TestSymlinkEscapes:
|
|||||||
|
|
||||||
assert exc_info.value.errno == errno.EACCES
|
assert exc_info.value.errno == errno.EACCES
|
||||||
|
|
||||||
|
def test_download_file_blocks_symlink_escape_from_mount(self, tmp_path):
|
||||||
|
mount_dir = tmp_path / "mount"
|
||||||
|
mount_dir.mkdir()
|
||||||
|
outside_dir = tmp_path / "outside"
|
||||||
|
outside_dir.mkdir()
|
||||||
|
(outside_dir / "secret.bin").write_bytes(b"\x00secret")
|
||||||
|
_symlink_to(outside_dir, mount_dir / "escape", target_is_directory=True)
|
||||||
|
|
||||||
|
sandbox = LocalSandbox(
|
||||||
|
"test",
|
||||||
|
[
|
||||||
|
PathMapping(container_path="/mnt/user-data", local_path=str(mount_dir), read_only=False),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
with pytest.raises(PermissionError) as exc_info:
|
||||||
|
sandbox.download_file("/mnt/user-data/escape/secret.bin")
|
||||||
|
|
||||||
|
assert exc_info.value.errno == errno.EACCES
|
||||||
|
|
||||||
def test_write_file_blocks_symlink_escape_from_mount(self, tmp_path):
|
def test_write_file_blocks_symlink_escape_from_mount(self, tmp_path):
|
||||||
mount_dir = tmp_path / "mount"
|
mount_dir = tmp_path / "mount"
|
||||||
mount_dir.mkdir()
|
mount_dir.mkdir()
|
||||||
@@ -334,6 +354,74 @@ class TestSymlinkEscapes:
|
|||||||
assert existing.read_bytes() == b"original"
|
assert existing.read_bytes() == b"original"
|
||||||
|
|
||||||
|
|
||||||
|
class TestDownloadFileMappings:
|
||||||
|
"""download_file must use _resolve_path_with_mapping so path resolution, symlink
|
||||||
|
containment, and read-only awareness are consistent with read_file."""
|
||||||
|
|
||||||
|
def test_resolves_container_path_via_mapping(self, tmp_path):
|
||||||
|
"""download_file should resolve container paths through path mappings."""
|
||||||
|
data_dir = tmp_path / "data"
|
||||||
|
data_dir.mkdir()
|
||||||
|
(data_dir / "asset.bin").write_bytes(b"\x01\x02\x03")
|
||||||
|
|
||||||
|
sandbox = LocalSandbox(
|
||||||
|
"test",
|
||||||
|
[PathMapping(container_path="/mnt/user-data", local_path=str(data_dir))],
|
||||||
|
)
|
||||||
|
|
||||||
|
result = sandbox.download_file("/mnt/user-data/asset.bin")
|
||||||
|
|
||||||
|
assert result == b"\x01\x02\x03"
|
||||||
|
|
||||||
|
def test_raises_oserror_with_original_path_when_missing(self, tmp_path):
|
||||||
|
"""OSError filename should show the container path, not the resolved host path."""
|
||||||
|
data_dir = tmp_path / "data"
|
||||||
|
data_dir.mkdir()
|
||||||
|
|
||||||
|
sandbox = LocalSandbox(
|
||||||
|
"test",
|
||||||
|
[PathMapping(container_path="/mnt/user-data", local_path=str(data_dir))],
|
||||||
|
)
|
||||||
|
|
||||||
|
with pytest.raises(OSError) as exc_info:
|
||||||
|
sandbox.download_file("/mnt/user-data/missing.bin")
|
||||||
|
|
||||||
|
assert exc_info.value.filename == "/mnt/user-data/missing.bin"
|
||||||
|
|
||||||
|
def test_rejects_path_outside_virtual_prefix_and_logs_error(self, tmp_path, caplog):
|
||||||
|
"""download_file must reject paths outside /mnt/user-data and log the reason."""
|
||||||
|
data_dir = tmp_path / "data"
|
||||||
|
data_dir.mkdir()
|
||||||
|
(data_dir / "model.bin").write_bytes(b"weights")
|
||||||
|
|
||||||
|
sandbox = LocalSandbox(
|
||||||
|
"test",
|
||||||
|
[PathMapping(container_path="/mnt/user-data", local_path=str(data_dir), read_only=True)],
|
||||||
|
)
|
||||||
|
|
||||||
|
with caplog.at_level("ERROR"):
|
||||||
|
with pytest.raises(PermissionError) as exc_info:
|
||||||
|
sandbox.download_file("/mnt/skills/model.bin")
|
||||||
|
|
||||||
|
assert exc_info.value.errno == errno.EACCES
|
||||||
|
assert "outside allowed directory" in caplog.text
|
||||||
|
|
||||||
|
def test_readable_from_read_only_mount(self, tmp_path):
|
||||||
|
"""Read-only mounts must not block download_file — read-only only restricts writes."""
|
||||||
|
skills_dir = tmp_path / "skills"
|
||||||
|
skills_dir.mkdir()
|
||||||
|
(skills_dir / "model.bin").write_bytes(b"weights")
|
||||||
|
|
||||||
|
sandbox = LocalSandbox(
|
||||||
|
"test",
|
||||||
|
[PathMapping(container_path="/mnt/user-data", local_path=str(skills_dir), read_only=True)],
|
||||||
|
)
|
||||||
|
|
||||||
|
result = sandbox.download_file("/mnt/user-data/model.bin")
|
||||||
|
|
||||||
|
assert result == b"weights"
|
||||||
|
|
||||||
|
|
||||||
class TestMultipleMounts:
|
class TestMultipleMounts:
|
||||||
def test_multiple_read_write_mounts(self, tmp_path):
|
def test_multiple_read_write_mounts(self, tmp_path):
|
||||||
skills_dir = tmp_path / "skills"
|
skills_dir = tmp_path / "skills"
|
||||||
|
|||||||
Reference in New Issue
Block a user