mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-22 16:06:50 +00:00
feat(sandbox) Adds download file interface in Sandbox (#3038)
* Add download interface in Sandbox * fix * fix * del invalidate test * fix * safe download * improve
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import base64
|
||||
import errno
|
||||
import logging
|
||||
import shlex
|
||||
import threading
|
||||
@@ -6,11 +7,14 @@ import uuid
|
||||
|
||||
from agent_sandbox import Sandbox as AioSandboxClient
|
||||
|
||||
from deerflow.config.paths import VIRTUAL_PATH_PREFIX
|
||||
from deerflow.sandbox.sandbox import Sandbox
|
||||
from deerflow.sandbox.search import GrepMatch, path_matches, should_ignore_path, truncate_line
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_MAX_DOWNLOAD_SIZE = 100 * 1024 * 1024 # 100 MB
|
||||
|
||||
_ERROR_OBSERVATION_SIGNATURE = "'ErrorObservation' object has no attribute 'exit_code'"
|
||||
|
||||
|
||||
@@ -102,6 +106,49 @@ class AioSandbox(Sandbox):
|
||||
logger.error(f"Failed to read file in sandbox: {e}")
|
||||
return f"Error: {e}"
|
||||
|
||||
def download_file(self, path: str) -> bytes:
|
||||
"""Download file bytes from the sandbox.
|
||||
|
||||
Raises:
|
||||
PermissionError: If the path contains '..' traversal segments or is
|
||||
outside ``VIRTUAL_PATH_PREFIX``.
|
||||
OSError: If the file cannot be retrieved from the sandbox.
|
||||
"""
|
||||
# Reject path traversal before sending to the container API.
|
||||
# LocalSandbox gets this implicitly via _resolve_path;
|
||||
# here the path is forwarded verbatim so we must check explicitly.
|
||||
normalised = path.replace("\\", "/")
|
||||
for segment in normalised.split("/"):
|
||||
if segment == "..":
|
||||
logger.error(f"Refused download due to path traversal: {path}")
|
||||
raise PermissionError(f"Access denied: path traversal detected in '{path}'")
|
||||
|
||||
stripped_path = normalised.lstrip("/")
|
||||
allowed_prefix = VIRTUAL_PATH_PREFIX.lstrip("/")
|
||||
if stripped_path != allowed_prefix and not stripped_path.startswith(f"{allowed_prefix}/"):
|
||||
logger.error("Refused download outside allowed directory: path=%s, allowed_prefix=%s", path, VIRTUAL_PATH_PREFIX)
|
||||
raise PermissionError(f"Access denied: path must be under '{VIRTUAL_PATH_PREFIX}': '{path}'")
|
||||
|
||||
with self._lock:
|
||||
try:
|
||||
chunks: list[bytes] = []
|
||||
total = 0
|
||||
for chunk in self._client.file.download_file(path=path):
|
||||
total += len(chunk)
|
||||
if total > _MAX_DOWNLOAD_SIZE:
|
||||
raise OSError(
|
||||
errno.EFBIG,
|
||||
f"File exceeds maximum download size of {_MAX_DOWNLOAD_SIZE} bytes",
|
||||
path,
|
||||
)
|
||||
chunks.append(chunk)
|
||||
return b"".join(chunks)
|
||||
except OSError:
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to download file in sandbox: {e}")
|
||||
raise OSError(f"Failed to download file '{path}' from sandbox: {e}") from e
|
||||
|
||||
def list_dir(self, path: str, max_depth: int = 2) -> list[str]:
|
||||
"""List the contents of a directory in the sandbox.
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import errno
|
||||
import logging
|
||||
import ntpath
|
||||
import os
|
||||
import shutil
|
||||
@@ -7,10 +8,13 @@ from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import NamedTuple
|
||||
|
||||
from deerflow.config.paths import VIRTUAL_PATH_PREFIX
|
||||
from deerflow.sandbox.local.list_dir import list_dir
|
||||
from deerflow.sandbox.sandbox import Sandbox
|
||||
from deerflow.sandbox.search import GrepMatch, find_glob_matches, find_grep_matches
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class PathMapping:
|
||||
@@ -379,6 +383,28 @@ class LocalSandbox(Sandbox):
|
||||
# Re-raise with the original path for clearer error messages, hiding internal resolved paths
|
||||
raise type(e)(e.errno, e.strerror, path) from None
|
||||
|
||||
def download_file(self, path: str) -> bytes:
|
||||
normalised = path.replace("\\", "/")
|
||||
stripped_path = normalised.lstrip("/")
|
||||
allowed_prefix = VIRTUAL_PATH_PREFIX.lstrip("/")
|
||||
if stripped_path != allowed_prefix and not stripped_path.startswith(f"{allowed_prefix}/"):
|
||||
logger.error("Refused download outside allowed directory: path=%s, allowed_prefix=%s", path, VIRTUAL_PATH_PREFIX)
|
||||
raise PermissionError(errno.EACCES, f"Access denied: path must be under '{VIRTUAL_PATH_PREFIX}'", path)
|
||||
|
||||
resolved_path = self._resolve_path(path)
|
||||
max_download_size = 100 * 1024 * 1024
|
||||
try:
|
||||
file_size = os.path.getsize(resolved_path)
|
||||
if file_size > max_download_size:
|
||||
raise OSError(errno.EFBIG, f"File exceeds maximum download size of {max_download_size} bytes", path)
|
||||
# TOCTOU note: the file could grow between getsize() and read(); accepted
|
||||
# tradeoff since this is a controlled sandbox environment.
|
||||
with open(resolved_path, "rb") as f:
|
||||
return f.read()
|
||||
except OSError as e:
|
||||
# Re-raise with the original path for clearer error messages, hiding internal resolved paths
|
||||
raise type(e)(e.errno, e.strerror, path) from None
|
||||
|
||||
def write_file(self, path: str, content: str, append: bool = False) -> None:
|
||||
resolved = self._resolve_path_with_mapping(path)
|
||||
resolved_path = resolved.path
|
||||
|
||||
@@ -39,6 +39,25 @@ class Sandbox(ABC):
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def download_file(self, path: str) -> bytes:
|
||||
"""Download the binary content of a file.
|
||||
|
||||
Args:
|
||||
path: The absolute path of the file to download.
|
||||
|
||||
Returns:
|
||||
Raw file bytes.
|
||||
|
||||
Raises:
|
||||
PermissionError: If path traversal is detected or the path is outside
|
||||
the allowed virtual prefix.
|
||||
OSError: If the file cannot be read or does not exist. Both local
|
||||
and remote implementations must raise ``OSError`` so callers
|
||||
have a single exception type to handle.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def list_dir(self, path: str, max_depth=2) -> list[str]:
|
||||
"""List the contents of a directory.
|
||||
|
||||
Reference in New Issue
Block a user