mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-23 08:25:57 +00:00
fix(sandbox): resolve paths in read_file/write_file content for LocalSandbox (#1935)
* fix(sandbox): resolve paths in read_file/write_file content for LocalSandbox

  In LocalSandbox mode, read_file and write_file now transform container paths in file content, matching the path-handling behavior of the bash tool.

  - write_file: resolves virtual paths in content to system paths before writing, so scripts with /mnt/user-data paths work when executed
  - read_file: reverse-resolves system paths back to virtual paths in returned content for consistency

  This fixes scenarios where agents write Python scripts with virtual paths, then execute them via the bash tool expecting the paths to work.

  Fixes #1778

* fix(sandbox): address Copilot review — dedicated content resolver + forward-slash safety + tests

  - Extract _resolve_paths_in_content() separate from _resolve_paths_in_command() to decouple file-content path resolution from shell-command parsing
  - Normalize resolved paths to forward slashes to avoid Windows backslash escape issues in source files (e.g. \U in Python string literals)
  - Add 4 focused tests: write resolves content, forward-slash guarantee, read reverse-resolves content, and write→read roundtrip

* style: fix ruff lint — remove extraneous f-string prefix

* fix(sandbox): only reverse-resolve paths in agent-written files

  read_file previously applied _reverse_resolve_paths_in_output to ALL file content, which could silently rewrite paths in user uploads and external tool output (Willem Jiang review on #1935).

  Now tracks files written through write_file in _agent_written_paths. Only those files get reverse-resolved on read. Non-agent files are returned as-is.

---------

Co-authored-by: JasonOA888 <JasonOA888@users.noreply.github.com>
This commit is contained in:
@@ -62,6 +62,9 @@ class LocalSandbox(Sandbox):
|
||||
"""
|
||||
super().__init__(id)
|
||||
self.path_mappings = path_mappings or []
|
||||
# Track files written through write_file so read_file only
|
||||
# reverse-resolves paths in agent-authored content.
|
||||
self._agent_written_paths: set[str] = set()
|
||||
|
||||
def _is_read_only_path(self, resolved_path: str) -> bool:
|
||||
"""Check if a resolved path is under a read-only mount.
|
||||
@@ -205,6 +208,39 @@ class LocalSandbox(Sandbox):
|
||||
|
||||
return pattern.sub(replace_match, command)
|
||||
|
||||
def _resolve_paths_in_content(self, content: str) -> str:
    """Rewrite container path prefixes found in file content to local paths.

    This is the plain-text counterpart of ``_resolve_paths_in_command``:
    the content is scanned as ordinary text and each occurrence of a
    mapped container path is handed to ``_resolve_path``. Every
    replacement has its backslashes converted to forward slashes, so a
    Windows-style result (e.g. ``C:\\Users\\...``) cannot introduce
    backslash escape sequences into the source file being written.

    Args:
        content: File content that may mention container paths.

    Returns:
        The content with each matched container path replaced by its
        resolved local path, written with forward slashes. The content
        is returned unchanged when no path mappings are configured.
    """
    import re

    # Longest container prefix first, so a more specific mapping is tried
    # before a shorter prefix in the alternation built below.
    ordered = sorted(self.path_mappings, key=lambda m: len(m.container_path), reverse=True)
    if not ordered:
        return content

    # After the prefix: require "/", end of string, or a character that
    # cannot continue a path segment; then optionally consume the rest of
    # the path up to whitespace, a quote, or one of ; & | < > ( ).
    tail = r"(?=/|$|[^\w./-])(?:/[^\s\"';&|<>()]*)?"
    alternatives = [f"({re.escape(mapping.container_path)}{tail})" for mapping in ordered]
    matcher = re.compile("|".join(alternatives))

    def to_local(hit: re.Match) -> str:
        # Resolve the whole matched path, then force forward slashes so the
        # replacement is safe to embed in source files.
        local_path = self._resolve_path(hit.group(0))
        return local_path.replace("\\", "/")

    return matcher.sub(to_local, content)
|
||||
|
||||
@staticmethod
|
||||
def _get_shell() -> str:
|
||||
"""Detect available shell executable with fallback."""
|
||||
@@ -280,7 +316,14 @@ class LocalSandbox(Sandbox):
|
||||
resolved_path = self._resolve_path(path)
|
||||
try:
|
||||
with open(resolved_path, encoding="utf-8") as f:
|
||||
return f.read()
|
||||
content = f.read()
|
||||
# Only reverse-resolve paths in files that were previously written
|
||||
# by write_file (agent-authored content). User-uploaded files,
|
||||
# external tool output, and other non-agent content should not be
|
||||
# silently rewritten — see discussion on PR #1935.
|
||||
if resolved_path in self._agent_written_paths:
|
||||
content = self._reverse_resolve_paths_in_output(content)
|
||||
return content
|
||||
except OSError as e:
|
||||
# Re-raise with the original path for clearer error messages, hiding internal resolved paths
|
||||
raise type(e)(e.errno, e.strerror, path) from None
|
||||
@@ -293,9 +336,16 @@ class LocalSandbox(Sandbox):
|
||||
dir_path = os.path.dirname(resolved_path)
|
||||
if dir_path:
|
||||
os.makedirs(dir_path, exist_ok=True)
|
||||
# Resolve container paths in content to local paths
|
||||
# using the content-specific resolver (forward-slash safe)
|
||||
resolved_content = self._resolve_paths_in_content(content)
|
||||
mode = "a" if append else "w"
|
||||
with open(resolved_path, mode, encoding="utf-8") as f:
|
||||
f.write(content)
|
||||
f.write(resolved_content)
|
||||
# Track this path so read_file knows to reverse-resolve on read.
|
||||
# Only agent-written files get reverse-resolved; user uploads and
|
||||
# external tool output are left untouched.
|
||||
self._agent_written_paths.add(resolved_path)
|
||||
except OSError as e:
|
||||
# Re-raise with the original path for clearer error messages, hiding internal resolved paths
|
||||
raise type(e)(e.errno, e.strerror, path) from None
|
||||
|
||||
Reference in New Issue
Block a user