mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-18 05:25:57 +00:00
fix(sandbox): stop flagging string-literal path fragments as unsafe absolute paths (#3623)
* fix(sandbox): stop flagging string-literal path fragments as unsafe paths
The host-bash absolute-path guard scans the raw command string, so /segment
sequences inside string literals, f-strings, and templates were treated as
absolute path arguments and rejected — e.g. python -c "print(f'/端口{port}')"
or a REST template /devices/{id}/port. Whether a fragment tripped the guard
depended on the character right before the slash (a word char suppressed the
match), so the same literal could pass or fail unpredictably, pushing the model
into retry loops that bloat context and wall-clock time.
Exempt matches carrying non-ASCII characters or format braces: real host paths
a command would open contain neither, so these are text, not paths. The guard
is best-effort (not a security boundary), and plain ASCII host paths like
/etc/passwd — including ones written inside a code string such as
open('/etc/passwd') — stay rejected.
* fix(sandbox): only exempt identifier-template braces, not bash brace expansion
The literal-fragment exemption exempted any path fragment containing { or },
which let bash brace expansion (cat /etc/{passwd,shadow}) and ${VAR} expansion
reconstitute real host paths past validate_local_bash_command_paths. Tighten
the brace branch to only exempt fragments where every {...} block is a single
identifier-like placeholder (/devices/{id}/port, f-string /{port}); reject
${VAR} shell-variable expansion. Add parametrized regression tests for the
brace-expansion and shell-var bypasses.
This commit is contained in:
@@ -24,6 +24,11 @@ from deerflow.sandbox.security import LOCAL_HOST_BASH_DISABLED_MESSAGE, is_host_
|
||||
from deerflow.tools.types import Runtime
|
||||
|
||||
_ABSOLUTE_PATH_PATTERN = re.compile(r"(?<![:\w])(?<!:/)/(?:[^\s\"'`;&|<>()]+)")
|
||||
# A ``{...}`` block holding a single identifier-like placeholder (e.g. ``{id}``
|
||||
# in a REST template or ``{port}`` in an f-string). Bash brace expansion such as
|
||||
# ``{passwd,shadow}`` or ``{,.bak}`` does NOT match (commas/dots/empty inner).
|
||||
_IDENTIFIER_BRACE_BLOCK_PATTERN = re.compile(r"\{([^{}]*)\}")
|
||||
_IDENTIFIER_PATTERN = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")
|
||||
_FILE_URL_PATTERN = re.compile(r"\bfile://\S+", re.IGNORECASE)
|
||||
_URL_WITH_SCHEME_PATTERN = re.compile(r"^[a-z][a-z0-9+.-]*://", re.IGNORECASE)
|
||||
_URL_IN_COMMAND_PATTERN = re.compile(r"\b[a-z][a-z0-9+.-]*://[^\s\"'`;&|<>()]+", re.IGNORECASE)
|
||||
@@ -938,6 +943,54 @@ def resolve_and_validate_user_data_path(path: str, thread_data: ThreadDataState)
|
||||
return _resolve_and_validate_user_data_path(path, thread_data)
|
||||
|
||||
|
||||
def _braces_are_identifier_placeholders_only(fragment: str) -> bool:
|
||||
"""Return True only if every ``{...}`` block is a single identifier placeholder.
|
||||
|
||||
Identifier-only blocks (``{id}``, ``{port}``) come from REST templates and
|
||||
f-strings and are text. Bash brace expansion (``{passwd,shadow}``, ``{,.bak}``,
|
||||
``{etc,var}``) reconstitutes real host paths at runtime, so it must NOT be
|
||||
exempted. Stray, empty, or nested braces are rejected too (each ``{``/``}``
|
||||
must belong to one balanced single-placeholder block).
|
||||
|
||||
``${VAR}`` shell variable expansion (e.g. ``/home/${USER}/.ssh/id_rsa``) also
|
||||
expands to a real host path at runtime, so a ``${`` anywhere disqualifies the
|
||||
fragment even though the inner name is identifier-shaped.
|
||||
"""
|
||||
if "${" in fragment:
|
||||
return False
|
||||
blocks = _IDENTIFIER_BRACE_BLOCK_PATTERN.findall(fragment)
|
||||
# Every brace must be part of a balanced ``{...}`` block (no stray/nested braces).
|
||||
if fragment.count("{") != len(blocks) or fragment.count("}") != len(blocks):
|
||||
return False
|
||||
return all(_IDENTIFIER_PATTERN.fullmatch(inner) for inner in blocks)
|
||||
|
||||
|
||||
def _is_non_path_literal_fragment(fragment: str) -> bool:
|
||||
"""Return True if a ``/segment`` match is almost certainly text, not a path.
|
||||
|
||||
The absolute-path scan runs over the raw command string, so it also matches
|
||||
``/segment`` sequences sitting inside string literals, f-strings, and
|
||||
templates (e.g. ``python -c "print(f'/端口{port}')"`` or a REST template
|
||||
like ``/devices/{id}/port``). Non-ASCII characters and single identifier-like
|
||||
``{placeholder}`` braces do not appear in real host filesystem paths a command
|
||||
would open, so treating such fragments as text removes those false positives.
|
||||
|
||||
Bash brace expansion (``cat /etc/{passwd,shadow}``) is deliberately NOT
|
||||
exempted: it expands to plain host paths at runtime, so only braces that are
|
||||
single identifier placeholders are treated as text (see
|
||||
:func:`_braces_are_identifier_placeholders_only`).
|
||||
|
||||
This guard is best-effort, not a security boundary (see
|
||||
:func:`validate_local_bash_command_paths`): plain ASCII host paths such as
|
||||
``/etc/passwd`` contain none of these markers and are still rejected.
|
||||
"""
|
||||
if any(ord(ch) > 127 for ch in fragment):
|
||||
return True
|
||||
if "{" in fragment or "}" in fragment:
|
||||
return _braces_are_identifier_placeholders_only(fragment)
|
||||
return False
|
||||
|
||||
|
||||
def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState | None) -> None:
|
||||
"""Validate absolute paths in local-sandbox bash commands.
|
||||
|
||||
@@ -970,6 +1023,8 @@ def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState
|
||||
if _is_in_spans(match.start(), url_spans):
|
||||
continue
|
||||
absolute_path = match.group()
|
||||
if _is_non_path_literal_fragment(absolute_path):
|
||||
continue
|
||||
if _is_allowed_local_bash_absolute_path(absolute_path, allowed_paths, allow_system_paths=True):
|
||||
continue
|
||||
|
||||
|
||||
@@ -445,6 +445,54 @@ def test_validate_local_bash_command_paths_allows_http_url_dotdot_segments() ->
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"command",
|
||||
[
|
||||
# f-string / string-literal fragments with CJK text or template braces are
|
||||
# NOT path arguments and must not be flagged as unsafe absolute paths.
|
||||
"python3 -c \"print(f'/端口{port}')\"",
|
||||
"echo '健康检查 /端口 状态'",
|
||||
"python3 -c \"x = f'/{port}'\"",
|
||||
"python3 -c \"print('/devices/{id}/port')\"",
|
||||
],
|
||||
)
|
||||
def test_validate_local_bash_command_paths_allows_non_path_string_literals(command: str) -> None:
|
||||
validate_local_bash_command_paths(command, _THREAD_DATA)
|
||||
|
||||
|
||||
def test_validate_local_bash_command_paths_still_blocks_ascii_host_path_in_code() -> None:
|
||||
"""The literal exemption is shape-based (non-ASCII / identifier-template
|
||||
braces); a plain ASCII host path stays blocked even when written inside a
|
||||
code string, so the guard keeps nudging the model toward virtual paths."""
|
||||
with pytest.raises(PermissionError, match="Unsafe absolute paths"):
|
||||
validate_local_bash_command_paths("python3 -c \"open('/etc/passwd').read()\"", _THREAD_DATA)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"command",
|
||||
[
|
||||
# Bash brace expansion reconstitutes plain host paths at runtime
|
||||
# (`cat /etc/{passwd,shadow}` -> `cat /etc/passwd /etc/shadow`), so the
|
||||
# brace exemption must NOT fire on these — only single identifier-like
|
||||
# template placeholders such as `/devices/{id}/port` are text.
|
||||
"cat /etc/{passwd,shadow}",
|
||||
"cat /etc/passwd{,.bak}",
|
||||
"cat /{etc,var}/passwd",
|
||||
'bash -c "cat /etc/{passwd,shadow}"',
|
||||
# ``${VAR}`` shell variable expansion is the same bypass class: bash
|
||||
# substitutes a real host path at runtime even though `USER` is
|
||||
# identifier-shaped, so it must stay blocked too.
|
||||
"cat /home/${USER}/.ssh/id_rsa",
|
||||
],
|
||||
)
|
||||
def test_validate_local_bash_command_paths_blocks_brace_expansion_host_paths(command: str) -> None:
|
||||
"""Regression for the brace-expansion bypass: a `{...}` block that is not a
|
||||
single identifier placeholder (commas, dots, leading separators) must keep
|
||||
the host path blocked rather than be exempted as a literal."""
|
||||
with pytest.raises(PermissionError, match="Unsafe absolute paths"):
|
||||
validate_local_bash_command_paths(command, _THREAD_DATA)
|
||||
|
||||
|
||||
def test_bash_tool_rejects_host_bash_when_local_sandbox_default(monkeypatch) -> None:
|
||||
runtime = SimpleNamespace(
|
||||
state={"sandbox": {"sandbox_id": "local"}, "thread_data": _THREAD_DATA.copy()},
|
||||
|
||||
Reference in New Issue
Block a user