From 97dd9ecf73f79a7aa45948dd0fe1e53ed46f386c Mon Sep 17 00:00:00 2001 From: Xinmin Zeng <135568692+fancyboi999@users.noreply.github.com> Date: Thu, 18 Jun 2026 09:27:05 +0800 Subject: [PATCH] fix(sandbox): stop flagging string-literal path fragments as unsafe absolute paths (#3623) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(sandbox): stop flagging string-literal path fragments as unsafe paths The host-bash absolute-path guard scans the raw command string, so /segment sequences inside string literals, f-strings, and templates were treated as absolute path arguments and rejected — e.g. python -c "print(f'/端口{port}')" or a REST template /devices/{id}/port. Whether a fragment tripped the guard depended on the character right before the slash (a word char suppressed the match), so the same literal could pass or fail unpredictably, pushing the model into retry loops that bloat context and wall-clock time. Exempt matches carrying non-ASCII characters or format braces: real host paths a command would open contain neither, so these are text, not paths. The guard is best-effort (not a security boundary), and plain ASCII host paths like /etc/passwd — including ones written inside a code string such as open('/etc/passwd') — stay rejected. * fix(sandbox): only exempt identifier-template braces, not bash brace expansion The literal-fragment exemption exempted any path fragment containing { or }, which let bash brace expansion (cat /etc/{passwd,shadow}) and ${VAR} expansion reconstitute real host paths past validate_local_bash_command_paths. Tighten the brace branch to only exempt fragments where every {...} block is a single identifier-like placeholder (/devices/{id}/port, f-string /{port}); reject ${VAR} shell-variable expansion. Add parametrized regression tests for the brace-expansion and shell-var bypasses. --- .../harness/deerflow/sandbox/tools.py | 55 +++++++++++++++++++ backend/tests/test_sandbox_tools_security.py | 48 ++++++++++++++++ 2 files changed, 103 insertions(+) diff --git a/backend/packages/harness/deerflow/sandbox/tools.py b/backend/packages/harness/deerflow/sandbox/tools.py index c9bec1569..76286ef44 100644 --- a/backend/packages/harness/deerflow/sandbox/tools.py +++ b/backend/packages/harness/deerflow/sandbox/tools.py @@ -24,6 +24,11 @@ from deerflow.sandbox.security import LOCAL_HOST_BASH_DISABLED_MESSAGE, is_host_ from deerflow.tools.types import Runtime _ABSOLUTE_PATH_PATTERN = re.compile(r"(?()]+)") +# A ``{...}`` block holding a single identifier-like placeholder (e.g. ``{id}`` +# in a REST template or ``{port}`` in an f-string). Bash brace expansion such as +# ``{passwd,shadow}`` or ``{,.bak}`` does NOT match (commas/dots/empty inner). +_IDENTIFIER_BRACE_BLOCK_PATTERN = re.compile(r"\{([^{}]*)\}") +_IDENTIFIER_PATTERN = re.compile(r"[A-Za-z_][A-Za-z0-9_]*") _FILE_URL_PATTERN = re.compile(r"\bfile://\S+", re.IGNORECASE) _URL_WITH_SCHEME_PATTERN = re.compile(r"^[a-z][a-z0-9+.-]*://", re.IGNORECASE) _URL_IN_COMMAND_PATTERN = re.compile(r"\b[a-z][a-z0-9+.-]*://[^\s\"'`;&|<>()]+", re.IGNORECASE) @@ -938,6 +943,54 @@ def resolve_and_validate_user_data_path(path: str, thread_data: ThreadDataState) return _resolve_and_validate_user_data_path(path, thread_data) +def _braces_are_identifier_placeholders_only(fragment: str) -> bool: + """Return True only if every ``{...}`` block is a single identifier placeholder. + + Identifier-only blocks (``{id}``, ``{port}``) come from REST templates and + f-strings and are text. Bash brace expansion (``{passwd,shadow}``, ``{,.bak}``, + ``{etc,var}``) reconstitutes real host paths at runtime, so it must NOT be + exempted. Stray, empty, or nested braces are rejected too (each ``{``/``}`` + must belong to one balanced single-placeholder block). + + ``${VAR}`` shell variable expansion (e.g. ``/home/${USER}/.ssh/id_rsa``) also + expands to a real host path at runtime, so a ``${`` anywhere disqualifies the + fragment even though the inner name is identifier-shaped. + """ + if "${" in fragment: + return False + blocks = _IDENTIFIER_BRACE_BLOCK_PATTERN.findall(fragment) + # Every brace must be part of a balanced ``{...}`` block (no stray/nested braces). + if fragment.count("{") != len(blocks) or fragment.count("}") != len(blocks): + return False + return all(_IDENTIFIER_PATTERN.fullmatch(inner) for inner in blocks) + + +def _is_non_path_literal_fragment(fragment: str) -> bool: + """Return True if a ``/segment`` match is almost certainly text, not a path. + + The absolute-path scan runs over the raw command string, so it also matches + ``/segment`` sequences sitting inside string literals, f-strings, and + templates (e.g. ``python -c "print(f'/端口{port}')"`` or a REST template + like ``/devices/{id}/port``). Non-ASCII characters and single identifier-like + ``{placeholder}`` braces do not appear in real host filesystem paths a command + would open, so treating such fragments as text removes those false positives. + + Bash brace expansion (``cat /etc/{passwd,shadow}``) is deliberately NOT + exempted: it expands to plain host paths at runtime, so only braces that are + single identifier placeholders are treated as text (see + :func:`_braces_are_identifier_placeholders_only`). + + This guard is best-effort, not a security boundary (see + :func:`validate_local_bash_command_paths`): plain ASCII host paths such as + ``/etc/passwd`` contain none of these markers and are still rejected. + """ + if any(ord(ch) > 127 for ch in fragment): + return True + if "{" in fragment or "}" in fragment: + return _braces_are_identifier_placeholders_only(fragment) + return False + + def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState | None) -> None: """Validate absolute paths in local-sandbox bash commands. @@ -970,6 +1023,8 @@ def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState if _is_in_spans(match.start(), url_spans): continue absolute_path = match.group() + if _is_non_path_literal_fragment(absolute_path): + continue if _is_allowed_local_bash_absolute_path(absolute_path, allowed_paths, allow_system_paths=True): continue diff --git a/backend/tests/test_sandbox_tools_security.py b/backend/tests/test_sandbox_tools_security.py index d43a1fcf0..084411114 100644 --- a/backend/tests/test_sandbox_tools_security.py +++ b/backend/tests/test_sandbox_tools_security.py @@ -445,6 +445,54 @@ def test_validate_local_bash_command_paths_allows_http_url_dotdot_segments() -> ) +@pytest.mark.parametrize( + "command", + [ + # f-string / string-literal fragments with CJK text or template braces are + # NOT path arguments and must not be flagged as unsafe absolute paths. + "python3 -c \"print(f'/端口{port}')\"", + "echo '健康检查 /端口 状态'", + "python3 -c \"x = f'/{port}'\"", + "python3 -c \"print('/devices/{id}/port')\"", + ], +) +def test_validate_local_bash_command_paths_allows_non_path_string_literals(command: str) -> None: + validate_local_bash_command_paths(command, _THREAD_DATA) + + +def test_validate_local_bash_command_paths_still_blocks_ascii_host_path_in_code() -> None: + """The literal exemption is shape-based (non-ASCII / identifier-template + braces); a plain ASCII host path stays blocked even when written inside a + code string, so the guard keeps nudging the model toward virtual paths.""" + with pytest.raises(PermissionError, match="Unsafe absolute paths"): + validate_local_bash_command_paths("python3 -c \"open('/etc/passwd').read()\"", _THREAD_DATA) + + +@pytest.mark.parametrize( + "command", + [ + # Bash brace expansion reconstitutes plain host paths at runtime + # (`cat /etc/{passwd,shadow}` -> `cat /etc/passwd /etc/shadow`), so the + # brace exemption must NOT fire on these — only single identifier-like + # template placeholders such as `/devices/{id}/port` are text. + "cat /etc/{passwd,shadow}", + "cat /etc/passwd{,.bak}", + "cat /{etc,var}/passwd", + 'bash -c "cat /etc/{passwd,shadow}"', + # ``${VAR}`` shell variable expansion is the same bypass class: bash + # substitutes a real host path at runtime even though `USER` is + # identifier-shaped, so it must stay blocked too. + "cat /home/${USER}/.ssh/id_rsa", + ], +) +def test_validate_local_bash_command_paths_blocks_brace_expansion_host_paths(command: str) -> None: + """Regression for the brace-expansion bypass: a `{...}` block that is not a + single identifier placeholder (commas, dots, leading separators) must keep + the host path blocked rather than be exempted as a literal.""" + with pytest.raises(PermissionError, match="Unsafe absolute paths"): + validate_local_bash_command_paths(command, _THREAD_DATA) + + def test_bash_tool_rejects_host_bash_when_local_sandbox_default(monkeypatch) -> None: runtime = SimpleNamespace( state={"sandbox": {"sandbox_id": "local"}, "thread_data": _THREAD_DATA.copy()},