diff --git a/backend/packages/harness/deerflow/sandbox/tools.py b/backend/packages/harness/deerflow/sandbox/tools.py index c9bec1569..76286ef44 100644 --- a/backend/packages/harness/deerflow/sandbox/tools.py +++ b/backend/packages/harness/deerflow/sandbox/tools.py @@ -24,6 +24,11 @@ from deerflow.sandbox.security import LOCAL_HOST_BASH_DISABLED_MESSAGE, is_host_ from deerflow.tools.types import Runtime _ABSOLUTE_PATH_PATTERN = re.compile(r"(?()]+)") +# A ``{...}`` block holding a single identifier-like placeholder (e.g. ``{id}`` +# in a REST template or ``{port}`` in an f-string). Bash brace expansion such as +# ``{passwd,shadow}`` or ``{,.bak}`` does NOT match (commas/dots/empty inner). +_IDENTIFIER_BRACE_BLOCK_PATTERN = re.compile(r"\{([^{}]*)\}") +_IDENTIFIER_PATTERN = re.compile(r"[A-Za-z_][A-Za-z0-9_]*") _FILE_URL_PATTERN = re.compile(r"\bfile://\S+", re.IGNORECASE) _URL_WITH_SCHEME_PATTERN = re.compile(r"^[a-z][a-z0-9+.-]*://", re.IGNORECASE) _URL_IN_COMMAND_PATTERN = re.compile(r"\b[a-z][a-z0-9+.-]*://[^\s\"'`;&|<>()]+", re.IGNORECASE) @@ -938,6 +943,54 @@ def resolve_and_validate_user_data_path(path: str, thread_data: ThreadDataState) return _resolve_and_validate_user_data_path(path, thread_data) +def _braces_are_identifier_placeholders_only(fragment: str) -> bool: + """Return True only if every ``{...}`` block is a single identifier placeholder. + + Identifier-only blocks (``{id}``, ``{port}``) come from REST templates and + f-strings and are text. Bash brace expansion (``{passwd,shadow}``, ``{,.bak}``, + ``{etc,var}``) reconstitutes real host paths at runtime, so it must NOT be + exempted. Stray, empty, or nested braces are rejected too (each ``{``/``}`` + must belong to one balanced single-placeholder block). + + ``${VAR}`` shell variable expansion (e.g. ``/home/${USER}/.ssh/id_rsa``) also + expands to a real host path at runtime, so a ``${`` anywhere disqualifies the + fragment even though the inner name is identifier-shaped. + """ + if "${" in fragment: + return False + blocks = _IDENTIFIER_BRACE_BLOCK_PATTERN.findall(fragment) + # Every brace must be part of a balanced ``{...}`` block (no stray/nested braces). + if fragment.count("{") != len(blocks) or fragment.count("}") != len(blocks): + return False + return all(_IDENTIFIER_PATTERN.fullmatch(inner) for inner in blocks) + + +def _is_non_path_literal_fragment(fragment: str) -> bool: + """Return True if a ``/segment`` match is almost certainly text, not a path. + + The absolute-path scan runs over the raw command string, so it also matches + ``/segment`` sequences sitting inside string literals, f-strings, and + templates (e.g. ``python -c "print(f'/端口{port}')"`` or a REST template + like ``/devices/{id}/port``). Non-ASCII characters and single identifier-like + ``{placeholder}`` braces do not appear in real host filesystem paths a command + would open, so treating such fragments as text removes those false positives. + + Bash brace expansion (``cat /etc/{passwd,shadow}``) is deliberately NOT + exempted: it expands to plain host paths at runtime, so only braces that are + single identifier placeholders are treated as text (see + :func:`_braces_are_identifier_placeholders_only`). + + This guard is best-effort, not a security boundary (see + :func:`validate_local_bash_command_paths`): plain ASCII host paths such as + ``/etc/passwd`` contain none of these markers and are still rejected. + """ + if any(ord(ch) > 127 for ch in fragment): + return True + if "{" in fragment or "}" in fragment: + return _braces_are_identifier_placeholders_only(fragment) + return False + + def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState | None) -> None: """Validate absolute paths in local-sandbox bash commands. @@ -970,6 +1023,8 @@ def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState if _is_in_spans(match.start(), url_spans): continue absolute_path = match.group() + if _is_non_path_literal_fragment(absolute_path): + continue if _is_allowed_local_bash_absolute_path(absolute_path, allowed_paths, allow_system_paths=True): continue diff --git a/backend/tests/test_sandbox_tools_security.py b/backend/tests/test_sandbox_tools_security.py index d43a1fcf0..084411114 100644 --- a/backend/tests/test_sandbox_tools_security.py +++ b/backend/tests/test_sandbox_tools_security.py @@ -445,6 +445,54 @@ def test_validate_local_bash_command_paths_allows_http_url_dotdot_segments() -> ) +@pytest.mark.parametrize( + "command", + [ + # f-string / string-literal fragments with CJK text or template braces are + # NOT path arguments and must not be flagged as unsafe absolute paths. + "python3 -c \"print(f'/端口{port}')\"", + "echo '健康检查 /端口 状态'", + "python3 -c \"x = f'/{port}'\"", + "python3 -c \"print('/devices/{id}/port')\"", + ], +) +def test_validate_local_bash_command_paths_allows_non_path_string_literals(command: str) -> None: + validate_local_bash_command_paths(command, _THREAD_DATA) + + +def test_validate_local_bash_command_paths_still_blocks_ascii_host_path_in_code() -> None: + """The literal exemption is shape-based (non-ASCII / identifier-template + braces); a plain ASCII host path stays blocked even when written inside a + code string, so the guard keeps nudging the model toward virtual paths.""" + with pytest.raises(PermissionError, match="Unsafe absolute paths"): + validate_local_bash_command_paths("python3 -c \"open('/etc/passwd').read()\"", _THREAD_DATA) + + +@pytest.mark.parametrize( + "command", + [ + # Bash brace expansion reconstitutes plain host paths at runtime + # (`cat /etc/{passwd,shadow}` -> `cat /etc/passwd /etc/shadow`), so the + # brace exemption must NOT fire on these — only single identifier-like + # template placeholders such as `/devices/{id}/port` are text. + "cat /etc/{passwd,shadow}", + "cat /etc/passwd{,.bak}", + "cat /{etc,var}/passwd", + 'bash -c "cat /etc/{passwd,shadow}"', + # ``${VAR}`` shell variable expansion is the same bypass class: bash + # substitutes a real host path at runtime even though `USER` is + # identifier-shaped, so it must stay blocked too. + "cat /home/${USER}/.ssh/id_rsa", + ], +) +def test_validate_local_bash_command_paths_blocks_brace_expansion_host_paths(command: str) -> None: + """Regression for the brace-expansion bypass: a `{...}` block that is not a + single identifier placeholder (commas, dots, leading separators) must keep + the host path blocked rather than be exempted as a literal.""" + with pytest.raises(PermissionError, match="Unsafe absolute paths"): + validate_local_bash_command_paths(command, _THREAD_DATA) + + def test_bash_tool_rejects_host_bash_when_local_sandbox_default(monkeypatch) -> None: runtime = SimpleNamespace( state={"sandbox": {"sandbox_id": "local"}, "thread_data": _THREAD_DATA.copy()},