mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-21 23:46:50 +00:00
Merge branch 'main' into release/2.0-rc
This commit is contained in:
@@ -254,9 +254,11 @@ def _assemble_from_features(
|
||||
from deerflow.agents.middlewares.view_image_middleware import ViewImageMiddleware
|
||||
|
||||
chain.append(ViewImageMiddleware())
|
||||
from deerflow.tools.builtins import view_image_tool
|
||||
|
||||
extra_tools.append(view_image_tool)
|
||||
if feat.sandbox is not False:
|
||||
from deerflow.tools.builtins import view_image_tool
|
||||
|
||||
extra_tools.append(view_image_tool)
|
||||
|
||||
# --- [11] Subagent ---
|
||||
if feat.subagent is not False:
|
||||
|
||||
@@ -9,6 +9,7 @@ from __future__ import annotations
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shlex
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
|
||||
@@ -86,6 +87,46 @@ def _format_container_mount(runtime: str, host_path: str, container_path: str, r
|
||||
return ["-v", mount_spec]
|
||||
|
||||
|
||||
def _redact_container_command_for_log(cmd: list[str]) -> list[str]:
|
||||
"""Return a Docker/Container command with environment values redacted."""
|
||||
redacted: list[str] = []
|
||||
redact_next_env = False
|
||||
|
||||
for arg in cmd:
|
||||
if redact_next_env:
|
||||
if "=" in arg:
|
||||
key = arg.split("=", 1)[0]
|
||||
redacted.append(f"{key}=<redacted>" if key else "<redacted>")
|
||||
else:
|
||||
redacted.append(arg)
|
||||
redact_next_env = False
|
||||
continue
|
||||
|
||||
if arg in {"-e", "--env"}:
|
||||
redacted.append(arg)
|
||||
redact_next_env = True
|
||||
continue
|
||||
|
||||
if arg.startswith("--env="):
|
||||
value = arg.removeprefix("--env=")
|
||||
if "=" in value:
|
||||
key = value.split("=", 1)[0]
|
||||
redacted.append(f"--env={key}=<redacted>" if key else "--env=<redacted>")
|
||||
else:
|
||||
redacted.append(arg)
|
||||
continue
|
||||
|
||||
redacted.append(arg)
|
||||
|
||||
return redacted
|
||||
|
||||
|
||||
def _format_container_command_for_log(cmd: list[str]) -> str:
|
||||
if os.name == "nt":
|
||||
return subprocess.list2cmdline(cmd)
|
||||
return shlex.join(cmd)
|
||||
|
||||
|
||||
class LocalContainerBackend(SandboxBackend):
|
||||
"""Backend that manages sandbox containers locally using Docker or Apple Container.
|
||||
|
||||
@@ -464,7 +505,8 @@ class LocalContainerBackend(SandboxBackend):
|
||||
|
||||
cmd.append(self._image)
|
||||
|
||||
logger.info(f"Starting container using {self._runtime}: {' '.join(cmd)}")
|
||||
log_cmd = _format_container_command_for_log(_redact_container_command_for_log(cmd))
|
||||
logger.info(f"Starting container using {self._runtime}: {log_cmd}")
|
||||
|
||||
try:
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import ast
|
||||
import html
|
||||
import json
|
||||
import re
|
||||
import uuid
|
||||
@@ -36,8 +37,8 @@ def _fix_messages(messages: list) -> list:
|
||||
if isinstance(msg, AIMessage) and getattr(msg, "tool_calls", []):
|
||||
xml_parts = []
|
||||
for tool in msg.tool_calls:
|
||||
args_xml = " ".join(f"<parameter={k}>{json.dumps(v, ensure_ascii=False)}</parameter>" for k, v in tool.get("args", {}).items())
|
||||
xml_parts.append(f"<tool_call> <function={tool['name']}> {args_xml} </function> </tool_call>")
|
||||
args_xml = " ".join(f"<parameter={html.escape(str(k), quote=False)}>{html.escape(v if isinstance(v, str) else json.dumps(v, ensure_ascii=False), quote=False)}</parameter>" for k, v in tool.get("args", {}).items())
|
||||
xml_parts.append(f"<tool_call> <function={html.escape(str(tool['name']), quote=False)}> {args_xml} </function> </tool_call>")
|
||||
full_text = f"{text}\n" + "\n".join(xml_parts) if text else "\n".join(xml_parts)
|
||||
fixed.append(AIMessage(content=full_text.strip() or " "))
|
||||
continue
|
||||
@@ -80,13 +81,24 @@ def _parse_xml_tool_call_to_dict(content: str) -> tuple[str, list[dict]]:
|
||||
func_match = re.search(r"<function=([^>]+)>", inner_content)
|
||||
if not func_match:
|
||||
continue
|
||||
function_name = func_match.group(1).strip()
|
||||
function_name = html.unescape(func_match.group(1).strip())
|
||||
|
||||
# Ignore nested tool blocks when extracting parameters for this call.
|
||||
# Nested `<tool_call>` sections represent separate invocations and
|
||||
# their `<parameter>` tags must not leak into the current call args.
|
||||
param_source_parts: list[str] = []
|
||||
nested_cursor = 0
|
||||
for nested_start, nested_end, _ in _iter_tool_call_blocks(inner_content):
|
||||
param_source_parts.append(inner_content[nested_cursor:nested_start])
|
||||
nested_cursor = nested_end
|
||||
param_source_parts.append(inner_content[nested_cursor:])
|
||||
param_source = "".join(param_source_parts)
|
||||
|
||||
args = {}
|
||||
param_pattern = re.compile(r"<parameter=([^>]+)>(.*?)</parameter>", re.DOTALL)
|
||||
for param_match in param_pattern.finditer(inner_content):
|
||||
key = param_match.group(1).strip()
|
||||
raw_value = param_match.group(2).strip()
|
||||
for param_match in param_pattern.finditer(param_source):
|
||||
key = html.unescape(param_match.group(1).strip())
|
||||
raw_value = html.unescape(param_match.group(2).strip())
|
||||
|
||||
# Attempt to deserialize string values into native Python types
|
||||
# to satisfy downstream Pydantic validation.
|
||||
|
||||
@@ -22,6 +22,13 @@ def list_dir(path: str, max_depth: int = 2) -> list[str]:
|
||||
if not root_path.is_dir():
|
||||
return result
|
||||
|
||||
def _is_within_root(candidate: Path) -> bool:
|
||||
try:
|
||||
candidate.relative_to(root_path)
|
||||
return True
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
def _traverse(current_path: Path, current_depth: int) -> None:
|
||||
"""Recursively traverse directories up to max_depth."""
|
||||
if current_depth > max_depth:
|
||||
@@ -32,8 +39,23 @@ def list_dir(path: str, max_depth: int = 2) -> list[str]:
|
||||
if should_ignore_name(item.name):
|
||||
continue
|
||||
|
||||
if item.is_symlink():
|
||||
try:
|
||||
item_resolved = item.resolve()
|
||||
if not _is_within_root(item_resolved):
|
||||
continue
|
||||
except OSError:
|
||||
continue
|
||||
post_fix = "/" if item_resolved.is_dir() else ""
|
||||
result.append(str(item_resolved) + post_fix)
|
||||
continue
|
||||
|
||||
item_resolved = item.resolve()
|
||||
if not _is_within_root(item_resolved):
|
||||
continue
|
||||
|
||||
post_fix = "/" if item.is_dir() else ""
|
||||
result.append(str(item.resolve()) + post_fix)
|
||||
result.append(str(item_resolved) + post_fix)
|
||||
|
||||
# Recurse into subdirectories if not at max depth
|
||||
if item.is_dir() and current_depth < max_depth:
|
||||
|
||||
@@ -5,6 +5,7 @@ import shutil
|
||||
import subprocess
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import NamedTuple
|
||||
|
||||
from deerflow.sandbox.local.list_dir import list_dir
|
||||
from deerflow.sandbox.sandbox import Sandbox
|
||||
@@ -20,6 +21,11 @@ class PathMapping:
|
||||
read_only: bool = False
|
||||
|
||||
|
||||
class ResolvedPath(NamedTuple):
|
||||
path: str
|
||||
mapping: PathMapping | None
|
||||
|
||||
|
||||
class LocalSandbox(Sandbox):
|
||||
@staticmethod
|
||||
def _shell_name(shell: str) -> str:
|
||||
@@ -91,7 +97,23 @@ class LocalSandbox(Sandbox):
|
||||
|
||||
return best_mapping.read_only
|
||||
|
||||
def _resolve_path(self, path: str) -> str:
|
||||
def _find_path_mapping(self, path: str) -> tuple[PathMapping, str] | None:
|
||||
path_str = str(path)
|
||||
|
||||
for mapping in sorted(self.path_mappings, key=lambda m: len(m.container_path.rstrip("/") or "/"), reverse=True):
|
||||
container_path = mapping.container_path.rstrip("/") or "/"
|
||||
if container_path == "/":
|
||||
if path_str.startswith("/"):
|
||||
return mapping, path_str.lstrip("/")
|
||||
continue
|
||||
|
||||
if path_str == container_path or path_str.startswith(container_path + "/"):
|
||||
relative = path_str[len(container_path) :].lstrip("/")
|
||||
return mapping, relative
|
||||
|
||||
return None
|
||||
|
||||
def _resolve_path_with_mapping(self, path: str) -> ResolvedPath:
|
||||
"""
|
||||
Resolve container path to actual local path using mappings.
|
||||
|
||||
@@ -99,22 +121,30 @@ class LocalSandbox(Sandbox):
|
||||
path: Path that might be a container path
|
||||
|
||||
Returns:
|
||||
Resolved local path
|
||||
Resolved local path and the matched mapping, if any
|
||||
"""
|
||||
path_str = str(path)
|
||||
|
||||
# Try each mapping (longest prefix first for more specific matches)
|
||||
for mapping in sorted(self.path_mappings, key=lambda m: len(m.container_path), reverse=True):
|
||||
container_path = mapping.container_path
|
||||
local_path = mapping.local_path
|
||||
if path_str == container_path or path_str.startswith(container_path + "/"):
|
||||
# Replace the container path prefix with local path
|
||||
relative = path_str[len(container_path) :].lstrip("/")
|
||||
resolved = str(Path(local_path) / relative) if relative else local_path
|
||||
return resolved
|
||||
mapping_match = self._find_path_mapping(path_str)
|
||||
if mapping_match is None:
|
||||
return ResolvedPath(path_str, None)
|
||||
|
||||
# No mapping found, return original path
|
||||
return path_str
|
||||
mapping, relative = mapping_match
|
||||
local_root = Path(mapping.local_path).resolve()
|
||||
resolved_path = (local_root / relative).resolve() if relative else local_root
|
||||
|
||||
try:
|
||||
resolved_path.relative_to(local_root)
|
||||
except ValueError as exc:
|
||||
raise PermissionError(errno.EACCES, "Access denied: path escapes mounted directory", path_str) from exc
|
||||
|
||||
return ResolvedPath(str(resolved_path), mapping)
|
||||
|
||||
def _resolve_path(self, path: str) -> str:
|
||||
return self._resolve_path_with_mapping(path).path
|
||||
|
||||
def _is_resolved_path_read_only(self, resolved: ResolvedPath) -> bool:
|
||||
return bool(resolved.mapping and resolved.mapping.read_only) or self._is_read_only_path(resolved.path)
|
||||
|
||||
def _reverse_resolve_path(self, path: str) -> str:
|
||||
"""
|
||||
@@ -309,8 +339,14 @@ class LocalSandbox(Sandbox):
|
||||
def list_dir(self, path: str, max_depth=2) -> list[str]:
|
||||
resolved_path = self._resolve_path(path)
|
||||
entries = list_dir(resolved_path, max_depth)
|
||||
# Reverse resolve local paths back to container paths in output
|
||||
return [self._reverse_resolve_paths_in_output(entry) for entry in entries]
|
||||
# Reverse resolve local paths back to container paths and preserve
|
||||
# list_dir's trailing "/" marker for directories.
|
||||
result: list[str] = []
|
||||
for entry in entries:
|
||||
is_dir = entry.endswith(("/", "\\"))
|
||||
reversed_entry = self._reverse_resolve_path(entry.rstrip("/\\")) if is_dir else self._reverse_resolve_path(entry)
|
||||
result.append(f"{reversed_entry}/" if is_dir and not reversed_entry.endswith("/") else reversed_entry)
|
||||
return result
|
||||
|
||||
def read_file(self, path: str) -> str:
|
||||
resolved_path = self._resolve_path(path)
|
||||
@@ -329,8 +365,9 @@ class LocalSandbox(Sandbox):
|
||||
raise type(e)(e.errno, e.strerror, path) from None
|
||||
|
||||
def write_file(self, path: str, content: str, append: bool = False) -> None:
|
||||
resolved_path = self._resolve_path(path)
|
||||
if self._is_read_only_path(resolved_path):
|
||||
resolved = self._resolve_path_with_mapping(path)
|
||||
resolved_path = resolved.path
|
||||
if self._is_resolved_path_read_only(resolved):
|
||||
raise OSError(errno.EROFS, "Read-only file system", path)
|
||||
try:
|
||||
dir_path = os.path.dirname(resolved_path)
|
||||
@@ -384,8 +421,9 @@ class LocalSandbox(Sandbox):
|
||||
], truncated
|
||||
|
||||
def update_file(self, path: str, content: bytes) -> None:
|
||||
resolved_path = self._resolve_path(path)
|
||||
if self._is_read_only_path(resolved_path):
|
||||
resolved = self._resolve_path_with_mapping(path)
|
||||
resolved_path = resolved.path
|
||||
if self._is_resolved_path_read_only(resolved):
|
||||
raise OSError(errno.EROFS, "Read-only file system", path)
|
||||
try:
|
||||
dir_path = os.path.dirname(resolved_path)
|
||||
|
||||
@@ -22,6 +22,9 @@ from deerflow.sandbox.security import LOCAL_HOST_BASH_DISABLED_MESSAGE, is_host_
|
||||
|
||||
_ABSOLUTE_PATH_PATTERN = re.compile(r"(?<![:\w])(?<!:/)/(?:[^\s\"'`;&|<>()]+)")
|
||||
_FILE_URL_PATTERN = re.compile(r"\bfile://\S+", re.IGNORECASE)
|
||||
_URL_WITH_SCHEME_PATTERN = re.compile(r"^[a-z][a-z0-9+.-]*://", re.IGNORECASE)
|
||||
_URL_IN_COMMAND_PATTERN = re.compile(r"\b[a-z][a-z0-9+.-]*://[^\s\"'`;&|<>()]+", re.IGNORECASE)
|
||||
_DOTDOT_PATH_SEGMENT_PATTERN = re.compile(r"(?:^|[/\\=])\.\.(?:$|[/\\])")
|
||||
_LOCAL_BASH_SYSTEM_PATH_PREFIXES = (
|
||||
"/bin/",
|
||||
"/usr/bin/",
|
||||
@@ -37,6 +40,42 @@ _DEFAULT_GLOB_MAX_RESULTS = 200
|
||||
_MAX_GLOB_MAX_RESULTS = 1000
|
||||
_DEFAULT_GREP_MAX_RESULTS = 100
|
||||
_MAX_GREP_MAX_RESULTS = 500
|
||||
_LOCAL_BASH_CWD_COMMANDS = {"cd", "pushd"}
|
||||
_LOCAL_BASH_COMMAND_WRAPPERS = {"command", "builtin"}
|
||||
_LOCAL_BASH_COMMAND_PREFIX_KEYWORDS = {"!", "{", "case", "do", "elif", "else", "for", "if", "select", "then", "time", "until", "while"}
|
||||
_LOCAL_BASH_COMMAND_END_KEYWORDS = {"}", "done", "esac", "fi"}
|
||||
_LOCAL_BASH_ROOT_PATH_COMMANDS = {
|
||||
"awk",
|
||||
"cat",
|
||||
"cp",
|
||||
"du",
|
||||
"find",
|
||||
"grep",
|
||||
"head",
|
||||
"less",
|
||||
"ln",
|
||||
"ls",
|
||||
"more",
|
||||
"mv",
|
||||
"rm",
|
||||
"sed",
|
||||
"tail",
|
||||
"tar",
|
||||
}
|
||||
_SHELL_COMMAND_SEPARATORS = {";", "&&", "||", "|", "|&", "&", "(", ")"}
|
||||
_SHELL_REDIRECTION_OPERATORS = {
|
||||
"<",
|
||||
">",
|
||||
"<<",
|
||||
">>",
|
||||
"<<<",
|
||||
"<>",
|
||||
">&",
|
||||
"<&",
|
||||
"&>",
|
||||
"&>>",
|
||||
">|",
|
||||
}
|
||||
|
||||
|
||||
def _get_skills_container_path() -> str:
|
||||
@@ -549,7 +588,7 @@ def validate_local_tool_path(path: str, thread_data: ThreadDataState | None, *,
|
||||
This function is a security gate — it checks whether *path* may be
|
||||
accessed and raises on violation. It does **not** resolve the virtual
|
||||
path to a host path; callers are responsible for resolution via
|
||||
``_resolve_and_validate_user_data_path`` or ``_resolve_skills_path``.
|
||||
``resolve_and_validate_user_data_path`` or ``_resolve_skills_path``.
|
||||
|
||||
Allowed virtual-path families:
|
||||
- ``/mnt/user-data/*`` — always allowed (read + write)
|
||||
@@ -636,6 +675,219 @@ def _resolve_and_validate_user_data_path(path: str, thread_data: ThreadDataState
|
||||
return str(resolved)
|
||||
|
||||
|
||||
def _is_non_file_url_token(token: str) -> bool:
|
||||
"""Return True for URL tokens that should not be interpreted as paths."""
|
||||
values = [token]
|
||||
if "=" in token:
|
||||
values.append(token.split("=", 1)[1])
|
||||
|
||||
for value in values:
|
||||
match = _URL_WITH_SCHEME_PATTERN.match(value)
|
||||
if match and not value.lower().startswith("file://"):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _non_file_url_spans(command: str) -> list[tuple[int, int]]:
|
||||
spans = []
|
||||
for match in _URL_IN_COMMAND_PATTERN.finditer(command):
|
||||
if not match.group().lower().startswith("file://"):
|
||||
spans.append(match.span())
|
||||
return spans
|
||||
|
||||
|
||||
def _is_in_spans(position: int, spans: list[tuple[int, int]]) -> bool:
|
||||
return any(start <= position < end for start, end in spans)
|
||||
|
||||
|
||||
def _has_dotdot_path_segment(token: str) -> bool:
|
||||
if _is_non_file_url_token(token):
|
||||
return False
|
||||
return bool(_DOTDOT_PATH_SEGMENT_PATTERN.search(token))
|
||||
|
||||
|
||||
def _split_shell_tokens(command: str) -> list[str]:
|
||||
try:
|
||||
normalized = command.replace("\r\n", "\n").replace("\r", "\n").replace("\n", " ; ")
|
||||
lexer = shlex.shlex(normalized, posix=True, punctuation_chars=True)
|
||||
lexer.whitespace_split = True
|
||||
lexer.commenters = ""
|
||||
return list(lexer)
|
||||
except ValueError:
|
||||
# The shell will reject malformed quoting later; keep validation
|
||||
# best-effort instead of turning syntax errors into security messages.
|
||||
return command.split()
|
||||
|
||||
|
||||
def _is_shell_command_separator(token: str) -> bool:
|
||||
return token in _SHELL_COMMAND_SEPARATORS
|
||||
|
||||
|
||||
def _is_shell_redirection_operator(token: str) -> bool:
|
||||
return token in _SHELL_REDIRECTION_OPERATORS
|
||||
|
||||
|
||||
def _is_shell_assignment(token: str) -> bool:
|
||||
name, separator, _ = token.partition("=")
|
||||
if not separator or not name:
|
||||
return False
|
||||
return bool(re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", name))
|
||||
|
||||
|
||||
def _is_allowed_local_bash_absolute_path(path: str, allowed_paths: list[str], *, allow_system_paths: bool) -> bool:
|
||||
# Check for MCP filesystem server allowed paths
|
||||
if any(path.startswith(allowed_path) or path == allowed_path.rstrip("/") for allowed_path in allowed_paths):
|
||||
_reject_path_traversal(path)
|
||||
return True
|
||||
|
||||
if path == VIRTUAL_PATH_PREFIX or path.startswith(f"{VIRTUAL_PATH_PREFIX}/"):
|
||||
_reject_path_traversal(path)
|
||||
return True
|
||||
|
||||
# Allow skills container path (resolved by tools.py before passing to sandbox)
|
||||
if _is_skills_path(path):
|
||||
_reject_path_traversal(path)
|
||||
return True
|
||||
|
||||
# Allow ACP workspace path (path-traversal check only)
|
||||
if _is_acp_workspace_path(path):
|
||||
_reject_path_traversal(path)
|
||||
return True
|
||||
|
||||
# Allow custom mount container paths
|
||||
if _is_custom_mount_path(path):
|
||||
_reject_path_traversal(path)
|
||||
return True
|
||||
|
||||
if allow_system_paths and any(path == prefix.rstrip("/") or path.startswith(prefix) for prefix in _LOCAL_BASH_SYSTEM_PATH_PREFIXES):
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def _next_cd_target(tokens: list[str], start_index: int) -> tuple[str | None, int]:
|
||||
index = start_index
|
||||
while index < len(tokens):
|
||||
token = tokens[index]
|
||||
if _is_shell_command_separator(token):
|
||||
return None, index
|
||||
if _is_shell_redirection_operator(token):
|
||||
index += 2
|
||||
continue
|
||||
if token == "--":
|
||||
index += 1
|
||||
continue
|
||||
if token in {"-L", "-P", "-e", "-@"}:
|
||||
index += 1
|
||||
continue
|
||||
if token.startswith("-") and token != "-":
|
||||
index += 1
|
||||
continue
|
||||
return token, index + 1
|
||||
return None, index
|
||||
|
||||
|
||||
def _validate_local_bash_cwd_target(command_name: str, target: str | None, allowed_paths: list[str]) -> None:
|
||||
if target is None or target == "-":
|
||||
raise PermissionError(f"Unsafe working directory change in command: {command_name}. Use paths under {VIRTUAL_PATH_PREFIX}")
|
||||
if target.startswith(("$", "`")):
|
||||
raise PermissionError(f"Unsafe working directory change in command: {command_name} {target}. Use paths under {VIRTUAL_PATH_PREFIX}")
|
||||
if target.startswith("~"):
|
||||
raise PermissionError(f"Unsafe working directory change in command: {command_name} {target}. Use paths under {VIRTUAL_PATH_PREFIX}")
|
||||
if target.startswith("/"):
|
||||
_reject_path_traversal(target)
|
||||
if not _is_allowed_local_bash_absolute_path(target, allowed_paths, allow_system_paths=False):
|
||||
raise PermissionError(f"Unsafe working directory change in command: {command_name} {target}. Use paths under {VIRTUAL_PATH_PREFIX}")
|
||||
|
||||
|
||||
def _looks_like_unsafe_cwd_target(target: str | None) -> bool:
|
||||
if target is None:
|
||||
return False
|
||||
return target == "-" or target.startswith(("$", "`", "~", "/", "..")) or _has_dotdot_path_segment(target)
|
||||
|
||||
|
||||
def _validate_local_bash_root_path_args(command_name: str, tokens: list[str], start_index: int) -> None:
|
||||
if command_name not in _LOCAL_BASH_ROOT_PATH_COMMANDS:
|
||||
return
|
||||
|
||||
index = start_index
|
||||
while index < len(tokens):
|
||||
token = tokens[index]
|
||||
if _is_shell_command_separator(token):
|
||||
return
|
||||
if _is_shell_redirection_operator(token):
|
||||
index += 2
|
||||
continue
|
||||
if token == "/" and not _is_non_file_url_token(token):
|
||||
raise PermissionError(f"Unsafe absolute paths in command: /. Use paths under {VIRTUAL_PATH_PREFIX}")
|
||||
index += 1
|
||||
|
||||
|
||||
def _validate_local_bash_shell_tokens(command: str, allowed_paths: list[str]) -> None:
|
||||
"""Conservatively reject relative path escapes missed by absolute-path scanning."""
|
||||
if re.search(r"\$\([^)]*\b(?:cd|pushd)\b", command):
|
||||
raise PermissionError(f"Unsafe working directory change in command substitution. Use paths under {VIRTUAL_PATH_PREFIX}")
|
||||
|
||||
tokens = _split_shell_tokens(command)
|
||||
|
||||
for token in tokens:
|
||||
if _is_shell_command_separator(token) or _is_shell_redirection_operator(token):
|
||||
continue
|
||||
if _has_dotdot_path_segment(token):
|
||||
raise PermissionError("Access denied: path traversal detected")
|
||||
|
||||
at_command_start = True
|
||||
index = 0
|
||||
while index < len(tokens):
|
||||
token = tokens[index]
|
||||
|
||||
if _is_shell_command_separator(token):
|
||||
at_command_start = True
|
||||
index += 1
|
||||
continue
|
||||
|
||||
if _is_shell_redirection_operator(token):
|
||||
index += 1
|
||||
continue
|
||||
|
||||
if at_command_start and _is_shell_assignment(token):
|
||||
index += 1
|
||||
continue
|
||||
|
||||
command_name = token.rsplit("/", 1)[-1]
|
||||
if at_command_start and command_name in _LOCAL_BASH_COMMAND_PREFIX_KEYWORDS | _LOCAL_BASH_COMMAND_END_KEYWORDS:
|
||||
index += 1
|
||||
continue
|
||||
|
||||
if not at_command_start:
|
||||
index += 1
|
||||
continue
|
||||
|
||||
at_command_start = False
|
||||
if command_name in _LOCAL_BASH_COMMAND_WRAPPERS and index + 1 < len(tokens):
|
||||
wrapped_name = tokens[index + 1].rsplit("/", 1)[-1]
|
||||
if wrapped_name in _LOCAL_BASH_CWD_COMMANDS:
|
||||
target, next_index = _next_cd_target(tokens, index + 2)
|
||||
_validate_local_bash_cwd_target(wrapped_name, target, allowed_paths)
|
||||
index = next_index
|
||||
continue
|
||||
_validate_local_bash_root_path_args(wrapped_name, tokens, index + 2)
|
||||
|
||||
if command_name not in _LOCAL_BASH_CWD_COMMANDS:
|
||||
_validate_local_bash_root_path_args(command_name, tokens, index + 1)
|
||||
index += 1
|
||||
continue
|
||||
|
||||
target, next_index = _next_cd_target(tokens, index + 1)
|
||||
_validate_local_bash_cwd_target(command_name, target, allowed_paths)
|
||||
index = next_index
|
||||
|
||||
|
||||
def resolve_and_validate_user_data_path(path: str, thread_data: ThreadDataState) -> str:
|
||||
"""Resolve a /mnt/user-data virtual path and validate it stays in bounds."""
|
||||
return _resolve_and_validate_user_data_path(path, thread_data)
|
||||
|
||||
|
||||
def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState | None) -> None:
|
||||
"""Validate absolute paths in local-sandbox bash commands.
|
||||
|
||||
@@ -661,33 +913,14 @@ def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState
|
||||
|
||||
unsafe_paths: list[str] = []
|
||||
allowed_paths = _get_mcp_allowed_paths()
|
||||
_validate_local_bash_shell_tokens(command, allowed_paths)
|
||||
url_spans = _non_file_url_spans(command)
|
||||
|
||||
for absolute_path in _ABSOLUTE_PATH_PATTERN.findall(command):
|
||||
# Check for MCP filesystem server allowed paths
|
||||
if any(absolute_path.startswith(path) or absolute_path == path.rstrip("/") for path in allowed_paths):
|
||||
_reject_path_traversal(absolute_path)
|
||||
for match in _ABSOLUTE_PATH_PATTERN.finditer(command):
|
||||
if _is_in_spans(match.start(), url_spans):
|
||||
continue
|
||||
|
||||
if absolute_path == VIRTUAL_PATH_PREFIX or absolute_path.startswith(f"{VIRTUAL_PATH_PREFIX}/"):
|
||||
_reject_path_traversal(absolute_path)
|
||||
continue
|
||||
|
||||
# Allow skills container path (resolved by tools.py before passing to sandbox)
|
||||
if _is_skills_path(absolute_path):
|
||||
_reject_path_traversal(absolute_path)
|
||||
continue
|
||||
|
||||
# Allow ACP workspace path (path-traversal check only)
|
||||
if _is_acp_workspace_path(absolute_path):
|
||||
_reject_path_traversal(absolute_path)
|
||||
continue
|
||||
|
||||
# Allow custom mount container paths
|
||||
if _is_custom_mount_path(absolute_path):
|
||||
_reject_path_traversal(absolute_path)
|
||||
continue
|
||||
|
||||
if any(absolute_path == prefix.rstrip("/") or absolute_path.startswith(prefix) for prefix in _LOCAL_BASH_SYSTEM_PATH_PREFIXES):
|
||||
absolute_path = match.group()
|
||||
if _is_allowed_local_bash_absolute_path(absolute_path, allowed_paths, allow_system_paths=True):
|
||||
continue
|
||||
|
||||
unsafe_paths.append(absolute_path)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from .installer import SkillAlreadyExistsError, install_skill_from_archive
|
||||
from .installer import SkillAlreadyExistsError, SkillSecurityScanError, ainstall_skill_from_archive, install_skill_from_archive
|
||||
from .loader import get_skills_root_path, load_skills
|
||||
from .types import Skill
|
||||
from .validation import ALLOWED_FRONTMATTER_PROPERTIES, _validate_skill_frontmatter
|
||||
@@ -10,5 +10,7 @@ __all__ = [
|
||||
"ALLOWED_FRONTMATTER_PROPERTIES",
|
||||
"_validate_skill_frontmatter",
|
||||
"install_skill_from_archive",
|
||||
"ainstall_skill_from_archive",
|
||||
"SkillAlreadyExistsError",
|
||||
"SkillSecurityScanError",
|
||||
]
|
||||
|
||||
@@ -4,6 +4,8 @@ Pure business logic — no FastAPI/HTTP dependencies.
|
||||
Both Gateway and Client delegate to these functions.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import concurrent.futures
|
||||
import logging
|
||||
import posixpath
|
||||
import shutil
|
||||
@@ -13,15 +15,23 @@ import zipfile
|
||||
from pathlib import Path, PurePosixPath, PureWindowsPath
|
||||
|
||||
from deerflow.skills.loader import get_skills_root_path
|
||||
from deerflow.skills.security_scanner import scan_skill_content
|
||||
from deerflow.skills.validation import _validate_skill_frontmatter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_PROMPT_INPUT_DIRS = {"references", "templates"}
|
||||
_PROMPT_INPUT_SUFFIXES = frozenset({".json", ".markdown", ".md", ".rst", ".txt", ".yaml", ".yml"})
|
||||
|
||||
|
||||
class SkillAlreadyExistsError(ValueError):
|
||||
"""Raised when a skill with the same name is already installed."""
|
||||
|
||||
|
||||
class SkillSecurityScanError(ValueError):
|
||||
"""Raised when a skill archive fails security scanning."""
|
||||
|
||||
|
||||
def is_unsafe_zip_member(info: zipfile.ZipInfo) -> bool:
|
||||
"""Return True if the zip member path is absolute or attempts directory traversal."""
|
||||
name = info.filename
|
||||
@@ -114,7 +124,78 @@ def safe_extract_skill_archive(
|
||||
dst.write(chunk)
|
||||
|
||||
|
||||
def install_skill_from_archive(
|
||||
def _is_script_support_file(rel_path: Path) -> bool:
|
||||
return bool(rel_path.parts) and rel_path.parts[0] == "scripts"
|
||||
|
||||
|
||||
def _should_scan_support_file(rel_path: Path) -> bool:
|
||||
if _is_script_support_file(rel_path):
|
||||
return True
|
||||
return bool(rel_path.parts) and rel_path.parts[0] in _PROMPT_INPUT_DIRS and rel_path.suffix.lower() in _PROMPT_INPUT_SUFFIXES
|
||||
|
||||
|
||||
def _move_staged_skill_into_reserved_target(staging_target: Path, target: Path) -> None:
|
||||
installed = False
|
||||
reserved = False
|
||||
try:
|
||||
target.mkdir(mode=0o700)
|
||||
reserved = True
|
||||
for child in staging_target.iterdir():
|
||||
shutil.move(str(child), target / child.name)
|
||||
installed = True
|
||||
except FileExistsError as e:
|
||||
raise SkillAlreadyExistsError(f"Skill '{target.name}' already exists") from e
|
||||
finally:
|
||||
if reserved and not installed and target.exists():
|
||||
shutil.rmtree(target)
|
||||
|
||||
|
||||
async def _scan_skill_file_or_raise(skill_dir: Path, path: Path, skill_name: str, *, executable: bool) -> None:
|
||||
rel_path = path.relative_to(skill_dir).as_posix()
|
||||
location = f"{skill_name}/{rel_path}"
|
||||
try:
|
||||
content = path.read_text(encoding="utf-8")
|
||||
except UnicodeDecodeError as e:
|
||||
raise SkillSecurityScanError(f"Security scan failed for skill '{skill_name}': {location} must be valid UTF-8") from e
|
||||
|
||||
try:
|
||||
result = await scan_skill_content(content, executable=executable, location=location)
|
||||
except Exception as e:
|
||||
raise SkillSecurityScanError(f"Security scan failed for {location}: {e}") from e
|
||||
|
||||
decision = getattr(result, "decision", None)
|
||||
reason = str(getattr(result, "reason", "") or "No reason provided.")
|
||||
if decision == "block":
|
||||
if rel_path == "SKILL.md":
|
||||
raise SkillSecurityScanError(f"Security scan blocked skill '{skill_name}': {reason}")
|
||||
raise SkillSecurityScanError(f"Security scan blocked {location}: {reason}")
|
||||
if executable and decision != "allow":
|
||||
raise SkillSecurityScanError(f"Security scan rejected executable {location}: {reason}")
|
||||
if decision not in {"allow", "warn"}:
|
||||
raise SkillSecurityScanError(f"Security scan failed for {location}: invalid scanner decision {decision!r}")
|
||||
|
||||
|
||||
async def _scan_skill_archive_contents_or_raise(skill_dir: Path, skill_name: str) -> None:
|
||||
"""Run the skill security scanner against all installable text and script files."""
|
||||
skill_md = skill_dir / "SKILL.md"
|
||||
await _scan_skill_file_or_raise(skill_dir, skill_md, skill_name, executable=False)
|
||||
|
||||
for path in sorted(skill_dir.rglob("*")):
|
||||
if not path.is_file():
|
||||
continue
|
||||
|
||||
rel_path = path.relative_to(skill_dir)
|
||||
if rel_path == Path("SKILL.md"):
|
||||
continue
|
||||
if path.name == "SKILL.md":
|
||||
raise SkillSecurityScanError(f"Security scan failed for skill '{skill_name}': nested SKILL.md is not allowed at {skill_name}/{rel_path.as_posix()}")
|
||||
if not _should_scan_support_file(rel_path):
|
||||
continue
|
||||
|
||||
await _scan_skill_file_or_raise(skill_dir, path, skill_name, executable=_is_script_support_file(rel_path))
|
||||
|
||||
|
||||
async def ainstall_skill_from_archive(
|
||||
zip_path: str | Path,
|
||||
*,
|
||||
skills_root: Path | None = None,
|
||||
@@ -173,7 +254,12 @@ def install_skill_from_archive(
|
||||
if target.exists():
|
||||
raise SkillAlreadyExistsError(f"Skill '{skill_name}' already exists")
|
||||
|
||||
shutil.copytree(skill_dir, target)
|
||||
await _scan_skill_archive_contents_or_raise(skill_dir, skill_name)
|
||||
|
||||
with tempfile.TemporaryDirectory(prefix=f".installing-{skill_name}-", dir=custom_dir) as staging_root:
|
||||
staging_target = Path(staging_root) / skill_name
|
||||
shutil.copytree(skill_dir, staging_target)
|
||||
_move_staged_skill_into_reserved_target(staging_target, target)
|
||||
logger.info("Skill %r installed to %s", skill_name, target)
|
||||
|
||||
return {
|
||||
@@ -181,3 +267,24 @@ def install_skill_from_archive(
|
||||
"skill_name": skill_name,
|
||||
"message": f"Skill '{skill_name}' installed successfully",
|
||||
}
|
||||
|
||||
|
||||
def _run_async_install(coro):
|
||||
try:
|
||||
loop = asyncio.get_running_loop()
|
||||
except RuntimeError:
|
||||
loop = None
|
||||
|
||||
if loop is not None and loop.is_running():
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
|
||||
return executor.submit(asyncio.run, coro).result()
|
||||
return asyncio.run(coro)
|
||||
|
||||
|
||||
def install_skill_from_archive(
|
||||
zip_path: str | Path,
|
||||
*,
|
||||
skills_root: Path | None = None,
|
||||
) -> dict:
|
||||
"""Install a skill from a .skill archive (ZIP)."""
|
||||
return _run_async_install(ainstall_skill_from_archive(zip_path, skills_root=skills_root))
|
||||
|
||||
@@ -8,7 +8,42 @@ from langchain_core.messages import ToolMessage
|
||||
from langgraph.types import Command
|
||||
from langgraph.typing import ContextT
|
||||
|
||||
from deerflow.agents.thread_state import ThreadState
|
||||
from deerflow.agents.thread_state import ThreadDataState, ThreadState
|
||||
from deerflow.config.paths import VIRTUAL_PATH_PREFIX
|
||||
|
||||
_ALLOWED_IMAGE_VIRTUAL_ROOTS = (
|
||||
f"{VIRTUAL_PATH_PREFIX}/workspace",
|
||||
f"{VIRTUAL_PATH_PREFIX}/uploads",
|
||||
f"{VIRTUAL_PATH_PREFIX}/outputs",
|
||||
)
|
||||
_ALLOWED_IMAGE_VIRTUAL_ROOTS_TEXT = ", ".join(_ALLOWED_IMAGE_VIRTUAL_ROOTS)
|
||||
_MAX_IMAGE_BYTES = 20 * 1024 * 1024
|
||||
_EXTENSION_TO_MIME = {
|
||||
".jpg": "image/jpeg",
|
||||
".jpeg": "image/jpeg",
|
||||
".png": "image/png",
|
||||
".webp": "image/webp",
|
||||
}
|
||||
|
||||
|
||||
def _is_allowed_image_virtual_path(image_path: str) -> bool:
|
||||
return any(image_path == root or image_path.startswith(f"{root}/") for root in _ALLOWED_IMAGE_VIRTUAL_ROOTS)
|
||||
|
||||
|
||||
def _detect_image_mime(image_data: bytes) -> str | None:
|
||||
if image_data.startswith(b"\xff\xd8\xff"):
|
||||
return "image/jpeg"
|
||||
if image_data.startswith(b"\x89PNG\r\n\x1a\n"):
|
||||
return "image/png"
|
||||
if len(image_data) >= 12 and image_data.startswith(b"RIFF") and image_data[8:12] == b"WEBP":
|
||||
return "image/webp"
|
||||
return None
|
||||
|
||||
|
||||
def _sanitize_image_error(error: Exception, thread_data: ThreadDataState | None) -> str:
|
||||
from deerflow.sandbox.tools import mask_local_paths_in_output
|
||||
|
||||
return mask_local_paths_in_output(f"{type(error).__name__}: {error}", thread_data)
|
||||
|
||||
|
||||
@tool("view_image", parse_docstring=True)
|
||||
@@ -29,22 +64,39 @@ def view_image_tool(
|
||||
- For multiple files at once (use present_files instead)
|
||||
|
||||
Args:
|
||||
image_path: Absolute path to the image file. Common formats supported: jpg, jpeg, png, webp.
|
||||
image_path: Absolute /mnt/user-data virtual path to the image file. Common formats supported: jpg, jpeg, png, webp.
|
||||
"""
|
||||
from deerflow.sandbox.tools import get_thread_data, replace_virtual_path
|
||||
from deerflow.sandbox.exceptions import SandboxRuntimeError
|
||||
from deerflow.sandbox.tools import (
|
||||
get_thread_data,
|
||||
resolve_and_validate_user_data_path,
|
||||
validate_local_tool_path,
|
||||
)
|
||||
|
||||
# Replace virtual path with actual path
|
||||
# /mnt/user-data/* paths are mapped to thread-specific directories
|
||||
thread_data = get_thread_data(runtime)
|
||||
actual_path = replace_virtual_path(image_path, thread_data)
|
||||
|
||||
# Validate that the path is absolute
|
||||
path = Path(actual_path)
|
||||
if not path.is_absolute():
|
||||
if not _is_allowed_image_virtual_path(image_path):
|
||||
return Command(
|
||||
update={"messages": [ToolMessage(f"Error: Path must be absolute, got: {image_path}", tool_call_id=tool_call_id)]},
|
||||
update={
|
||||
"messages": [
|
||||
ToolMessage(
|
||||
f"Error: Only image paths under {_ALLOWED_IMAGE_VIRTUAL_ROOTS_TEXT} are allowed",
|
||||
tool_call_id=tool_call_id,
|
||||
)
|
||||
]
|
||||
},
|
||||
)
|
||||
|
||||
try:
|
||||
validate_local_tool_path(image_path, thread_data, read_only=True)
|
||||
actual_path = resolve_and_validate_user_data_path(image_path, thread_data)
|
||||
except (PermissionError, SandboxRuntimeError) as e:
|
||||
return Command(
|
||||
update={"messages": [ToolMessage(f"Error: {str(e)}", tool_call_id=tool_call_id)]},
|
||||
)
|
||||
|
||||
path = Path(actual_path)
|
||||
|
||||
# Validate that the file exists
|
||||
if not path.exists():
|
||||
return Command(
|
||||
@@ -58,34 +110,49 @@ def view_image_tool(
|
||||
)
|
||||
|
||||
# Validate image extension
|
||||
valid_extensions = {".jpg", ".jpeg", ".png", ".webp"}
|
||||
if path.suffix.lower() not in valid_extensions:
|
||||
expected_mime_type = _EXTENSION_TO_MIME.get(path.suffix.lower())
|
||||
if expected_mime_type is None:
|
||||
return Command(
|
||||
update={"messages": [ToolMessage(f"Error: Unsupported image format: {path.suffix}. Supported formats: {', '.join(valid_extensions)}", tool_call_id=tool_call_id)]},
|
||||
update={"messages": [ToolMessage(f"Error: Unsupported image format: {path.suffix}. Supported formats: {', '.join(_EXTENSION_TO_MIME)}", tool_call_id=tool_call_id)]},
|
||||
)
|
||||
|
||||
# Detect MIME type from file extension
|
||||
mime_type, _ = mimetypes.guess_type(actual_path)
|
||||
if mime_type is None:
|
||||
# Fallback to default MIME types for common image formats
|
||||
extension_to_mime = {
|
||||
".jpg": "image/jpeg",
|
||||
".jpeg": "image/jpeg",
|
||||
".png": "image/png",
|
||||
".webp": "image/webp",
|
||||
}
|
||||
mime_type = extension_to_mime.get(path.suffix.lower(), "application/octet-stream")
|
||||
mime_type = expected_mime_type
|
||||
|
||||
try:
|
||||
image_size = path.stat().st_size
|
||||
except OSError as e:
|
||||
return Command(
|
||||
update={"messages": [ToolMessage(f"Error reading image metadata: {_sanitize_image_error(e, thread_data)}", tool_call_id=tool_call_id)]},
|
||||
)
|
||||
if image_size > _MAX_IMAGE_BYTES:
|
||||
return Command(
|
||||
update={"messages": [ToolMessage(f"Error: Image file is too large: {image_size} bytes. Maximum supported size is {_MAX_IMAGE_BYTES} bytes", tool_call_id=tool_call_id)]},
|
||||
)
|
||||
|
||||
# Read image file and convert to base64
|
||||
try:
|
||||
with open(actual_path, "rb") as f:
|
||||
image_data = f.read()
|
||||
image_base64 = base64.b64encode(image_data).decode("utf-8")
|
||||
except Exception as e:
|
||||
return Command(
|
||||
update={"messages": [ToolMessage(f"Error reading image file: {str(e)}", tool_call_id=tool_call_id)]},
|
||||
update={"messages": [ToolMessage(f"Error reading image file: {_sanitize_image_error(e, thread_data)}", tool_call_id=tool_call_id)]},
|
||||
)
|
||||
|
||||
detected_mime_type = _detect_image_mime(image_data)
|
||||
if detected_mime_type is None:
|
||||
return Command(
|
||||
update={"messages": [ToolMessage("Error: File contents do not match a supported image format", tool_call_id=tool_call_id)]},
|
||||
)
|
||||
if detected_mime_type != expected_mime_type:
|
||||
return Command(
|
||||
update={"messages": [ToolMessage(f"Error: Image contents are {detected_mime_type}, but file extension indicates {expected_mime_type}", tool_call_id=tool_call_id)]},
|
||||
)
|
||||
mime_type = detected_mime_type
|
||||
image_base64 = base64.b64encode(image_data).decode("utf-8")
|
||||
|
||||
# Update viewed_images in state
|
||||
# The merge_viewed_images reducer will handle merging with existing images
|
||||
new_viewed_images = {image_path: {"base64": image_base64, "mime_type": mime_type}}
|
||||
|
||||
Reference in New Issue
Block a user