Merge branch 'main' into release/2.0-rc

2026-05-21 23:46:50 +00:00 · 2026-04-28 15:44:02 +08:00
parent e82940c03d 395c14357b
commit 844ad8e528
20 changed files with 1531 additions and 98 deletions
@@ -254,9 +254,11 @@ def _assemble_from_features(
            from deerflow.agents.middlewares.view_image_middleware import ViewImageMiddleware

            chain.append(ViewImageMiddleware())
-        from deerflow.tools.builtins import view_image_tool

-        extra_tools.append(view_image_tool)
+        if feat.sandbox is not False:
+            from deerflow.tools.builtins import view_image_tool
+
+            extra_tools.append(view_image_tool)

    # --- [11] Subagent ---
    if feat.subagent is not False:
@@ -9,6 +9,7 @@ from __future__ import annotations
 import json
 import logging
 import os
+import shlex
 import subprocess
 from datetime import datetime

@@ -86,6 +87,46 @@ def _format_container_mount(runtime: str, host_path: str, container_path: str, r
    return ["-v", mount_spec]


+def _redact_container_command_for_log(cmd: list[str]) -> list[str]:
+    """Return a Docker/Container command with environment values redacted."""
+    redacted: list[str] = []
+    redact_next_env = False
+
+    for arg in cmd:
+        if redact_next_env:
+            if "=" in arg:
+                key = arg.split("=", 1)[0]
+                redacted.append(f"{key}=<redacted>" if key else "<redacted>")
+            else:
+                redacted.append(arg)
+            redact_next_env = False
+            continue
+
+        if arg in {"-e", "--env"}:
+            redacted.append(arg)
+            redact_next_env = True
+            continue
+
+        if arg.startswith("--env="):
+            value = arg.removeprefix("--env=")
+            if "=" in value:
+                key = value.split("=", 1)[0]
+                redacted.append(f"--env={key}=<redacted>" if key else "--env=<redacted>")
+            else:
+                redacted.append(arg)
+            continue
+
+        redacted.append(arg)
+
+    return redacted
+
+
+def _format_container_command_for_log(cmd: list[str]) -> str:
+    if os.name == "nt":
+        return subprocess.list2cmdline(cmd)
+    return shlex.join(cmd)
+
+
 class LocalContainerBackend(SandboxBackend):
    """Backend that manages sandbox containers locally using Docker or Apple Container.

@@ -464,7 +505,8 @@ class LocalContainerBackend(SandboxBackend):

        cmd.append(self._image)

-        logger.info(f"Starting container using {self._runtime}: {' '.join(cmd)}")
+        log_cmd = _format_container_command_for_log(_redact_container_command_for_log(cmd))
+        logger.info(f"Starting container using {self._runtime}: {log_cmd}")

        try:
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
@@ -1,4 +1,5 @@
 import ast
+import html
 import json
 import re
 import uuid
@@ -36,8 +37,8 @@ def _fix_messages(messages: list) -> list:
        if isinstance(msg, AIMessage) and getattr(msg, "tool_calls", []):
            xml_parts = []
            for tool in msg.tool_calls:
-                args_xml = " ".join(f"<parameter={k}>{json.dumps(v, ensure_ascii=False)}</parameter>" for k, v in tool.get("args", {}).items())
-                xml_parts.append(f"<tool_call> <function={tool['name']}> {args_xml} </function> </tool_call>")
+                args_xml = " ".join(f"<parameter={html.escape(str(k), quote=False)}>{html.escape(v if isinstance(v, str) else json.dumps(v, ensure_ascii=False), quote=False)}</parameter>" for k, v in tool.get("args", {}).items())
+                xml_parts.append(f"<tool_call> <function={html.escape(str(tool['name']), quote=False)}> {args_xml} </function> </tool_call>")
            full_text = f"{text}\n" + "\n".join(xml_parts) if text else "\n".join(xml_parts)
            fixed.append(AIMessage(content=full_text.strip() or " "))
            continue
@@ -80,13 +81,24 @@ def _parse_xml_tool_call_to_dict(content: str) -> tuple[str, list[dict]]:
        func_match = re.search(r"<function=([^>]+)>", inner_content)
        if not func_match:
            continue
-        function_name = func_match.group(1).strip()
+        function_name = html.unescape(func_match.group(1).strip())
+
+        # Ignore nested tool blocks when extracting parameters for this call.
+        # Nested `<tool_call>` sections represent separate invocations and
+        # their `<parameter>` tags must not leak into the current call args.
+        param_source_parts: list[str] = []
+        nested_cursor = 0
+        for nested_start, nested_end, _ in _iter_tool_call_blocks(inner_content):
+            param_source_parts.append(inner_content[nested_cursor:nested_start])
+            nested_cursor = nested_end
+        param_source_parts.append(inner_content[nested_cursor:])
+        param_source = "".join(param_source_parts)

        args = {}
        param_pattern = re.compile(r"<parameter=([^>]+)>(.*?)</parameter>", re.DOTALL)
-        for param_match in param_pattern.finditer(inner_content):
-            key = param_match.group(1).strip()
-            raw_value = param_match.group(2).strip()
+        for param_match in param_pattern.finditer(param_source):
+            key = html.unescape(param_match.group(1).strip())
+            raw_value = html.unescape(param_match.group(2).strip())

            # Attempt to deserialize string values into native Python types
            # to satisfy downstream Pydantic validation.
@@ -22,6 +22,13 @@ def list_dir(path: str, max_depth: int = 2) -> list[str]:
    if not root_path.is_dir():
        return result

+    def _is_within_root(candidate: Path) -> bool:
+        try:
+            candidate.relative_to(root_path)
+            return True
+        except ValueError:
+            return False
+
    def _traverse(current_path: Path, current_depth: int) -> None:
        """Recursively traverse directories up to max_depth."""
        if current_depth > max_depth:
@@ -32,8 +39,23 @@ def list_dir(path: str, max_depth: int = 2) -> list[str]:
                if should_ignore_name(item.name):
                    continue

+                if item.is_symlink():
+                    try:
+                        item_resolved = item.resolve()
+                        if not _is_within_root(item_resolved):
+                            continue
+                    except OSError:
+                        continue
+                    post_fix = "/" if item_resolved.is_dir() else ""
+                    result.append(str(item_resolved) + post_fix)
+                    continue
+
+                item_resolved = item.resolve()
+                if not _is_within_root(item_resolved):
+                    continue
+
                post_fix = "/" if item.is_dir() else ""
-                result.append(str(item.resolve()) + post_fix)
+                result.append(str(item_resolved) + post_fix)

                # Recurse into subdirectories if not at max depth
                if item.is_dir() and current_depth < max_depth:
@@ -5,6 +5,7 @@ import shutil
 import subprocess
 from dataclasses import dataclass
 from pathlib import Path
+from typing import NamedTuple

 from deerflow.sandbox.local.list_dir import list_dir
 from deerflow.sandbox.sandbox import Sandbox
@@ -20,6 +21,11 @@ class PathMapping:
    read_only: bool = False


+class ResolvedPath(NamedTuple):
+    path: str
+    mapping: PathMapping | None
+
+
 class LocalSandbox(Sandbox):
    @staticmethod
    def _shell_name(shell: str) -> str:
@@ -91,7 +97,23 @@ class LocalSandbox(Sandbox):

        return best_mapping.read_only

-    def _resolve_path(self, path: str) -> str:
+    def _find_path_mapping(self, path: str) -> tuple[PathMapping, str] | None:
+        path_str = str(path)
+
+        for mapping in sorted(self.path_mappings, key=lambda m: len(m.container_path.rstrip("/") or "/"), reverse=True):
+            container_path = mapping.container_path.rstrip("/") or "/"
+            if container_path == "/":
+                if path_str.startswith("/"):
+                    return mapping, path_str.lstrip("/")
+                continue
+
+            if path_str == container_path or path_str.startswith(container_path + "/"):
+                relative = path_str[len(container_path) :].lstrip("/")
+                return mapping, relative
+
+        return None
+
+    def _resolve_path_with_mapping(self, path: str) -> ResolvedPath:
        """
        Resolve container path to actual local path using mappings.

@@ -99,22 +121,30 @@ class LocalSandbox(Sandbox):
            path: Path that might be a container path

        Returns:
-            Resolved local path
+            Resolved local path and the matched mapping, if any
        """
        path_str = str(path)

-        # Try each mapping (longest prefix first for more specific matches)
-        for mapping in sorted(self.path_mappings, key=lambda m: len(m.container_path), reverse=True):
-            container_path = mapping.container_path
-            local_path = mapping.local_path
-            if path_str == container_path or path_str.startswith(container_path + "/"):
-                # Replace the container path prefix with local path
-                relative = path_str[len(container_path) :].lstrip("/")
-                resolved = str(Path(local_path) / relative) if relative else local_path
-                return resolved
+        mapping_match = self._find_path_mapping(path_str)
+        if mapping_match is None:
+            return ResolvedPath(path_str, None)

-        # No mapping found, return original path
-        return path_str
+        mapping, relative = mapping_match
+        local_root = Path(mapping.local_path).resolve()
+        resolved_path = (local_root / relative).resolve() if relative else local_root
+
+        try:
+            resolved_path.relative_to(local_root)
+        except ValueError as exc:
+            raise PermissionError(errno.EACCES, "Access denied: path escapes mounted directory", path_str) from exc
+
+        return ResolvedPath(str(resolved_path), mapping)
+
+    def _resolve_path(self, path: str) -> str:
+        return self._resolve_path_with_mapping(path).path
+
+    def _is_resolved_path_read_only(self, resolved: ResolvedPath) -> bool:
+        return bool(resolved.mapping and resolved.mapping.read_only) or self._is_read_only_path(resolved.path)

    def _reverse_resolve_path(self, path: str) -> str:
        """
@@ -309,8 +339,14 @@ class LocalSandbox(Sandbox):
    def list_dir(self, path: str, max_depth=2) -> list[str]:
        resolved_path = self._resolve_path(path)
        entries = list_dir(resolved_path, max_depth)
-        # Reverse resolve local paths back to container paths in output
-        return [self._reverse_resolve_paths_in_output(entry) for entry in entries]
+        # Reverse resolve local paths back to container paths and preserve
+        # list_dir's trailing "/" marker for directories.
+        result: list[str] = []
+        for entry in entries:
+            is_dir = entry.endswith(("/", "\\"))
+            reversed_entry = self._reverse_resolve_path(entry.rstrip("/\\")) if is_dir else self._reverse_resolve_path(entry)
+            result.append(f"{reversed_entry}/" if is_dir and not reversed_entry.endswith("/") else reversed_entry)
+        return result

    def read_file(self, path: str) -> str:
        resolved_path = self._resolve_path(path)
@@ -329,8 +365,9 @@ class LocalSandbox(Sandbox):
            raise type(e)(e.errno, e.strerror, path) from None

    def write_file(self, path: str, content: str, append: bool = False) -> None:
-        resolved_path = self._resolve_path(path)
-        if self._is_read_only_path(resolved_path):
+        resolved = self._resolve_path_with_mapping(path)
+        resolved_path = resolved.path
+        if self._is_resolved_path_read_only(resolved):
            raise OSError(errno.EROFS, "Read-only file system", path)
        try:
            dir_path = os.path.dirname(resolved_path)
@@ -384,8 +421,9 @@ class LocalSandbox(Sandbox):
        ], truncated

    def update_file(self, path: str, content: bytes) -> None:
-        resolved_path = self._resolve_path(path)
-        if self._is_read_only_path(resolved_path):
+        resolved = self._resolve_path_with_mapping(path)
+        resolved_path = resolved.path
+        if self._is_resolved_path_read_only(resolved):
            raise OSError(errno.EROFS, "Read-only file system", path)
        try:
            dir_path = os.path.dirname(resolved_path)
@@ -22,6 +22,9 @@ from deerflow.sandbox.security import LOCAL_HOST_BASH_DISABLED_MESSAGE, is_host_

 _ABSOLUTE_PATH_PATTERN = re.compile(r"(?<![:\w])(?<!:/)/(?:[^\s\"'`;&|<>()]+)")
 _FILE_URL_PATTERN = re.compile(r"\bfile://\S+", re.IGNORECASE)
+_URL_WITH_SCHEME_PATTERN = re.compile(r"^[a-z][a-z0-9+.-]*://", re.IGNORECASE)
+_URL_IN_COMMAND_PATTERN = re.compile(r"\b[a-z][a-z0-9+.-]*://[^\s\"'`;&|<>()]+", re.IGNORECASE)
+_DOTDOT_PATH_SEGMENT_PATTERN = re.compile(r"(?:^|[/\\=])\.\.(?:$|[/\\])")
 _LOCAL_BASH_SYSTEM_PATH_PREFIXES = (
    "/bin/",
    "/usr/bin/",
@@ -37,6 +40,42 @@ _DEFAULT_GLOB_MAX_RESULTS = 200
 _MAX_GLOB_MAX_RESULTS = 1000
 _DEFAULT_GREP_MAX_RESULTS = 100
 _MAX_GREP_MAX_RESULTS = 500
+_LOCAL_BASH_CWD_COMMANDS = {"cd", "pushd"}
+_LOCAL_BASH_COMMAND_WRAPPERS = {"command", "builtin"}
+_LOCAL_BASH_COMMAND_PREFIX_KEYWORDS = {"!", "{", "case", "do", "elif", "else", "for", "if", "select", "then", "time", "until", "while"}
+_LOCAL_BASH_COMMAND_END_KEYWORDS = {"}", "done", "esac", "fi"}
+_LOCAL_BASH_ROOT_PATH_COMMANDS = {
+    "awk",
+    "cat",
+    "cp",
+    "du",
+    "find",
+    "grep",
+    "head",
+    "less",
+    "ln",
+    "ls",
+    "more",
+    "mv",
+    "rm",
+    "sed",
+    "tail",
+    "tar",
+}
+_SHELL_COMMAND_SEPARATORS = {";", "&&", "||", "|", "|&", "&", "(", ")"}
+_SHELL_REDIRECTION_OPERATORS = {
+    "<",
+    ">",
+    "<<",
+    ">>",
+    "<<<",
+    "<>",
+    ">&",
+    "<&",
+    "&>",
+    "&>>",
+    ">|",
+}


 def _get_skills_container_path() -> str:
@@ -549,7 +588,7 @@ def validate_local_tool_path(path: str, thread_data: ThreadDataState | None, *,
    This function is a security gate — it checks whether *path* may be
    accessed and raises on violation.  It does **not** resolve the virtual
    path to a host path; callers are responsible for resolution via
-    ``_resolve_and_validate_user_data_path`` or ``_resolve_skills_path``.
+    ``resolve_and_validate_user_data_path`` or ``_resolve_skills_path``.

    Allowed virtual-path families:
      - ``/mnt/user-data/*``  — always allowed (read + write)
@@ -636,6 +675,219 @@ def _resolve_and_validate_user_data_path(path: str, thread_data: ThreadDataState
    return str(resolved)


+def _is_non_file_url_token(token: str) -> bool:
+    """Return True for URL tokens that should not be interpreted as paths."""
+    values = [token]
+    if "=" in token:
+        values.append(token.split("=", 1)[1])
+
+    for value in values:
+        match = _URL_WITH_SCHEME_PATTERN.match(value)
+        if match and not value.lower().startswith("file://"):
+            return True
+    return False
+
+
+def _non_file_url_spans(command: str) -> list[tuple[int, int]]:
+    spans = []
+    for match in _URL_IN_COMMAND_PATTERN.finditer(command):
+        if not match.group().lower().startswith("file://"):
+            spans.append(match.span())
+    return spans
+
+
+def _is_in_spans(position: int, spans: list[tuple[int, int]]) -> bool:
+    return any(start <= position < end for start, end in spans)
+
+
+def _has_dotdot_path_segment(token: str) -> bool:
+    if _is_non_file_url_token(token):
+        return False
+    return bool(_DOTDOT_PATH_SEGMENT_PATTERN.search(token))
+
+
+def _split_shell_tokens(command: str) -> list[str]:
+    try:
+        normalized = command.replace("\r\n", "\n").replace("\r", "\n").replace("\n", " ; ")
+        lexer = shlex.shlex(normalized, posix=True, punctuation_chars=True)
+        lexer.whitespace_split = True
+        lexer.commenters = ""
+        return list(lexer)
+    except ValueError:
+        # The shell will reject malformed quoting later; keep validation
+        # best-effort instead of turning syntax errors into security messages.
+        return command.split()
+
+
+def _is_shell_command_separator(token: str) -> bool:
+    return token in _SHELL_COMMAND_SEPARATORS
+
+
+def _is_shell_redirection_operator(token: str) -> bool:
+    return token in _SHELL_REDIRECTION_OPERATORS
+
+
+def _is_shell_assignment(token: str) -> bool:
+    name, separator, _ = token.partition("=")
+    if not separator or not name:
+        return False
+    return bool(re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", name))
+
+
+def _is_allowed_local_bash_absolute_path(path: str, allowed_paths: list[str], *, allow_system_paths: bool) -> bool:
+    # Check for MCP filesystem server allowed paths
+    if any(path.startswith(allowed_path) or path == allowed_path.rstrip("/") for allowed_path in allowed_paths):
+        _reject_path_traversal(path)
+        return True
+
+    if path == VIRTUAL_PATH_PREFIX or path.startswith(f"{VIRTUAL_PATH_PREFIX}/"):
+        _reject_path_traversal(path)
+        return True
+
+    # Allow skills container path (resolved by tools.py before passing to sandbox)
+    if _is_skills_path(path):
+        _reject_path_traversal(path)
+        return True
+
+    # Allow ACP workspace path (path-traversal check only)
+    if _is_acp_workspace_path(path):
+        _reject_path_traversal(path)
+        return True
+
+    # Allow custom mount container paths
+    if _is_custom_mount_path(path):
+        _reject_path_traversal(path)
+        return True
+
+    if allow_system_paths and any(path == prefix.rstrip("/") or path.startswith(prefix) for prefix in _LOCAL_BASH_SYSTEM_PATH_PREFIXES):
+        return True
+
+    return False
+
+
+def _next_cd_target(tokens: list[str], start_index: int) -> tuple[str | None, int]:
+    index = start_index
+    while index < len(tokens):
+        token = tokens[index]
+        if _is_shell_command_separator(token):
+            return None, index
+        if _is_shell_redirection_operator(token):
+            index += 2
+            continue
+        if token == "--":
+            index += 1
+            continue
+        if token in {"-L", "-P", "-e", "-@"}:
+            index += 1
+            continue
+        if token.startswith("-") and token != "-":
+            index += 1
+            continue
+        return token, index + 1
+    return None, index
+
+
+def _validate_local_bash_cwd_target(command_name: str, target: str | None, allowed_paths: list[str]) -> None:
+    if target is None or target == "-":
+        raise PermissionError(f"Unsafe working directory change in command: {command_name}. Use paths under {VIRTUAL_PATH_PREFIX}")
+    if target.startswith(("$", "`")):
+        raise PermissionError(f"Unsafe working directory change in command: {command_name} {target}. Use paths under {VIRTUAL_PATH_PREFIX}")
+    if target.startswith("~"):
+        raise PermissionError(f"Unsafe working directory change in command: {command_name} {target}. Use paths under {VIRTUAL_PATH_PREFIX}")
+    if target.startswith("/"):
+        _reject_path_traversal(target)
+        if not _is_allowed_local_bash_absolute_path(target, allowed_paths, allow_system_paths=False):
+            raise PermissionError(f"Unsafe working directory change in command: {command_name} {target}. Use paths under {VIRTUAL_PATH_PREFIX}")
+
+
+def _looks_like_unsafe_cwd_target(target: str | None) -> bool:
+    if target is None:
+        return False
+    return target == "-" or target.startswith(("$", "`", "~", "/", "..")) or _has_dotdot_path_segment(target)
+
+
+def _validate_local_bash_root_path_args(command_name: str, tokens: list[str], start_index: int) -> None:
+    if command_name not in _LOCAL_BASH_ROOT_PATH_COMMANDS:
+        return
+
+    index = start_index
+    while index < len(tokens):
+        token = tokens[index]
+        if _is_shell_command_separator(token):
+            return
+        if _is_shell_redirection_operator(token):
+            index += 2
+            continue
+        if token == "/" and not _is_non_file_url_token(token):
+            raise PermissionError(f"Unsafe absolute paths in command: /. Use paths under {VIRTUAL_PATH_PREFIX}")
+        index += 1
+
+
+def _validate_local_bash_shell_tokens(command: str, allowed_paths: list[str]) -> None:
+    """Conservatively reject relative path escapes missed by absolute-path scanning."""
+    if re.search(r"\$\([^)]*\b(?:cd|pushd)\b", command):
+        raise PermissionError(f"Unsafe working directory change in command substitution. Use paths under {VIRTUAL_PATH_PREFIX}")
+
+    tokens = _split_shell_tokens(command)
+
+    for token in tokens:
+        if _is_shell_command_separator(token) or _is_shell_redirection_operator(token):
+            continue
+        if _has_dotdot_path_segment(token):
+            raise PermissionError("Access denied: path traversal detected")
+
+    at_command_start = True
+    index = 0
+    while index < len(tokens):
+        token = tokens[index]
+
+        if _is_shell_command_separator(token):
+            at_command_start = True
+            index += 1
+            continue
+
+        if _is_shell_redirection_operator(token):
+            index += 1
+            continue
+
+        if at_command_start and _is_shell_assignment(token):
+            index += 1
+            continue
+
+        command_name = token.rsplit("/", 1)[-1]
+        if at_command_start and command_name in _LOCAL_BASH_COMMAND_PREFIX_KEYWORDS | _LOCAL_BASH_COMMAND_END_KEYWORDS:
+            index += 1
+            continue
+
+        if not at_command_start:
+            index += 1
+            continue
+
+        at_command_start = False
+        if command_name in _LOCAL_BASH_COMMAND_WRAPPERS and index + 1 < len(tokens):
+            wrapped_name = tokens[index + 1].rsplit("/", 1)[-1]
+            if wrapped_name in _LOCAL_BASH_CWD_COMMANDS:
+                target, next_index = _next_cd_target(tokens, index + 2)
+                _validate_local_bash_cwd_target(wrapped_name, target, allowed_paths)
+                index = next_index
+                continue
+            _validate_local_bash_root_path_args(wrapped_name, tokens, index + 2)
+
+        if command_name not in _LOCAL_BASH_CWD_COMMANDS:
+            _validate_local_bash_root_path_args(command_name, tokens, index + 1)
+            index += 1
+            continue
+
+        target, next_index = _next_cd_target(tokens, index + 1)
+        _validate_local_bash_cwd_target(command_name, target, allowed_paths)
+        index = next_index
+
+
+def resolve_and_validate_user_data_path(path: str, thread_data: ThreadDataState) -> str:
+    """Resolve a /mnt/user-data virtual path and validate it stays in bounds."""
+    return _resolve_and_validate_user_data_path(path, thread_data)
+
+
 def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState | None) -> None:
    """Validate absolute paths in local-sandbox bash commands.

@@ -661,33 +913,14 @@ def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState

    unsafe_paths: list[str] = []
    allowed_paths = _get_mcp_allowed_paths()
+    _validate_local_bash_shell_tokens(command, allowed_paths)
+    url_spans = _non_file_url_spans(command)

-    for absolute_path in _ABSOLUTE_PATH_PATTERN.findall(command):
-        # Check for MCP filesystem server allowed paths
-        if any(absolute_path.startswith(path) or absolute_path == path.rstrip("/") for path in allowed_paths):
-            _reject_path_traversal(absolute_path)
+    for match in _ABSOLUTE_PATH_PATTERN.finditer(command):
+        if _is_in_spans(match.start(), url_spans):
            continue
-
-        if absolute_path == VIRTUAL_PATH_PREFIX or absolute_path.startswith(f"{VIRTUAL_PATH_PREFIX}/"):
-            _reject_path_traversal(absolute_path)
-            continue
-
-        # Allow skills container path (resolved by tools.py before passing to sandbox)
-        if _is_skills_path(absolute_path):
-            _reject_path_traversal(absolute_path)
-            continue
-
-        # Allow ACP workspace path (path-traversal check only)
-        if _is_acp_workspace_path(absolute_path):
-            _reject_path_traversal(absolute_path)
-            continue
-
-        # Allow custom mount container paths
-        if _is_custom_mount_path(absolute_path):
-            _reject_path_traversal(absolute_path)
-            continue
-
-        if any(absolute_path == prefix.rstrip("/") or absolute_path.startswith(prefix) for prefix in _LOCAL_BASH_SYSTEM_PATH_PREFIXES):
+        absolute_path = match.group()
+        if _is_allowed_local_bash_absolute_path(absolute_path, allowed_paths, allow_system_paths=True):
            continue

        unsafe_paths.append(absolute_path)
@@ -1,4 +1,4 @@
-from .installer import SkillAlreadyExistsError, install_skill_from_archive
+from .installer import SkillAlreadyExistsError, SkillSecurityScanError, ainstall_skill_from_archive, install_skill_from_archive
 from .loader import get_skills_root_path, load_skills
 from .types import Skill
 from .validation import ALLOWED_FRONTMATTER_PROPERTIES, _validate_skill_frontmatter
@@ -10,5 +10,7 @@ __all__ = [
    "ALLOWED_FRONTMATTER_PROPERTIES",
    "_validate_skill_frontmatter",
    "install_skill_from_archive",
+    "ainstall_skill_from_archive",
    "SkillAlreadyExistsError",
+    "SkillSecurityScanError",
 ]
@@ -4,6 +4,8 @@ Pure business logic — no FastAPI/HTTP dependencies.
 Both Gateway and Client delegate to these functions.
 """

+import asyncio
+import concurrent.futures
 import logging
 import posixpath
 import shutil
@@ -13,15 +15,23 @@ import zipfile
 from pathlib import Path, PurePosixPath, PureWindowsPath

 from deerflow.skills.loader import get_skills_root_path
+from deerflow.skills.security_scanner import scan_skill_content
 from deerflow.skills.validation import _validate_skill_frontmatter

 logger = logging.getLogger(__name__)

+_PROMPT_INPUT_DIRS = {"references", "templates"}
+_PROMPT_INPUT_SUFFIXES = frozenset({".json", ".markdown", ".md", ".rst", ".txt", ".yaml", ".yml"})
+

 class SkillAlreadyExistsError(ValueError):
    """Raised when a skill with the same name is already installed."""


+class SkillSecurityScanError(ValueError):
+    """Raised when a skill archive fails security scanning."""
+
+
 def is_unsafe_zip_member(info: zipfile.ZipInfo) -> bool:
    """Return True if the zip member path is absolute or attempts directory traversal."""
    name = info.filename
@@ -114,7 +124,78 @@ def safe_extract_skill_archive(
                dst.write(chunk)


-def install_skill_from_archive(
+def _is_script_support_file(rel_path: Path) -> bool:
+    return bool(rel_path.parts) and rel_path.parts[0] == "scripts"
+
+
+def _should_scan_support_file(rel_path: Path) -> bool:
+    if _is_script_support_file(rel_path):
+        return True
+    return bool(rel_path.parts) and rel_path.parts[0] in _PROMPT_INPUT_DIRS and rel_path.suffix.lower() in _PROMPT_INPUT_SUFFIXES
+
+
+def _move_staged_skill_into_reserved_target(staging_target: Path, target: Path) -> None:
+    installed = False
+    reserved = False
+    try:
+        target.mkdir(mode=0o700)
+        reserved = True
+        for child in staging_target.iterdir():
+            shutil.move(str(child), target / child.name)
+        installed = True
+    except FileExistsError as e:
+        raise SkillAlreadyExistsError(f"Skill '{target.name}' already exists") from e
+    finally:
+        if reserved and not installed and target.exists():
+            shutil.rmtree(target)
+
+
+async def _scan_skill_file_or_raise(skill_dir: Path, path: Path, skill_name: str, *, executable: bool) -> None:
+    rel_path = path.relative_to(skill_dir).as_posix()
+    location = f"{skill_name}/{rel_path}"
+    try:
+        content = path.read_text(encoding="utf-8")
+    except UnicodeDecodeError as e:
+        raise SkillSecurityScanError(f"Security scan failed for skill '{skill_name}': {location} must be valid UTF-8") from e
+
+    try:
+        result = await scan_skill_content(content, executable=executable, location=location)
+    except Exception as e:
+        raise SkillSecurityScanError(f"Security scan failed for {location}: {e}") from e
+
+    decision = getattr(result, "decision", None)
+    reason = str(getattr(result, "reason", "") or "No reason provided.")
+    if decision == "block":
+        if rel_path == "SKILL.md":
+            raise SkillSecurityScanError(f"Security scan blocked skill '{skill_name}': {reason}")
+        raise SkillSecurityScanError(f"Security scan blocked {location}: {reason}")
+    if executable and decision != "allow":
+        raise SkillSecurityScanError(f"Security scan rejected executable {location}: {reason}")
+    if decision not in {"allow", "warn"}:
+        raise SkillSecurityScanError(f"Security scan failed for {location}: invalid scanner decision {decision!r}")
+
+
+async def _scan_skill_archive_contents_or_raise(skill_dir: Path, skill_name: str) -> None:
+    """Run the skill security scanner against all installable text and script files."""
+    skill_md = skill_dir / "SKILL.md"
+    await _scan_skill_file_or_raise(skill_dir, skill_md, skill_name, executable=False)
+
+    for path in sorted(skill_dir.rglob("*")):
+        if not path.is_file():
+            continue
+
+        rel_path = path.relative_to(skill_dir)
+        if rel_path == Path("SKILL.md"):
+            continue
+        if path.name == "SKILL.md":
+            raise SkillSecurityScanError(f"Security scan failed for skill '{skill_name}': nested SKILL.md is not allowed at {skill_name}/{rel_path.as_posix()}")
+        if not _should_scan_support_file(rel_path):
+            continue
+
+        await _scan_skill_file_or_raise(skill_dir, path, skill_name, executable=_is_script_support_file(rel_path))
+
+
+async def ainstall_skill_from_archive(
    zip_path: str | Path,
    *,
    skills_root: Path | None = None,
@@ -173,7 +254,12 @@ def install_skill_from_archive(
        if target.exists():
            raise SkillAlreadyExistsError(f"Skill '{skill_name}' already exists")

-        shutil.copytree(skill_dir, target)
+        await _scan_skill_archive_contents_or_raise(skill_dir, skill_name)
+
+        with tempfile.TemporaryDirectory(prefix=f".installing-{skill_name}-", dir=custom_dir) as staging_root:
+            staging_target = Path(staging_root) / skill_name
+            shutil.copytree(skill_dir, staging_target)
+            _move_staged_skill_into_reserved_target(staging_target, target)
        logger.info("Skill %r installed to %s", skill_name, target)

    return {
@@ -181,3 +267,24 @@ def install_skill_from_archive(
        "skill_name": skill_name,
        "message": f"Skill '{skill_name}' installed successfully",
    }
+
+
+def _run_async_install(coro):
+    try:
+        loop = asyncio.get_running_loop()
+    except RuntimeError:
+        loop = None
+
+    if loop is not None and loop.is_running():
+        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
+            return executor.submit(asyncio.run, coro).result()
+    return asyncio.run(coro)
+
+
+def install_skill_from_archive(
+    zip_path: str | Path,
+    *,
+    skills_root: Path | None = None,
+) -> dict:
+    """Install a skill from a .skill archive (ZIP)."""
+    return _run_async_install(ainstall_skill_from_archive(zip_path, skills_root=skills_root))
@@ -8,7 +8,42 @@ from langchain_core.messages import ToolMessage
 from langgraph.types import Command
 from langgraph.typing import ContextT

-from deerflow.agents.thread_state import ThreadState
+from deerflow.agents.thread_state import ThreadDataState, ThreadState
+from deerflow.config.paths import VIRTUAL_PATH_PREFIX
+
+_ALLOWED_IMAGE_VIRTUAL_ROOTS = (
+    f"{VIRTUAL_PATH_PREFIX}/workspace",
+    f"{VIRTUAL_PATH_PREFIX}/uploads",
+    f"{VIRTUAL_PATH_PREFIX}/outputs",
+)
+_ALLOWED_IMAGE_VIRTUAL_ROOTS_TEXT = ", ".join(_ALLOWED_IMAGE_VIRTUAL_ROOTS)
+_MAX_IMAGE_BYTES = 20 * 1024 * 1024
+_EXTENSION_TO_MIME = {
+    ".jpg": "image/jpeg",
+    ".jpeg": "image/jpeg",
+    ".png": "image/png",
+    ".webp": "image/webp",
+}
+
+
+def _is_allowed_image_virtual_path(image_path: str) -> bool:
+    return any(image_path == root or image_path.startswith(f"{root}/") for root in _ALLOWED_IMAGE_VIRTUAL_ROOTS)
+
+
+def _detect_image_mime(image_data: bytes) -> str | None:
+    if image_data.startswith(b"\xff\xd8\xff"):
+        return "image/jpeg"
+    if image_data.startswith(b"\x89PNG\r\n\x1a\n"):
+        return "image/png"
+    if len(image_data) >= 12 and image_data.startswith(b"RIFF") and image_data[8:12] == b"WEBP":
+        return "image/webp"
+    return None
+
+
+def _sanitize_image_error(error: Exception, thread_data: ThreadDataState | None) -> str:
+    from deerflow.sandbox.tools import mask_local_paths_in_output
+
+    return mask_local_paths_in_output(f"{type(error).__name__}: {error}", thread_data)


@tool("view_image", parse_docstring=True)
@@ -29,22 +64,39 @@ def view_image_tool(
    - For multiple files at once (use present_files instead)

    Args:
-        image_path: Absolute path to the image file. Common formats supported: jpg, jpeg, png, webp.
+        image_path: Absolute /mnt/user-data virtual path to the image file. Common formats supported: jpg, jpeg, png, webp.
    """
-    from deerflow.sandbox.tools import get_thread_data, replace_virtual_path
+    from deerflow.sandbox.exceptions import SandboxRuntimeError
+    from deerflow.sandbox.tools import (
+        get_thread_data,
+        resolve_and_validate_user_data_path,
+        validate_local_tool_path,
+    )

-    # Replace virtual path with actual path
-    # /mnt/user-data/* paths are mapped to thread-specific directories
    thread_data = get_thread_data(runtime)
-    actual_path = replace_virtual_path(image_path, thread_data)

-    # Validate that the path is absolute
-    path = Path(actual_path)
-    if not path.is_absolute():
+    if not _is_allowed_image_virtual_path(image_path):
        return Command(
-            update={"messages": [ToolMessage(f"Error: Path must be absolute, got: {image_path}", tool_call_id=tool_call_id)]},
+            update={
+                "messages": [
+                    ToolMessage(
+                        f"Error: Only image paths under {_ALLOWED_IMAGE_VIRTUAL_ROOTS_TEXT} are allowed",
+                        tool_call_id=tool_call_id,
+                    )
+                ]
+            },
        )

+    try:
+        validate_local_tool_path(image_path, thread_data, read_only=True)
+        actual_path = resolve_and_validate_user_data_path(image_path, thread_data)
+    except (PermissionError, SandboxRuntimeError) as e:
+        return Command(
+            update={"messages": [ToolMessage(f"Error: {str(e)}", tool_call_id=tool_call_id)]},
+        )
+
+    path = Path(actual_path)
+
    # Validate that the file exists
    if not path.exists():
        return Command(
@@ -58,34 +110,49 @@ def view_image_tool(
        )

    # Validate image extension
-    valid_extensions = {".jpg", ".jpeg", ".png", ".webp"}
-    if path.suffix.lower() not in valid_extensions:
+    expected_mime_type = _EXTENSION_TO_MIME.get(path.suffix.lower())
+    if expected_mime_type is None:
        return Command(
-            update={"messages": [ToolMessage(f"Error: Unsupported image format: {path.suffix}. Supported formats: {', '.join(valid_extensions)}", tool_call_id=tool_call_id)]},
+            update={"messages": [ToolMessage(f"Error: Unsupported image format: {path.suffix}. Supported formats: {', '.join(_EXTENSION_TO_MIME)}", tool_call_id=tool_call_id)]},
        )

    # Detect MIME type from file extension
    mime_type, _ = mimetypes.guess_type(actual_path)
    if mime_type is None:
-        # Fallback to default MIME types for common image formats
-        extension_to_mime = {
-            ".jpg": "image/jpeg",
-            ".jpeg": "image/jpeg",
-            ".png": "image/png",
-            ".webp": "image/webp",
-        }
-        mime_type = extension_to_mime.get(path.suffix.lower(), "application/octet-stream")
+        mime_type = expected_mime_type
+
+    try:
+        image_size = path.stat().st_size
+    except OSError as e:
+        return Command(
+            update={"messages": [ToolMessage(f"Error reading image metadata: {_sanitize_image_error(e, thread_data)}", tool_call_id=tool_call_id)]},
+        )
+    if image_size > _MAX_IMAGE_BYTES:
+        return Command(
+            update={"messages": [ToolMessage(f"Error: Image file is too large: {image_size} bytes. Maximum supported size is {_MAX_IMAGE_BYTES} bytes", tool_call_id=tool_call_id)]},
+        )

    # Read image file and convert to base64
    try:
        with open(actual_path, "rb") as f:
            image_data = f.read()
-            image_base64 = base64.b64encode(image_data).decode("utf-8")
    except Exception as e:
        return Command(
-            update={"messages": [ToolMessage(f"Error reading image file: {str(e)}", tool_call_id=tool_call_id)]},
+            update={"messages": [ToolMessage(f"Error reading image file: {_sanitize_image_error(e, thread_data)}", tool_call_id=tool_call_id)]},
        )

+    detected_mime_type = _detect_image_mime(image_data)
+    if detected_mime_type is None:
+        return Command(
+            update={"messages": [ToolMessage("Error: File contents do not match a supported image format", tool_call_id=tool_call_id)]},
+        )
+    if detected_mime_type != expected_mime_type:
+        return Command(
+            update={"messages": [ToolMessage(f"Error: Image contents are {detected_mime_type}, but file extension indicates {expected_mime_type}", tool_call_id=tool_call_id)]},
+        )
+    mime_type = detected_mime_type
+    image_base64 = base64.b64encode(image_data).decode("utf-8")
+
    # Update viewed_images in state
    # The merge_viewed_images reducer will handle merging with existing images
    new_viewed_images = {image_path: {"base64": image_base64, "mime_type": mime_type}}