feat(auth): authentication module with multi-tenant isolation (RFC-001)

Introduce an always-on auth layer with auto-created admin on first boot,
multi-tenant isolation for threads/stores, and a full setup/login flow.

Backend
- JWT access tokens with `ver` field for stale-token rejection; bump on
  password/email change
- Password hashing, HttpOnly+Secure cookies (Secure derived from request
  scheme at runtime)
- CSRF middleware covering both REST and LangGraph routes
- IP-based login rate limiting (5 attempts / 5-min lockout) with bounded
  dict growth and X-Forwarded-For bypass fix
- Multi-worker-safe admin auto-creation (single DB write, WAL once)
- needs_setup + token_version on User model; SQLite schema migration
- Thread/store isolation by owner; orphan thread migration on first admin
  registration
- thread_id validated as UUID to prevent log injection
- CLI tool to reset admin password
- Decorator-based authz module extracted from auth core

Frontend
- Login and setup pages with SSR guard for needs_setup flow
- Account settings page (change password / email)
- AuthProvider + route guards; skips redirect when no users registered
- i18n (en-US / zh-CN) for auth surfaces
- Typed auth API client; parseAuthError unwraps FastAPI detail envelope

Infra & tooling
- Unified `serve.sh` with gateway mode + auto dep install
- Public PyPI uv.toml pin for CI compatibility
- Regenerated uv.lock with public index

Tests
- HTTP vs HTTPS cookie security tests
- Auth middleware, rate limiter, CSRF, setup flow coverage
This commit is contained in:
greatmengqi
2026-04-08 00:31:43 +08:00
parent 636053fb6d
commit 27b66d6753
214 changed files with 18830 additions and 1065 deletions
@@ -1,72 +1,6 @@
import fnmatch
from pathlib import Path
IGNORE_PATTERNS = [
# Version Control
".git",
".svn",
".hg",
".bzr",
# Dependencies
"node_modules",
"__pycache__",
".venv",
"venv",
".env",
"env",
".tox",
".nox",
".eggs",
"*.egg-info",
"site-packages",
# Build outputs
"dist",
"build",
".next",
".nuxt",
".output",
".turbo",
"target",
"out",
# IDE & Editor
".idea",
".vscode",
"*.swp",
"*.swo",
"*~",
".project",
".classpath",
".settings",
# OS generated
".DS_Store",
"Thumbs.db",
"desktop.ini",
"*.lnk",
# Logs & temp files
"*.log",
"*.tmp",
"*.temp",
"*.bak",
"*.cache",
".cache",
"logs",
# Coverage & test artifacts
".coverage",
"coverage",
".nyc_output",
"htmlcov",
".pytest_cache",
".mypy_cache",
".ruff_cache",
]
def _should_ignore(name: str) -> bool:
"""Check if a file/directory name matches any ignore pattern."""
for pattern in IGNORE_PATTERNS:
if fnmatch.fnmatch(name, pattern):
return True
return False
from deerflow.sandbox.search import should_ignore_name
def list_dir(path: str, max_depth: int = 2) -> list[str]:
@@ -95,7 +29,7 @@ def list_dir(path: str, max_depth: int = 2) -> list[str]:
try:
for item in current_path.iterdir():
if _should_ignore(item.name):
if should_ignore_name(item.name):
continue
post_fix = "/" if item.is_dir() else ""
@@ -1,11 +1,23 @@
import errno
import ntpath
import os
import shutil
import subprocess
from dataclasses import dataclass
from pathlib import Path
from deerflow.sandbox.local.list_dir import list_dir
from deerflow.sandbox.sandbox import Sandbox
from deerflow.sandbox.search import GrepMatch, find_glob_matches, find_grep_matches
@dataclass(frozen=True)
class PathMapping:
    """Immutable link between a container path and the local path that backs it.

    Attributes:
        container_path: Path as addressed from inside the sandbox.
        local_path: Local filesystem path the container path maps to.
        read_only: Marks the mapping as a read-only mount; defaults to False.
    """

    container_path: str
    local_path: str
    read_only: bool = False
class LocalSandbox(Sandbox):
@@ -39,17 +51,42 @@ class LocalSandbox(Sandbox):
return None
def __init__(self, id: str, path_mappings: dict[str, str] | None = None):
def __init__(self, id: str, path_mappings: list[PathMapping] | None = None):
"""
Initialize local sandbox with optional path mappings.
Args:
id: Sandbox identifier
path_mappings: Dictionary mapping container paths to local paths
Example: {"/mnt/skills": "/absolute/path/to/skills"}
path_mappings: List of path mappings with optional read-only flag.
Skills directory is read-only by default.
"""
super().__init__(id)
self.path_mappings = path_mappings or {}
self.path_mappings = path_mappings or []
def _is_read_only_path(self, resolved_path: str) -> bool:
    """Report whether a resolved local path sits under a read-only mapping.

    Every mapping whose resolved ``local_path`` equals the target or is a
    directory prefix of it is a candidate. With nested mounts the candidate
    with the longest resolved ``local_path`` decides the answer (the first
    one wins on a tie).

    Args:
        resolved_path: Local path to check; it is resolved again before comparison.

    Returns:
        The ``read_only`` flag of the most specific matching mapping, or
        False when no mapping covers the path.
    """
    target = str(Path(resolved_path).resolve())

    candidates = []
    for mapping in self.path_mappings:
        mount_root = str(Path(mapping.local_path).resolve())
        # Compare against "root + separator" so /data does not claim /data-extra.
        if target == mount_root or target.startswith(mount_root + os.sep):
            candidates.append((len(mount_root), mapping))

    if not candidates:
        return False

    # max() keeps the first of equally long prefixes, like a strict ">" scan.
    _, most_specific = max(candidates, key=lambda pair: pair[0])
    return most_specific.read_only
def _resolve_path(self, path: str) -> str:
"""
@@ -64,7 +101,9 @@ class LocalSandbox(Sandbox):
path_str = str(path)
# Try each mapping (longest prefix first for more specific matches)
for container_path, local_path in sorted(self.path_mappings.items(), key=lambda x: len(x[0]), reverse=True):
for mapping in sorted(self.path_mappings, key=lambda m: len(m.container_path), reverse=True):
container_path = mapping.container_path
local_path = mapping.local_path
if path_str == container_path or path_str.startswith(container_path + "/"):
# Replace the container path prefix with local path
relative = path_str[len(container_path) :].lstrip("/")
@@ -84,15 +123,16 @@ class LocalSandbox(Sandbox):
Returns:
Container path if mapping exists, otherwise original path
"""
path_str = str(Path(path).resolve())
normalized_path = path.replace("\\", "/")
path_str = str(Path(normalized_path).resolve())
# Try each mapping (longest local path first for more specific matches)
for container_path, local_path in sorted(self.path_mappings.items(), key=lambda x: len(x[1]), reverse=True):
local_path_resolved = str(Path(local_path).resolve())
if path_str.startswith(local_path_resolved):
for mapping in sorted(self.path_mappings, key=lambda m: len(m.local_path), reverse=True):
local_path_resolved = str(Path(mapping.local_path).resolve())
if path_str == local_path_resolved or path_str.startswith(local_path_resolved + "/"):
# Replace the local path prefix with container path
relative = path_str[len(local_path_resolved) :].lstrip("/")
resolved = f"{container_path}/{relative}" if relative else container_path
resolved = f"{mapping.container_path}/{relative}" if relative else mapping.container_path
return resolved
# No mapping found, return original path
@@ -111,7 +151,7 @@ class LocalSandbox(Sandbox):
import re
# Sort mappings by local path length (longest first) for correct prefix matching
sorted_mappings = sorted(self.path_mappings.items(), key=lambda x: len(x[1]), reverse=True)
sorted_mappings = sorted(self.path_mappings, key=lambda m: len(m.local_path), reverse=True)
if not sorted_mappings:
return output
@@ -119,12 +159,11 @@ class LocalSandbox(Sandbox):
# Create pattern that matches absolute paths
# Match paths like /Users/... or other absolute paths
result = output
for container_path, local_path in sorted_mappings:
local_path_resolved = str(Path(local_path).resolve())
for mapping in sorted_mappings:
# Escape the local path for use in regex
escaped_local = re.escape(local_path_resolved)
# Match the local path followed by optional path components
pattern = re.compile(escaped_local + r"(?:/[^\s\"';&|<>()]*)?")
escaped_local = re.escape(str(Path(mapping.local_path).resolve()))
# Match the local path followed by optional path components with either separator
pattern = re.compile(escaped_local + r"(?:[/\\][^\s\"';&|<>()]*)?")
def replace_match(match: re.Match) -> str:
matched_path = match.group(0)
@@ -147,7 +186,7 @@ class LocalSandbox(Sandbox):
import re
# Sort mappings by length (longest first) for correct prefix matching
sorted_mappings = sorted(self.path_mappings.items(), key=lambda x: len(x[0]), reverse=True)
sorted_mappings = sorted(self.path_mappings, key=lambda m: len(m.container_path), reverse=True)
# Build regex pattern to match all container paths
# Match container path followed by optional path components
@@ -157,7 +196,7 @@ class LocalSandbox(Sandbox):
# Create pattern that matches any of the container paths.
# The lookahead (?=/|$|...) ensures we only match at a path-segment boundary,
# preventing /mnt/skills from matching inside /mnt/skills-extra.
patterns = [re.escape(container_path) + r"(?=/|$|[\s\"';&|<>()])(?:/[^\s\"';&|<>()]*)?" for container_path, _ in sorted_mappings]
patterns = [re.escape(m.container_path) + r"(?=/|$|[\s\"';&|<>()])(?:/[^\s\"';&|<>()]*)?" for m in sorted_mappings]
pattern = re.compile("|".join(f"({p})" for p in patterns))
def replace_match(match: re.Match) -> str:
@@ -248,6 +287,8 @@ class LocalSandbox(Sandbox):
def write_file(self, path: str, content: str, append: bool = False) -> None:
resolved_path = self._resolve_path(path)
if self._is_read_only_path(resolved_path):
raise OSError(errno.EROFS, "Read-only file system", path)
try:
dir_path = os.path.dirname(resolved_path)
if dir_path:
@@ -259,8 +300,43 @@ class LocalSandbox(Sandbox):
# Re-raise with the original path for clearer error messages, hiding internal resolved paths
raise type(e)(e.errno, e.strerror, path) from None
def glob(self, path: str, pattern: str, *, include_dirs: bool = False, max_results: int = 200) -> tuple[list[str], bool]:
    """Find paths matching a glob pattern under a sandbox directory.

    The root is translated with ``_resolve_path`` before the search, and each
    hit is translated back with ``_reverse_resolve_path`` before it is returned.

    Args:
        path: Root directory to search, as addressed by the caller.
        pattern: Glob pattern forwarded to ``find_glob_matches``.
        include_dirs: Forwarded to ``find_glob_matches``.
        max_results: Forwarded to ``find_glob_matches``.

    Returns:
        ``(paths, truncated)`` where ``truncated`` is the flag reported by
        ``find_glob_matches``.
    """
    search_root = Path(self._resolve_path(path))
    hits, was_truncated = find_glob_matches(
        search_root,
        pattern,
        include_dirs=include_dirs,
        max_results=max_results,
    )
    visible_paths = []
    for hit in hits:
        visible_paths.append(self._reverse_resolve_path(hit))
    return visible_paths, was_truncated
def grep(
    self,
    path: str,
    pattern: str,
    *,
    glob: str | None = None,
    literal: bool = False,
    case_sensitive: bool = False,
    max_results: int = 100,
) -> tuple[list[GrepMatch], bool]:
    """Search file contents under a sandbox directory.

    The root is translated with ``_resolve_path`` before the search. Each
    returned match is rebuilt with its path translated back through
    ``_reverse_resolve_path``; line number and line text are passed through
    unchanged.

    Args:
        path: Root directory to search, as addressed by the caller.
        pattern: Search pattern forwarded to ``find_grep_matches``.
        glob: Optional file filter, forwarded as ``glob_pattern``.
        literal: Forwarded to ``find_grep_matches``.
        case_sensitive: Forwarded to ``find_grep_matches``.
        max_results: Forwarded to ``find_grep_matches``.

    Returns:
        ``(matches, truncated)`` where ``truncated`` is the flag reported by
        ``find_grep_matches``.
    """
    search_root = Path(self._resolve_path(path))
    raw_hits, was_truncated = find_grep_matches(
        search_root,
        pattern,
        glob_pattern=glob,
        literal=literal,
        case_sensitive=case_sensitive,
        max_results=max_results,
    )

    translated = []
    for hit in raw_hits:
        translated.append(
            GrepMatch(
                path=self._reverse_resolve_path(hit.path),
                line_number=hit.line_number,
                line=hit.line,
            )
        )
    return translated, was_truncated
def update_file(self, path: str, content: bytes) -> None:
resolved_path = self._resolve_path(path)
if self._is_read_only_path(resolved_path):
raise OSError(errno.EROFS, "Read-only file system", path)
try:
dir_path = os.path.dirname(resolved_path)
if dir_path:
@@ -1,6 +1,7 @@
import logging
from pathlib import Path
from deerflow.sandbox.local.local_sandbox import LocalSandbox
from deerflow.sandbox.local.local_sandbox import LocalSandbox, PathMapping
from deerflow.sandbox.sandbox import Sandbox
from deerflow.sandbox.sandbox_provider import SandboxProvider
@@ -14,16 +15,17 @@ class LocalSandboxProvider(SandboxProvider):
"""Initialize the local sandbox provider with path mappings."""
self._path_mappings = self._setup_path_mappings()
def _setup_path_mappings(self) -> dict[str, str]:
def _setup_path_mappings(self) -> list[PathMapping]:
"""
Setup path mappings for local sandbox.
Maps container paths to actual local paths, including skills directory.
Maps container paths to actual local paths, including skills directory
and any custom mounts configured in config.yaml.
Returns:
Dictionary of path mappings
List of path mappings
"""
mappings = {}
mappings: list[PathMapping] = []
# Map skills container path to local skills directory
try:
@@ -35,10 +37,63 @@ class LocalSandboxProvider(SandboxProvider):
# Only add mapping if skills directory exists
if skills_path.exists():
mappings[container_path] = str(skills_path)
mappings.append(
PathMapping(
container_path=container_path,
local_path=str(skills_path),
read_only=True, # Skills directory is always read-only
)
)
# Map custom mounts from sandbox config
_RESERVED_CONTAINER_PREFIXES = [container_path, "/mnt/acp-workspace", "/mnt/user-data"]
sandbox_config = config.sandbox
if sandbox_config and sandbox_config.mounts:
for mount in sandbox_config.mounts:
host_path = Path(mount.host_path)
container_path = mount.container_path.rstrip("/") or "/"
if not host_path.is_absolute():
logger.warning(
"Mount host_path must be absolute, skipping: %s -> %s",
mount.host_path,
mount.container_path,
)
continue
if not container_path.startswith("/"):
logger.warning(
"Mount container_path must be absolute, skipping: %s -> %s",
mount.host_path,
mount.container_path,
)
continue
# Reject mounts that conflict with reserved container paths
if any(container_path == p or container_path.startswith(p + "/") for p in _RESERVED_CONTAINER_PREFIXES):
logger.warning(
"Mount container_path conflicts with reserved prefix, skipping: %s",
mount.container_path,
)
continue
# Ensure the host path exists before adding mapping
if host_path.exists():
mappings.append(
PathMapping(
container_path=container_path,
local_path=str(host_path.resolve()),
read_only=mount.read_only,
)
)
else:
logger.warning(
"Mount host_path does not exist, skipping: %s -> %s",
mount.host_path,
mount.container_path,
)
except Exception as e:
# Log but don't fail if config loading fails
logger.warning("Could not setup skills path mapping: %s", e, exc_info=True)
logger.warning("Could not setup path mappings: %s", e, exc_info=True)
return mappings
@@ -1,5 +1,7 @@
from abc import ABC, abstractmethod
from deerflow.sandbox.search import GrepMatch
class Sandbox(ABC):
"""Abstract base class for sandbox environments"""
@@ -61,6 +63,25 @@ class Sandbox(ABC):
"""
pass
@abstractmethod
def glob(self, path: str, pattern: str, *, include_dirs: bool = False, max_results: int = 200) -> tuple[list[str], bool]:
    """Find paths that match a glob pattern under a root directory.

    Args:
        path: Root directory to search under.
        pattern: Glob pattern to match.
        include_dirs: Whether directories may be returned as matches.
        max_results: Upper bound on the number of returned paths.

    Returns:
        A ``(paths, flag)`` pair; the local sandbox implementation uses the
        flag to report that results were cut off at ``max_results``.
    """
@abstractmethod
def grep(
    self,
    path: str,
    pattern: str,
    *,
    glob: str | None = None,
    literal: bool = False,
    case_sensitive: bool = False,
    max_results: int = 100,
) -> tuple[list[GrepMatch], bool]:
    """Search for matches inside text files under a directory.

    Args:
        path: Root directory to search under.
        pattern: Pattern to look for.
        glob: Optional glob restricting which files are searched.
        literal: Whether ``pattern`` is a plain string rather than a regex.
        case_sensitive: Whether matching is case-sensitive.
        max_results: Upper bound on the number of returned matches.

    Returns:
        A ``(matches, flag)`` pair; the local sandbox implementation uses the
        flag to report that results were cut off at ``max_results``.
    """
@abstractmethod
def update_file(self, path: str, content: bytes) -> None:
"""Update a file with binary content.
@@ -0,0 +1,210 @@
import fnmatch
import os
import re
from dataclasses import dataclass
from pathlib import Path, PurePosixPath
# fnmatch-style patterns for file and directory names that the search helpers
# skip. should_ignore_name() tests a single name against every entry.
IGNORE_PATTERNS = [
# Version control
".git",
".svn",
".hg",
".bzr",
# Dependencies
"node_modules",
"__pycache__",
".venv",
"venv",
".env",
"env",
".tox",
".nox",
".eggs",
"*.egg-info",
"site-packages",
# Build outputs
"dist",
"build",
".next",
".nuxt",
".output",
".turbo",
"target",
"out",
# IDE & editor
".idea",
".vscode",
"*.swp",
"*.swo",
"*~",
".project",
".classpath",
".settings",
# OS generated
".DS_Store",
"Thumbs.db",
"desktop.ini",
"*.lnk",
# Logs & temp files
"*.log",
"*.tmp",
"*.temp",
"*.bak",
"*.cache",
".cache",
"logs",
# Coverage & test artifacts
".coverage",
"coverage",
".nyc_output",
"htmlcov",
".pytest_cache",
".mypy_cache",
".ruff_cache",
]
# Default ``max_file_size`` for find_grep_matches(): larger files are skipped.
DEFAULT_MAX_FILE_SIZE_BYTES = 1_000_000
# Default maximum length of a line summary produced by truncate_line().
DEFAULT_LINE_SUMMARY_LENGTH = 200
@dataclass(frozen=True)
class GrepMatch:
    """One matching line found by a content search.

    Attributes:
        path: Path of the file containing the match.
        line_number: Line number of the match (counting starts at 1).
        line: Text of the matching line, possibly shortened for display.
    """

    path: str
    line_number: int
    line: str
def should_ignore_name(name: str) -> bool:
    """Return True when a single file or directory name matches any ignore pattern."""
    return any(fnmatch.fnmatch(name, ignored) for ignored in IGNORE_PATTERNS)
def should_ignore_path(path: str) -> bool:
    """Return True when any segment of *path* is an ignored name.

    Backslashes are treated as separators too, and empty segments (from
    leading, trailing or doubled separators) are skipped.
    """
    segments = path.replace("\\", "/").split("/")
    for segment in segments:
        if segment and should_ignore_name(segment):
            return True
    return False
def _match_glob_segments(pattern_parts: list[str], path_parts: list[str]) -> bool:
    """Match path segments against pattern segments, with ``**`` spanning zero or more segments."""
    if not pattern_parts:
        return not path_parts
    head, rest = pattern_parts[0], pattern_parts[1:]
    if head == "**":
        # Try every possible number of segments swallowed by "**", including none.
        return any(_match_glob_segments(rest, path_parts[skip:]) for skip in range(len(path_parts) + 1))
    if not path_parts:
        return False
    return fnmatch.fnmatchcase(path_parts[0], head) and _match_glob_segments(rest, path_parts[1:])


def path_matches(pattern: str, rel_path: str) -> bool:
    """Return True when the relative POSIX path *rel_path* satisfies *pattern*.

    Matching is tried in three steps, each only adding matches to the previous:

    1. ``PurePosixPath.match`` (right-anchored for relative patterns, so
       ``*.py`` also matches ``pkg/mod.py``).
    2. For patterns starting with ``**/``, the same check with that prefix
       removed, so ``**/*.py`` also matches a top-level ``mod.py``.
    3. For patterns containing a ``**`` segment, a segment-wise match in
       which ``**`` spans zero or more directories. ``PurePath.match``
       treats ``**`` like a single ``*``, so without this step
       ``src/**/*.py`` would match neither ``src/a/b/c.py`` nor ``src/c.py``.

    Args:
        pattern: Glob pattern using ``/`` as separator.
        rel_path: Path relative to the search root, using ``/`` as separator.

    Returns:
        True if the path matches; False otherwise, including for patterns
        that ``PurePath.match`` rejects as empty.
    """
    if not pattern:
        return False

    path = PurePosixPath(rel_path)
    try:
        if path.match(pattern):
            return True
        if pattern.startswith("**/"):
            remainder = pattern[3:]
            if remainder and path.match(remainder):
                return True
    except ValueError:
        # PurePath.match() raises on patterns that normalise to nothing (e.g. ".").
        return False

    pattern_parts = [part for part in pattern.split("/") if part]
    if "**" not in pattern_parts:
        return False
    return _match_glob_segments(pattern_parts, list(path.parts))
def truncate_line(line: str, max_chars: int = DEFAULT_LINE_SUMMARY_LENGTH) -> str:
    """Strip the line terminator and shorten the line to at most *max_chars* characters.

    Lines that are too long are cut and suffixed with ``"..."`` so the result
    is exactly ``max_chars`` long. When ``max_chars`` is below 3 there is no
    room for the ellipsis, so the line is simply cut to ``max_chars``
    characters (an empty string for zero or negative limits).

    Args:
        line: Line text, optionally ending in ``\\n`` / ``\\r`` characters.
        max_chars: Maximum length of the returned string.

    Returns:
        The stripped line, shortened if necessary.
    """
    text = line.rstrip("\n\r")
    if len(text) <= max_chars:
        return text
    if max_chars < 3:
        # A negative slice bound would count from the end and exceed the limit.
        return text[: max(max_chars, 0)]
    return text[: max_chars - 3] + "..."
def is_binary_file(path: Path, sample_size: int = 8192) -> bool:
    """Guess whether a file is binary by looking for a NUL byte near its start.

    Args:
        path: File to inspect.
        sample_size: Number of leading bytes to read.

    Returns:
        True if the sample contains a NUL byte, or if the file cannot be
        opened or read (unreadable files are treated as binary); False otherwise.
    """
    try:
        with path.open("rb") as stream:
            head = stream.read(sample_size)
    except OSError:
        return True
    return b"\0" in head
def find_glob_matches(root: Path, pattern: str, *, include_dirs: bool = False, max_results: int = 200) -> tuple[list[str], bool]:
    """Walk *root* and collect paths whose root-relative form matches *pattern*.

    Ignored directories are pruned from the walk and ignored file names are
    skipped. Within each visited directory, sub-directories are considered
    first (only when ``include_dirs`` is set), then files.

    Args:
        root: Directory to search; it is resolved before walking.
        pattern: Glob pattern tested with ``path_matches`` against the POSIX
            path relative to the resolved root.
        include_dirs: Whether directories may be reported as matches.
        max_results: The walk stops as soon as this many matches are collected.

    Returns:
        ``(matches, truncated)``; ``truncated`` is True when the walk stopped
        because ``max_results`` was reached.

    Raises:
        FileNotFoundError: If the resolved root does not exist.
        NotADirectoryError: If the resolved root is not a directory.
    """
    base = root.resolve()
    if not base.exists():
        raise FileNotFoundError(base)
    if not base.is_dir():
        raise NotADirectoryError(base)

    found: list[str] = []
    for walk_dir, subdirs, filenames in os.walk(base):
        # Prune in place so os.walk never descends into ignored directories.
        subdirs[:] = [entry for entry in subdirs if not should_ignore_name(entry)]

        here = Path(walk_dir)
        # base is already resolved and os.walk joins beneath it, so
        # relative_to() needs no further resolve() per directory.
        relative_dir = here.relative_to(base)

        candidates = list(subdirs) if include_dirs else []
        candidates.extend(entry for entry in filenames if not should_ignore_name(entry))

        for entry in candidates:
            if not path_matches(pattern, (relative_dir / entry).as_posix()):
                continue
            found.append(str(here / entry))
            if len(found) >= max_results:
                return found, True

    return found, False
def find_grep_matches(
    root: Path,
    pattern: str,
    *,
    glob_pattern: str | None = None,
    literal: bool = False,
    case_sensitive: bool = False,
    max_results: int = 100,
    max_file_size: int = DEFAULT_MAX_FILE_SIZE_BYTES,
    line_summary_length: int = DEFAULT_LINE_SUMMARY_LENGTH,
) -> tuple[list[GrepMatch], bool]:
    """Search text files under *root* for lines matching *pattern*.

    Ignored directories are pruned from the walk and ignored file names are
    skipped. Symlinks, files that resolve outside the root, files larger than
    ``max_file_size`` and files that look binary are skipped as well, as is
    any file that raises ``OSError`` while being inspected or read.

    The pattern is applied to each line *without* its line terminator, so
    expressions such as ``[^a]`` or ``\\s`` cannot match merely because of the
    trailing newline.

    Args:
        root: Directory to search; it is resolved before walking.
        pattern: Regular expression, or plain text when ``literal`` is True.
        glob_pattern: Optional glob; only files whose root-relative POSIX path
            satisfies ``path_matches`` are searched.
        literal: Escape ``pattern`` so it is matched verbatim.
        case_sensitive: Match case-sensitively; the default ignores case.
        max_results: The search stops as soon as this many matches are collected.
        max_file_size: Files larger than this many bytes are skipped.
        line_summary_length: Maximum length of the line text stored in each
            match. Lines longer than ten times this value are not searched.

    Returns:
        ``(matches, truncated)``; ``truncated`` is True when the search
        stopped because ``max_results`` was reached.

    Raises:
        FileNotFoundError: If the resolved root does not exist.
        NotADirectoryError: If the resolved root is not a directory.
        re.error: If ``pattern`` is not a valid regular expression.
    """
    matches: list[GrepMatch] = []
    root = root.resolve()
    if not root.exists():
        raise FileNotFoundError(root)
    if not root.is_dir():
        raise NotADirectoryError(root)

    regex = re.compile(
        re.escape(pattern) if literal else pattern,
        0 if case_sensitive else re.IGNORECASE,
    )

    # Skip lines longer than this to prevent ReDoS on minified / no-newline files.
    max_line_chars = line_summary_length * 10

    for current_root, dirs, files in os.walk(root):
        # Prune in place so os.walk never descends into ignored directories.
        dirs[:] = [name for name in dirs if not should_ignore_name(name)]
        rel_dir = Path(current_root).relative_to(root)

        for name in files:
            if should_ignore_name(name):
                continue
            if glob_pattern is not None and not path_matches(glob_pattern, (rel_dir / name).as_posix()):
                continue

            candidate_path = Path(current_root) / name
            try:
                if candidate_path.is_symlink():
                    continue
                file_path = candidate_path.resolve()
                # Never report content from outside the search root.
                if not file_path.is_relative_to(root):
                    continue
                if file_path.stat().st_size > max_file_size or is_binary_file(file_path):
                    continue

                with file_path.open(encoding="utf-8", errors="replace") as handle:
                    for line_number, line in enumerate(handle, start=1):
                        if len(line) > max_line_chars:
                            continue
                        # Match on the content only; the terminator is not part of the line.
                        if not regex.search(line.rstrip("\r\n")):
                            continue
                        matches.append(
                            GrepMatch(
                                path=str(file_path),
                                line_number=line_number,
                                line=truncate_line(line, line_summary_length),
                            )
                        )
                        if len(matches) >= max_results:
                            return matches, True
            except OSError:
                # Unreadable or vanished files are skipped, not fatal.
                continue

    return matches, False
@@ -7,6 +7,7 @@ from langchain.tools import ToolRuntime, tool
from langgraph.typing import ContextT
from deerflow.agents.thread_state import ThreadDataState, ThreadState
from deerflow.config import get_app_config
from deerflow.config.paths import VIRTUAL_PATH_PREFIX
from deerflow.sandbox.exceptions import (
SandboxError,
@@ -16,6 +17,7 @@ from deerflow.sandbox.exceptions import (
from deerflow.sandbox.file_operation_lock import get_file_operation_lock
from deerflow.sandbox.sandbox import Sandbox
from deerflow.sandbox.sandbox_provider import get_sandbox_provider
from deerflow.sandbox.search import GrepMatch
from deerflow.sandbox.security import LOCAL_HOST_BASH_DISABLED_MESSAGE, is_host_bash_allowed
_ABSOLUTE_PATH_PATTERN = re.compile(r"(?<![:\w])(?<!:/)/(?:[^\s\"'`;&|<>()]+)")
@@ -31,6 +33,10 @@ _LOCAL_BASH_SYSTEM_PATH_PREFIXES = (
_DEFAULT_SKILLS_CONTAINER_PATH = "/mnt/skills"
_ACP_WORKSPACE_VIRTUAL_PATH = "/mnt/acp-workspace"
_DEFAULT_GLOB_MAX_RESULTS = 200
_MAX_GLOB_MAX_RESULTS = 1000
_DEFAULT_GREP_MAX_RESULTS = 100
_MAX_GREP_MAX_RESULTS = 500
def _get_skills_container_path() -> str:
@@ -113,6 +119,54 @@ def _is_acp_workspace_path(path: str) -> bool:
return path == _ACP_WORKSPACE_VIRTUAL_PATH or path.startswith(f"{_ACP_WORKSPACE_VIRTUAL_PATH}/")
def _get_custom_mounts():
    """Return the custom volume mounts configured for the sandbox.

    Only mounts whose ``host_path`` exists are returned, in line with
    ``LocalSandboxProvider._setup_path_mappings()``, which applies the same
    filter. The list is memoised on the function object after the first
    successful config load. If loading fails, an empty list is returned and
    nothing is memoised, so a later call can retry.
    """
    memo = getattr(_get_custom_mounts, "_cached", None)
    if memo is not None:
        return memo

    try:
        from pathlib import Path

        from deerflow.config import get_app_config

        config = get_app_config()
        if config.sandbox and config.sandbox.mounts:
            existing = [mount for mount in config.sandbox.mounts if Path(mount.host_path).exists()]
        else:
            existing = []
        _get_custom_mounts._cached = existing  # type: ignore[attr-defined]
        return existing
    except Exception:
        # Deliberately not memoised: the config may become available later.
        return []
def _mount_container_prefix(mount) -> str:
    """Return the mount's container path without trailing slashes (``"/"`` for the root).

    ``LocalSandboxProvider._setup_path_mappings()`` normalises container paths
    the same way, so a mount configured as ``/data/`` is recognised here under
    the same prefix the sandbox actually maps.
    """
    return mount.container_path.rstrip("/") or "/"


def _is_custom_mount_path(path: str) -> bool:
    """Check if path is under a custom mount container_path."""
    return _get_custom_mount_for_path(path) is not None


def _get_custom_mount_for_path(path: str):
    """Get the mount config matching this path (longest prefix first).

    Args:
        path: Container path to look up.

    Returns:
        The mount whose normalised container path equals ``path`` or is a
        directory prefix of it. The longest prefix wins, and the first mount
        wins on a tie. Returns None when no mount matches.
    """
    best = None
    best_len = -1
    for mount in _get_custom_mounts():
        prefix = _mount_container_prefix(mount)
        if path == prefix or path.startswith(f"{prefix}/"):
            if len(prefix) > best_len:
                best = mount
                best_len = len(prefix)
    return best
def _extract_thread_id_from_thread_data(thread_data: "ThreadDataState | None") -> str | None:
"""Extract thread_id from thread_data by inspecting workspace_path.
@@ -245,16 +299,84 @@ def _get_mcp_allowed_paths() -> list[str]:
return allowed_paths
def _get_tool_config_int(name: str, key: str, default: int) -> int:
    """Read an integer setting from a tool's extra configuration.

    Args:
        name: Tool name passed to ``get_tool_config``.
        key: Key to look up in the tool config's ``model_extra``.
        default: Value returned when the setting is missing or unusable.

    Returns:
        The configured value when it is an ``int`` (booleans are rejected
        even though ``bool`` subclasses ``int``); otherwise ``default``. Any
        error while loading the configuration also yields ``default``.
    """
    try:
        tool_config = get_app_config().get_tool_config(name)
        if tool_config is not None:
            extra = tool_config.model_extra or {}
            value = extra.get(key)
            # ``True``/``False`` would otherwise pass as 1/0.
            if isinstance(value, int) and not isinstance(value, bool):
                return value
    except Exception:
        # Best effort: a broken or unavailable config must not break the tool.
        pass
    return default
def _clamp_max_results(value: int, *, default: int, upper_bound: int) -> int:
    """Return ``default`` for non-positive values, else ``value`` capped at ``upper_bound``."""
    return min(value, upper_bound) if value > 0 else default
def _resolve_max_results(name: str, requested: int, *, default: int, upper_bound: int) -> int:
    """Combine the caller's requested limit with the tool's configured limit.

    Both values are clamped with ``_clamp_max_results`` and the smaller one
    is returned. The configured limit is the tool's ``max_results`` setting,
    falling back to ``default``.

    Args:
        name: Tool name used to look up the configured limit.
        requested: Limit asked for by the caller.
        default: Fallback for missing or non-positive limits.
        upper_bound: Hard ceiling applied to both limits.
    """
    requested_cap = _clamp_max_results(requested, default=default, upper_bound=upper_bound)
    configured_raw = _get_tool_config_int(name, "max_results", default)
    configured_cap = _clamp_max_results(configured_raw, default=default, upper_bound=upper_bound)
    if configured_cap < requested_cap:
        return configured_cap
    return requested_cap
def _resolve_local_read_path(path: str, thread_data: ThreadDataState) -> str:
    """Validate a virtual path for reading and resolve it for the local sandbox.

    Args:
        path: Virtual path supplied to a read-only tool.
        thread_data: Thread data used for validation and resolution.

    Returns:
        The resolved path for skills, ACP-workspace and user-data locations.
        Custom mount paths are returned unchanged because
        ``LocalSandbox._resolve_path()`` maps them, which is how ``ls_tool``
        treats them too.

    Raises:
        PermissionError: Propagated from ``validate_local_tool_path`` when the
            path is not in an allowed location.
    """
    validate_local_tool_path(path, thread_data, read_only=True)
    if _is_skills_path(path):
        return _resolve_skills_path(path)
    if _is_acp_workspace_path(path):
        return _resolve_acp_workspace_path(path, _extract_thread_id_from_thread_data(thread_data))
    if _is_custom_mount_path(path):
        # Validation accepted this path; sending it through the user-data
        # resolver would treat a legitimate mount path as a user-data path.
        return path
    return _resolve_and_validate_user_data_path(path, thread_data)
def _format_glob_results(root_path: str, matches: list[str], truncated: bool) -> str:
    """Render glob matches as a numbered, newline-separated report.

    Args:
        root_path: Search root shown in the header.
        matches: Matched paths, listed in the given order starting at 1.
        truncated: Whether the match list was cut off. Adds a note to the
            header and a closing hint.

    Returns:
        The report text, or a one-line "no files matched" message when
        ``matches`` is empty.
    """
    if not matches:
        return f"No files matched under {root_path}"

    total = len(matches)
    header = f"Found {total} paths under {root_path}"
    if truncated:
        header += f" (showing first {total})"

    report = [header]
    for position, matched_path in enumerate(matches, start=1):
        report.append(f"{position}. {matched_path}")
    if truncated:
        report.append("Results truncated. Narrow the path or pattern to see fewer matches.")
    return "\n".join(report)
def _format_grep_results(root_path: str, matches: list[GrepMatch], truncated: bool) -> str:
    """Render grep matches as ``path:line_number: line`` rows under a header.

    Args:
        root_path: Search root shown in the header.
        matches: Matches to list, in the given order.
        truncated: Whether the match list was cut off. Adds a note to the
            header and a closing hint.

    Returns:
        The report text, or a one-line "no matches" message when ``matches``
        is empty.
    """
    if not matches:
        return f"No matches found under {root_path}"

    total = len(matches)
    header = f"Found {total} matches under {root_path}"
    if truncated:
        header += f" (showing first {total})"

    report = [header]
    for hit in matches:
        report.append(f"{hit.path}:{hit.line_number}: {hit.line}")
    if truncated:
        report.append("Results truncated. Narrow the path or add a glob filter.")
    return "\n".join(report)
def _path_variants(path: str) -> set[str]:
    """Return the path as given plus its all-forward-slash and all-backslash spellings."""
    forward = path.replace("\\", "/")
    backward = path.replace("/", "\\")
    return {path, forward, backward}
def _path_separator_for_style(path: str) -> str:
    """Return ``"\\\\"`` for paths that use only backslashes, otherwise ``"/"``."""
    if "\\" in path and "/" not in path:
        return "\\"
    return "/"


def _join_path_preserving_style(base: str, relative: str) -> str:
    """Append *relative* to *base* using the separator style of *base*.

    The separator is chosen by ``_path_separator_for_style(base)``. Separators
    of the other style inside ``relative`` are converted to it, and leading
    separators of ``relative`` and trailing separators of ``base`` are
    dropped, so exactly one separator joins the two parts. Plain string
    operations are used instead of ``pathlib``, so a backslash-style base
    stays backslash-style on every platform.

    Args:
        base: Base path whose separator style is kept.
        relative: Path to append; an empty value returns ``base`` unchanged.

    Returns:
        The joined path.
    """
    if not relative:
        return base
    separator = _path_separator_for_style(base)
    foreign_separator = "\\" if separator == "/" else "/"
    normalized_relative = relative.replace(foreign_separator, separator).lstrip("/\\")
    stripped_base = base.rstrip("/\\")
    return f"{stripped_base}{separator}{normalized_relative}"
def _sanitize_error(error: Exception, runtime: "ToolRuntime[ContextT, ThreadState] | None" = None) -> str:
@@ -299,7 +421,10 @@ def replace_virtual_path(path: str, thread_data: ThreadDataState | None) -> str:
return actual_base
if path.startswith(f"{virtual_base}/"):
rest = path[len(virtual_base) :].lstrip("/")
return _join_path_preserving_style(actual_base, rest)
result = _join_path_preserving_style(actual_base, rest)
if path.endswith("/") and not result.endswith(("/", "\\")):
result += _path_separator_for_style(actual_base)
return result
return path
@@ -379,6 +504,8 @@ def mask_local_paths_in_output(output: str, thread_data: ThreadDataState | None)
result = pattern.sub(replace_acp, result)
# Custom mount host paths are masked by LocalSandbox._reverse_resolve_paths_in_output()
# Mask user-data host paths
if thread_data is None:
return result
@@ -427,6 +554,7 @@ def validate_local_tool_path(path: str, thread_data: ThreadDataState | None, *,
- ``/mnt/user-data/*`` — always allowed (read + write)
- ``/mnt/skills/*`` — allowed only when *read_only* is True
- ``/mnt/acp-workspace/*`` — allowed only when *read_only* is True
- Custom mount paths (from config.yaml) — respects per-mount ``read_only`` flag
Args:
path: The virtual path to validate.
@@ -458,7 +586,14 @@ def validate_local_tool_path(path: str, thread_data: ThreadDataState | None, *,
if path.startswith(f"{VIRTUAL_PATH_PREFIX}/"):
return
raise PermissionError(f"Only paths under {VIRTUAL_PATH_PREFIX}/, {_get_skills_container_path()}/, or {_ACP_WORKSPACE_VIRTUAL_PATH}/ are allowed")
# Custom mount paths — respect read_only config
if _is_custom_mount_path(path):
mount = _get_custom_mount_for_path(path)
if mount and mount.read_only and not read_only:
raise PermissionError(f"Write access to read-only mount is not allowed: {path}")
return
raise PermissionError(f"Only paths under {VIRTUAL_PATH_PREFIX}/, {_get_skills_container_path()}/, {_ACP_WORKSPACE_VIRTUAL_PATH}/, or configured mount paths are allowed")
def _validate_resolved_user_data_path(resolved: Path, thread_data: ThreadDataState) -> None:
@@ -508,9 +643,10 @@ def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState
boundary and must not be treated as isolation from the host filesystem.
In local mode, commands must use virtual paths under /mnt/user-data for
user data access. Skills paths under /mnt/skills and ACP workspace paths
under /mnt/acp-workspace are allowed (path-traversal checks only; write
prevention for bash commands is not enforced here).
user data access. Skills paths under /mnt/skills, ACP workspace paths
under /mnt/acp-workspace, and custom mount container paths (configured in
config.yaml) are allowed (path-traversal checks only; write prevention
for bash commands is not enforced here).
A small allowlist of common system path prefixes is kept for executable
and device references (e.g. /bin/sh, /dev/null).
"""
@@ -545,6 +681,11 @@ def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState
_reject_path_traversal(absolute_path)
continue
# Allow custom mount container paths
if _is_custom_mount_path(absolute_path):
_reject_path_traversal(absolute_path)
continue
if any(absolute_path == prefix.rstrip("/") or absolute_path.startswith(prefix) for prefix in _LOCAL_BASH_SYSTEM_PATH_PREFIXES):
continue
@@ -589,6 +730,8 @@ def replace_virtual_paths_in_command(command: str, thread_data: ThreadDataState
result = acp_pattern.sub(replace_acp_match, result)
# Custom mount paths are resolved by LocalSandbox._resolve_paths_in_command()
# Replace user-data paths
if VIRTUAL_PATH_PREFIX in result and thread_data is not None:
pattern = re.compile(rf"{re.escape(VIRTUAL_PATH_PREFIX)}(/[^\s\"';&|<>()]*)?")
@@ -666,7 +809,8 @@ def sandbox_from_runtime(runtime: ToolRuntime[ContextT, ThreadState] | None = No
if sandbox is None:
raise SandboxNotFoundError(f"Sandbox with ID '{sandbox_id}' not found", sandbox_id=sandbox_id)
runtime.context["sandbox_id"] = sandbox_id # Ensure sandbox_id is in context for downstream use
if runtime.context is not None:
runtime.context["sandbox_id"] = sandbox_id # Ensure sandbox_id is in context for downstream use
return sandbox
@@ -701,7 +845,8 @@ def ensure_sandbox_initialized(runtime: ToolRuntime[ContextT, ThreadState] | Non
if sandbox_id is not None:
sandbox = get_sandbox_provider().get(sandbox_id)
if sandbox is not None:
runtime.context["sandbox_id"] = sandbox_id # Ensure sandbox_id is in context for releasing in after_agent
if runtime.context is not None:
runtime.context["sandbox_id"] = sandbox_id # Ensure sandbox_id is in context for releasing in after_agent
return sandbox
# Sandbox was released, fall through to acquire new one
@@ -723,7 +868,8 @@ def ensure_sandbox_initialized(runtime: ToolRuntime[ContextT, ThreadState] | Non
if sandbox is None:
raise SandboxNotFoundError("Sandbox not found after acquisition", sandbox_id=sandbox_id)
runtime.context["sandbox_id"] = sandbox_id # Ensure sandbox_id is in context for releasing in after_agent
if runtime.context is not None:
runtime.context["sandbox_id"] = sandbox_id # Ensure sandbox_id is in context for releasing in after_agent
return sandbox
@@ -885,8 +1031,9 @@ def ls_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, path:
path = _resolve_skills_path(path)
elif _is_acp_workspace_path(path):
path = _resolve_acp_workspace_path(path, _extract_thread_id_from_thread_data(thread_data))
else:
elif not _is_custom_mount_path(path):
path = _resolve_and_validate_user_data_path(path, thread_data)
# Custom mount paths are resolved by LocalSandbox._resolve_path()
children = sandbox.list_dir(path)
if not children:
return "(empty)"
@@ -901,6 +1048,126 @@ def ls_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, path:
return f"Error: Unexpected error listing directory: {_sanitize_error(e, runtime)}"
@tool("glob", parse_docstring=True)
def glob_tool(
    runtime: ToolRuntime[ContextT, ThreadState],
    description: str,
    pattern: str,
    path: str,
    include_dirs: bool = False,
    max_results: int = _DEFAULT_GLOB_MAX_RESULTS,
) -> str:
    """Find files or directories that match a glob pattern under a root directory.

    Args:
        description: Explain why you are searching for these paths in short words. ALWAYS PROVIDE THIS PARAMETER FIRST.
        pattern: The glob pattern to match relative to the root path, for example `**/*.py`.
        path: The **absolute** root directory to search under.
        include_dirs: Whether matching directories should also be returned. Default is False.
        max_results: Maximum number of paths to return. Default is 200.
    """
    # Bound before the try block: the handlers below report this path, and the
    # calls inside the try can raise before any assignment placed there would run.
    requested_path = path
    try:
        sandbox = ensure_sandbox_initialized(runtime)
        ensure_thread_directories_exist(runtime)
        effective_max_results = _resolve_max_results(
            "glob",
            max_results,
            default=_DEFAULT_GLOB_MAX_RESULTS,
            upper_bound=_MAX_GLOB_MAX_RESULTS,
        )
        thread_data = None
        if is_local_sandbox(runtime):
            thread_data = get_thread_data(runtime)
            if thread_data is None:
                raise SandboxRuntimeError("Thread data not available for local sandbox")
            # Validate the virtual path and translate it for the local sandbox.
            path = _resolve_local_read_path(path, thread_data)
        matches, truncated = sandbox.glob(path, pattern, include_dirs=include_dirs, max_results=effective_max_results)
        if thread_data is not None:
            # Mask local paths in the results before they are shown to the caller.
            matches = [mask_local_paths_in_output(match, thread_data) for match in matches]
        return _format_glob_results(requested_path, matches, truncated)
    except SandboxError as e:
        return f"Error: {e}"
    except FileNotFoundError:
        return f"Error: Directory not found: {requested_path}"
    except NotADirectoryError:
        return f"Error: Path is not a directory: {requested_path}"
    except PermissionError:
        return f"Error: Permission denied: {requested_path}"
    except Exception as e:
        return f"Error: Unexpected error searching paths: {_sanitize_error(e, runtime)}"
@tool("grep", parse_docstring=True)
def grep_tool(
    runtime: ToolRuntime[ContextT, ThreadState],
    description: str,
    pattern: str,
    path: str,
    glob: str | None = None,
    literal: bool = False,
    case_sensitive: bool = False,
    max_results: int = _DEFAULT_GREP_MAX_RESULTS,
) -> str:
    """Search for matching lines inside text files under a root directory.

    Args:
        description: Explain why you are searching file contents in short words. ALWAYS PROVIDE THIS PARAMETER FIRST.
        pattern: The string or regex pattern to search for.
        path: The **absolute** root directory to search under.
        glob: Optional glob filter for candidate files, for example `**/*.py`.
        literal: Whether to treat `pattern` as a plain string. Default is False.
        case_sensitive: Whether matching is case-sensitive. Default is False.
        max_results: Maximum number of matching lines to return. Default is 100.
    """
    # NOTE: the docstring above is consumed by @tool(parse_docstring=True), so
    # its wording is kept verbatim; implementation notes live in comments.
    #
    # Returns the text produced by _format_grep_results on success. Every
    # Exception raised along the way is converted into an "Error: ..." string
    # instead of propagating to the caller.

    # Remember the caller-supplied path for user-facing messages. It is bound
    # BEFORE the try block so the OS-error handlers below can always reference
    # it, even when sandbox / thread-directory setup fails before `path` is
    # rewritten (previously that case raised UnboundLocalError in the handler).
    requested_path = path
    try:
        sandbox = ensure_sandbox_initialized(runtime)
        ensure_thread_directories_exist(runtime)

        # Normalise the requested limit against the grep default / upper bound.
        effective_max_results = _resolve_max_results(
            "grep",
            max_results,
            default=_DEFAULT_GREP_MAX_RESULTS,
            upper_bound=_MAX_GREP_MAX_RESULTS,
        )

        # thread_data stays None for non-local sandboxes; it doubles as the
        # "results need path masking" flag further down.
        thread_data = None
        if is_local_sandbox(runtime):
            thread_data = get_thread_data(runtime)
            if thread_data is None:
                raise SandboxRuntimeError("Thread data not available for local sandbox")
            path = _resolve_local_read_path(path, thread_data)

        matches, truncated = sandbox.grep(
            path,
            pattern,
            glob=glob,
            literal=literal,
            case_sensitive=case_sensitive,
            max_results=effective_max_results,
        )

        # Local sandbox only: rebuild each match with its path passed through
        # mask_local_paths_in_output; line number and line text are unchanged.
        if thread_data is not None:
            matches = [
                GrepMatch(
                    path=mask_local_paths_in_output(match.path, thread_data),
                    line_number=match.line_number,
                    line=match.line,
                )
                for match in matches
            ]

        # Report against the path the caller typed, not the resolved one.
        return _format_grep_results(requested_path, matches, truncated)
    except SandboxError as e:
        return f"Error: {e}"
    except FileNotFoundError:
        return f"Error: Directory not found: {requested_path}"
    except NotADirectoryError:
        return f"Error: Path is not a directory: {requested_path}"
    except re.error as e:
        # Regex compilation failures are surfaced with the regex engine's message.
        return f"Error: Invalid regex pattern: {e}"
    except PermissionError:
        return f"Error: Permission denied: {requested_path}"
    except Exception as e:
        # Last-resort boundary: the message goes through _sanitize_error
        # before being returned.
        return f"Error: Unexpected error searching file contents: {_sanitize_error(e, runtime)}"
@tool("read_file", parse_docstring=True)
def read_file_tool(
runtime: ToolRuntime[ContextT, ThreadState],
@@ -928,8 +1195,9 @@ def read_file_tool(
path = _resolve_skills_path(path)
elif _is_acp_workspace_path(path):
path = _resolve_acp_workspace_path(path, _extract_thread_id_from_thread_data(thread_data))
else:
elif not _is_custom_mount_path(path):
path = _resolve_and_validate_user_data_path(path, thread_data)
# Custom mount paths are resolved by LocalSandbox._resolve_path()
content = sandbox.read_file(path)
if not content:
return "(empty)"
@@ -977,7 +1245,9 @@ def write_file_tool(
if is_local_sandbox(runtime):
thread_data = get_thread_data(runtime)
validate_local_tool_path(path, thread_data)
path = _resolve_and_validate_user_data_path(path, thread_data)
if not _is_custom_mount_path(path):
path = _resolve_and_validate_user_data_path(path, thread_data)
# Custom mount paths are resolved by LocalSandbox._resolve_path()
with get_file_operation_lock(sandbox, path):
sandbox.write_file(path, content, append)
return "OK"
@@ -1019,7 +1289,9 @@ def str_replace_tool(
if is_local_sandbox(runtime):
thread_data = get_thread_data(runtime)
validate_local_tool_path(path, thread_data)
path = _resolve_and_validate_user_data_path(path, thread_data)
if not _is_custom_mount_path(path):
path = _resolve_and_validate_user_data_path(path, thread_data)
# Custom mount paths are resolved by LocalSandbox._resolve_path()
with get_file_operation_lock(sandbox, path):
content = sandbox.read_file(path)
if not content: