feat(auth): authentication module with multi-tenant isolation (RFC-001)

Introduce an always-on auth layer with auto-created admin on first boot, multi-tenant isolation for threads/stores, and a full setup/login flow.

Backend
- JWT access tokens with `ver` field for stale-token rejection; bump on password/email change
- Password hashing, HttpOnly+Secure cookies (Secure derived from request scheme at runtime)
- CSRF middleware covering both REST and LangGraph routes
- IP-based login rate limiting (5 attempts / 5-min lockout) with bounded dict growth and X-Forwarded-For bypass fix
- Multi-worker-safe admin auto-creation (single DB write, WAL once)
- needs_setup + token_version on User model; SQLite schema migration
- Thread/store isolation by owner; orphan thread migration on first admin registration
- thread_id validated as UUID to prevent log injection
- CLI tool to reset admin password
- Decorator-based authz module extracted from auth core

Frontend
- Login and setup pages with SSR guard for needs_setup flow
- Account settings page (change password / email)
- AuthProvider + route guards; skips redirect when no users registered
- i18n (en-US / zh-CN) for auth surfaces
- Typed auth API client; parseAuthError unwraps FastAPI detail envelope

Infra & tooling
- Unified `serve.sh` with gateway mode + auto dep install
- Public PyPI uv.toml pin for CI compatibility
- Regenerated uv.lock with public index

Tests
- HTTP vs HTTPS cookie security tests
- Auth middleware, rate limiter, CSRF, setup flow coverage
2026-05-24 17:06:00 +00:00 · 2026-04-08 00:31:43 +08:00
parent 636053fb6d
commit 27b66d6753
214 changed files with 18830 additions and 1065 deletions
@@ -7,6 +7,7 @@ from langchain.tools import ToolRuntime, tool
 from langgraph.typing import ContextT

 from deerflow.agents.thread_state import ThreadDataState, ThreadState
+from deerflow.config import get_app_config
 from deerflow.config.paths import VIRTUAL_PATH_PREFIX
 from deerflow.sandbox.exceptions import (
    SandboxError,
@@ -16,6 +17,7 @@ from deerflow.sandbox.exceptions import (
 from deerflow.sandbox.file_operation_lock import get_file_operation_lock
 from deerflow.sandbox.sandbox import Sandbox
 from deerflow.sandbox.sandbox_provider import get_sandbox_provider
+from deerflow.sandbox.search import GrepMatch
 from deerflow.sandbox.security import LOCAL_HOST_BASH_DISABLED_MESSAGE, is_host_bash_allowed

 _ABSOLUTE_PATH_PATTERN = re.compile(r"(?<![:\w])(?<!:/)/(?:[^\s\"'`;&|<>()]+)")
@@ -31,6 +33,10 @@ _LOCAL_BASH_SYSTEM_PATH_PREFIXES = (

 _DEFAULT_SKILLS_CONTAINER_PATH = "/mnt/skills"
 _ACP_WORKSPACE_VIRTUAL_PATH = "/mnt/acp-workspace"
+# Result-count limits for the glob and grep tools.  The DEFAULT values are the
+# tools' default ``max_results``; the MAX values are the upper bounds handed
+# to _resolve_max_results().
+_DEFAULT_GLOB_MAX_RESULTS = 200
+_MAX_GLOB_MAX_RESULTS = 1000
+_DEFAULT_GREP_MAX_RESULTS = 100
+_MAX_GREP_MAX_RESULTS = 500


 def _get_skills_container_path() -> str:
@@ -113,6 +119,54 @@ def _is_acp_workspace_path(path: str) -> bool:
    return path == _ACP_WORKSPACE_VIRTUAL_PATH or path.startswith(f"{_ACP_WORKSPACE_VIRTUAL_PATH}/")


+def _get_custom_mounts():
+    """Return the custom sandbox mounts whose host paths exist.
+
+    The filtered list is memoised on the function object after the first
+    successful config load.  When loading the config raises, an empty list is
+    returned and nothing is memoised, so a later call can retry.
+    """
+    memo = getattr(_get_custom_mounts, "_cached", None)
+    if memo is not None:
+        return memo
+    try:
+        from pathlib import Path
+
+        from deerflow.config import get_app_config
+
+        sandbox_config = get_app_config().sandbox
+        available = []
+        if sandbox_config and sandbox_config.mounts:
+            # Mirror LocalSandboxProvider._setup_path_mappings(), which also
+            # drops mounts whose host_path does not exist.
+            for mount in sandbox_config.mounts:
+                if Path(mount.host_path).exists():
+                    available.append(mount)
+    except Exception:
+        # Do not memoise a failed load; the next call tries again.
+        return []
+    _get_custom_mounts._cached = available  # type: ignore[attr-defined]
+    return available
+
+
+def _is_custom_mount_path(path: str) -> bool:
+    """Return True when *path* equals a custom mount's container_path or lies beneath one."""
+    return any(
+        path == mount.container_path or path.startswith(f"{mount.container_path}/")
+        for mount in _get_custom_mounts()
+    )
+
+
+def _get_custom_mount_for_path(path: str):
+    """Return the custom mount covering *path*, or None when no mount does.
+
+    When several mounts cover the path, the one with the longest
+    container_path wins; among equally long ones the first listed wins.
+    """
+    covering = [
+        mount
+        for mount in _get_custom_mounts()
+        if path == mount.container_path or path.startswith(f"{mount.container_path}/")
+    ]
+    # max() returns the first of several equally long candidates.
+    return max(covering, key=lambda mount: len(mount.container_path), default=None)
+
+
 def _extract_thread_id_from_thread_data(thread_data: "ThreadDataState | None") -> str | None:
    """Extract thread_id from thread_data by inspecting workspace_path.

@@ -245,16 +299,84 @@ def _get_mcp_allowed_paths() -> list[str]:
    return allowed_paths


+def _get_tool_config_int(name: str, key: str, default: int) -> int:
+    """Read an integer option from a tool's extra configuration.
+
+    Args:
+        name: Tool name passed to ``get_app_config().get_tool_config()``.
+        key: Option name looked up in the tool config's ``model_extra``.
+        default: Value returned when the option is missing, is not an
+            integer, or the configuration cannot be read.
+
+    Returns:
+        The configured integer, otherwise ``default``.
+    """
+    try:
+        tool_config = get_app_config().get_tool_config(name)
+        # model_extra may be None; treat that as "no extra options".
+        extra = (tool_config.model_extra or {}) if tool_config is not None else {}
+        value = extra.get(key)
+    except Exception:
+        # Best effort: an unreadable config must not break the tool call.
+        return default
+    # bool is a subclass of int; ``max_results: true`` must not count as 1.
+    if isinstance(value, int) and not isinstance(value, bool):
+        return value
+    return default
+
+
+def _clamp_max_results(value: int, *, default: int, upper_bound: int) -> int:
+    """Return ``default`` for a non-positive ``value``, else ``value`` capped at ``upper_bound``."""
+    return min(value, upper_bound) if value > 0 else default
+
+
+def _resolve_max_results(name: str, requested: int, *, default: int, upper_bound: int) -> int:
+    """Return the effective result limit for tool *name*.
+
+    Both the caller's ``requested`` value and the tool's configured
+    ``max_results`` are clamped with ``_clamp_max_results``; the smaller of
+    the two clamped values is returned.
+    """
+    configured = _get_tool_config_int(name, "max_results", default)
+    limits = (
+        _clamp_max_results(requested, default=default, upper_bound=upper_bound),
+        _clamp_max_results(configured, default=default, upper_bound=upper_bound),
+    )
+    return min(limits)
+
+
+def _resolve_local_read_path(path: str, thread_data: ThreadDataState) -> str:
+    """Validate a virtual path for read access and resolve it for the local sandbox.
+
+    Args:
+        path: Virtual path requested by the tool caller.
+        thread_data: Thread data used to validate and resolve the path.
+
+    Returns:
+        The resolved path for skills, ACP workspace and user-data paths; a
+        custom mount path is returned unchanged.
+
+    Raises:
+        PermissionError: Raised by ``validate_local_tool_path`` when the path
+            is outside the allowed roots.
+    """
+    validate_local_tool_path(path, thread_data, read_only=True)
+    if _is_skills_path(path):
+        return _resolve_skills_path(path)
+    if _is_acp_workspace_path(path):
+        return _resolve_acp_workspace_path(path, _extract_thread_id_from_thread_data(thread_data))
+    if _is_custom_mount_path(path):
+        # Same treatment as ls/read_file: a validated custom mount path is not
+        # a user-data path, so it must not go through the user-data resolver.
+        # NOTE(review): assumes LocalSandbox.glob()/grep() resolve mount paths
+        # the way list_dir()/read_file() do — confirm.
+        return path
+    return _resolve_and_validate_user_data_path(path, thread_data)
+
+
+def _format_glob_results(root_path: str, matches: list[str], truncated: bool) -> str:
+    """Render glob matches as a header line followed by a numbered listing.
+
+    Returns a single "no files matched" line when *matches* is empty.  When
+    *truncated* is true the header gains a "showing first N" suffix and a
+    closing hint line is appended.
+    """
+    if not matches:
+        return f"No files matched under {root_path}"
+
+    count = len(matches)
+    header = f"Found {count} paths under {root_path}"
+    if truncated:
+        header = f"{header} (showing first {count})"
+    numbered = [f"{position}. {entry}" for position, entry in enumerate(matches, start=1)]
+    footer = ["Results truncated. Narrow the path or pattern to see fewer matches."] if truncated else []
+    return "\n".join([header, *numbered, *footer])
+
+
+def _format_grep_results(root_path: str, matches: list[GrepMatch], truncated: bool) -> str:
+    """Render grep matches as a header line followed by ``path:line_number: line`` rows.
+
+    Returns a single "no matches" line when *matches* is empty.  When
+    *truncated* is true the header gains a "showing first N" suffix and a
+    closing hint line is appended.
+    """
+    if not matches:
+        return f"No matches found under {root_path}"
+
+    total = len(matches)
+    summary = f"Found {total} matches under {root_path}"
+    if truncated:
+        summary += f" (showing first {total})"
+    rendered = [summary]
+    for hit in matches:
+        rendered.append(f"{hit.path}:{hit.line_number}: {hit.line}")
+    if truncated:
+        rendered.append("Results truncated. Narrow the path or add a glob filter.")
+    return "\n".join(rendered)
+
+
 def _path_variants(path: str) -> set[str]:
+    """Return *path* together with its all-forward-slash and all-backslash spellings."""
     return {path, path.replace("\\", "/"), path.replace("/", "\\")}


+def _path_separator_for_style(path: str) -> str:
+    """Return a backslash for paths containing backslashes and no forward slash, otherwise ``/``."""
+    if "\\" in path and "/" not in path:
+        return "\\"
+    return "/"
+
+
 def _join_path_preserving_style(base: str, relative: str) -> str:
+    """Join *relative* onto *base* using the separator style of *base*.
+
+    Returns *base* unchanged when *relative* is empty.  Otherwise the
+    separators in *relative* are converted to the style of *base*, leading
+    separators are dropped from *relative*, trailing separators are dropped
+    from *base*, and the two parts are joined with a single separator.
+    """
     if not relative:
         return base
-    if "/" in base and "\\" not in base:
-        return f"{base.rstrip('/')}/{relative}"
-    return str(Path(base) / relative)
+    separator = _path_separator_for_style(base)
+    normalized_relative = relative.replace("\\" if separator == "/" else "/", separator).lstrip("/\\")
+    stripped_base = base.rstrip("/\\")
+    return f"{stripped_base}{separator}{normalized_relative}"


 def _sanitize_error(error: Exception, runtime: "ToolRuntime[ContextT, ThreadState] | None" = None) -> str:
@@ -299,7 +421,10 @@ def replace_virtual_path(path: str, thread_data: ThreadDataState | None) -> str:
            return actual_base
        if path.startswith(f"{virtual_base}/"):
            rest = path[len(virtual_base) :].lstrip("/")
-            return _join_path_preserving_style(actual_base, rest)
+            result = _join_path_preserving_style(actual_base, rest)
+            if path.endswith("/") and not result.endswith(("/", "\\")):
+                result += _path_separator_for_style(actual_base)
+            return result

    return path

@@ -379,6 +504,8 @@ def mask_local_paths_in_output(output: str, thread_data: ThreadDataState | None)

            result = pattern.sub(replace_acp, result)

+    # Custom mount host paths are masked by LocalSandbox._reverse_resolve_paths_in_output()
+
    # Mask user-data host paths
    if thread_data is None:
        return result
@@ -427,6 +554,7 @@ def validate_local_tool_path(path: str, thread_data: ThreadDataState | None, *,
      - ``/mnt/user-data/*``  — always allowed (read + write)
      - ``/mnt/skills/*``     — allowed only when *read_only* is True
      - ``/mnt/acp-workspace/*`` — allowed only when *read_only* is True
+      - Custom mount paths (from config.yaml) — respects per-mount ``read_only`` flag

    Args:
        path: The virtual path to validate.
@@ -458,7 +586,14 @@ def validate_local_tool_path(path: str, thread_data: ThreadDataState | None, *,
    if path.startswith(f"{VIRTUAL_PATH_PREFIX}/"):
        return

-    raise PermissionError(f"Only paths under {VIRTUAL_PATH_PREFIX}/, {_get_skills_container_path()}/, or {_ACP_WORKSPACE_VIRTUAL_PATH}/ are allowed")
+    # Custom mount paths — respect read_only config
+    if _is_custom_mount_path(path):
+        mount = _get_custom_mount_for_path(path)
+        if mount and mount.read_only and not read_only:
+            raise PermissionError(f"Write access to read-only mount is not allowed: {path}")
+        return
+
+    raise PermissionError(f"Only paths under {VIRTUAL_PATH_PREFIX}/, {_get_skills_container_path()}/, {_ACP_WORKSPACE_VIRTUAL_PATH}/, or configured mount paths are allowed")


 def _validate_resolved_user_data_path(resolved: Path, thread_data: ThreadDataState) -> None:
@@ -508,9 +643,10 @@ def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState
    boundary and must not be treated as isolation from the host filesystem.

    In local mode, commands must use virtual paths under /mnt/user-data for
-    user data access. Skills paths under /mnt/skills and ACP workspace paths
-    under /mnt/acp-workspace are allowed (path-traversal checks only; write
-    prevention for bash commands is not enforced here).
+    user data access. Skills paths under /mnt/skills, ACP workspace paths
+    under /mnt/acp-workspace, and custom mount container paths (configured in
+    config.yaml) are allowed (path-traversal checks only; write prevention
+    for bash commands is not enforced here).
    A small allowlist of common system path prefixes is kept for executable
    and device references (e.g. /bin/sh, /dev/null).
    """
@@ -545,6 +681,11 @@ def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState
            _reject_path_traversal(absolute_path)
            continue

+        # Allow custom mount container paths
+        if _is_custom_mount_path(absolute_path):
+            _reject_path_traversal(absolute_path)
+            continue
+
        if any(absolute_path == prefix.rstrip("/") or absolute_path.startswith(prefix) for prefix in _LOCAL_BASH_SYSTEM_PATH_PREFIXES):
            continue

@@ -589,6 +730,8 @@ def replace_virtual_paths_in_command(command: str, thread_data: ThreadDataState

        result = acp_pattern.sub(replace_acp_match, result)

+    # Custom mount paths are resolved by LocalSandbox._resolve_paths_in_command()
+
    # Replace user-data paths
    if VIRTUAL_PATH_PREFIX in result and thread_data is not None:
        pattern = re.compile(rf"{re.escape(VIRTUAL_PATH_PREFIX)}(/[^\s\"';&|<>()]*)?")
@@ -666,7 +809,8 @@ def sandbox_from_runtime(runtime: ToolRuntime[ContextT, ThreadState] | None = No
    if sandbox is None:
        raise SandboxNotFoundError(f"Sandbox with ID '{sandbox_id}' not found", sandbox_id=sandbox_id)

-    runtime.context["sandbox_id"] = sandbox_id  # Ensure sandbox_id is in context for downstream use
+    if runtime.context is not None:
+        runtime.context["sandbox_id"] = sandbox_id  # Ensure sandbox_id is in context for downstream use
    return sandbox


@@ -701,7 +845,8 @@ def ensure_sandbox_initialized(runtime: ToolRuntime[ContextT, ThreadState] | Non
        if sandbox_id is not None:
            sandbox = get_sandbox_provider().get(sandbox_id)
            if sandbox is not None:
-                runtime.context["sandbox_id"] = sandbox_id  # Ensure sandbox_id is in context for releasing in after_agent
+                if runtime.context is not None:
+                    runtime.context["sandbox_id"] = sandbox_id  # Ensure sandbox_id is in context for releasing in after_agent
                return sandbox
            # Sandbox was released, fall through to acquire new one

@@ -723,7 +868,8 @@ def ensure_sandbox_initialized(runtime: ToolRuntime[ContextT, ThreadState] | Non
    if sandbox is None:
        raise SandboxNotFoundError("Sandbox not found after acquisition", sandbox_id=sandbox_id)

-    runtime.context["sandbox_id"] = sandbox_id  # Ensure sandbox_id is in context for releasing in after_agent
+    if runtime.context is not None:
+        runtime.context["sandbox_id"] = sandbox_id  # Ensure sandbox_id is in context for releasing in after_agent
    return sandbox


@@ -885,8 +1031,9 @@ def ls_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, path:
                path = _resolve_skills_path(path)
            elif _is_acp_workspace_path(path):
                path = _resolve_acp_workspace_path(path, _extract_thread_id_from_thread_data(thread_data))
-            else:
+            elif not _is_custom_mount_path(path):
                path = _resolve_and_validate_user_data_path(path, thread_data)
+            # Custom mount paths are resolved by LocalSandbox._resolve_path()
        children = sandbox.list_dir(path)
        if not children:
            return "(empty)"
@@ -901,6 +1048,126 @@ def ls_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, path:
        return f"Error: Unexpected error listing directory: {_sanitize_error(e, runtime)}"


+@tool("glob", parse_docstring=True)
+def glob_tool(
+    runtime: ToolRuntime[ContextT, ThreadState],
+    description: str,
+    pattern: str,
+    path: str,
+    include_dirs: bool = False,
+    max_results: int = _DEFAULT_GLOB_MAX_RESULTS,
+) -> str:
+    """Find files or directories that match a glob pattern under a root directory.
+
+    Args:
+        description: Explain why you are searching for these paths in short words. ALWAYS PROVIDE THIS PARAMETER FIRST.
+        pattern: The glob pattern to match relative to the root path, for example `**/*.py`.
+        path: The **absolute** root directory to search under.
+        include_dirs: Whether matching directories should also be returned. Default is False.
+        max_results: Maximum number of paths to return. Default is 200.
+    """
+    # Bind before the try block: the handlers below report this path, and the
+    # setup calls inside the try can raise before a later assignment would run.
+    requested_path = path
+    try:
+        sandbox = ensure_sandbox_initialized(runtime)
+        ensure_thread_directories_exist(runtime)
+        effective_max_results = _resolve_max_results(
+            "glob",
+            max_results,
+            default=_DEFAULT_GLOB_MAX_RESULTS,
+            upper_bound=_MAX_GLOB_MAX_RESULTS,
+        )
+        thread_data = None
+        if is_local_sandbox(runtime):
+            thread_data = get_thread_data(runtime)
+            if thread_data is None:
+                raise SandboxRuntimeError("Thread data not available for local sandbox")
+            # Validate the virtual path and resolve it for the local sandbox.
+            path = _resolve_local_read_path(path, thread_data)
+        matches, truncated = sandbox.glob(path, pattern, include_dirs=include_dirs, max_results=effective_max_results)
+        if thread_data is not None:
+            # Local sandbox only: mask local paths in each returned match.
+            matches = [mask_local_paths_in_output(match, thread_data) for match in matches]
+        return _format_glob_results(requested_path, matches, truncated)
+    except SandboxError as e:
+        return f"Error: {e}"
+    except FileNotFoundError:
+        return f"Error: Directory not found: {requested_path}"
+    except NotADirectoryError:
+        return f"Error: Path is not a directory: {requested_path}"
+    except PermissionError:
+        return f"Error: Permission denied: {requested_path}"
+    except Exception as e:
+        return f"Error: Unexpected error searching paths: {_sanitize_error(e, runtime)}"
+
+
+@tool("grep", parse_docstring=True)
+def grep_tool(
+    runtime: ToolRuntime[ContextT, ThreadState],
+    description: str,
+    pattern: str,
+    path: str,
+    glob: str | None = None,
+    literal: bool = False,
+    case_sensitive: bool = False,
+    max_results: int = _DEFAULT_GREP_MAX_RESULTS,
+) -> str:
+    """Search for matching lines inside text files under a root directory.
+
+    Args:
+        description: Explain why you are searching file contents in short words. ALWAYS PROVIDE THIS PARAMETER FIRST.
+        pattern: The string or regex pattern to search for.
+        path: The **absolute** root directory to search under.
+        glob: Optional glob filter for candidate files, for example `**/*.py`.
+        literal: Whether to treat `pattern` as a plain string. Default is False.
+        case_sensitive: Whether matching is case-sensitive. Default is False.
+        max_results: Maximum number of matching lines to return. Default is 100.
+    """
+    # Bind before the try block: the handlers below report this path, and the
+    # setup calls inside the try can raise before a later assignment would run.
+    requested_path = path
+    try:
+        sandbox = ensure_sandbox_initialized(runtime)
+        ensure_thread_directories_exist(runtime)
+        effective_max_results = _resolve_max_results(
+            "grep",
+            max_results,
+            default=_DEFAULT_GREP_MAX_RESULTS,
+            upper_bound=_MAX_GREP_MAX_RESULTS,
+        )
+        thread_data = None
+        if is_local_sandbox(runtime):
+            thread_data = get_thread_data(runtime)
+            if thread_data is None:
+                raise SandboxRuntimeError("Thread data not available for local sandbox")
+            # Validate the virtual path and resolve it for the local sandbox.
+            path = _resolve_local_read_path(path, thread_data)
+        matches, truncated = sandbox.grep(
+            path,
+            pattern,
+            glob=glob,
+            literal=literal,
+            case_sensitive=case_sensitive,
+            max_results=effective_max_results,
+        )
+        if thread_data is not None:
+            # Local sandbox only: mask local paths in each match's file path;
+            # line numbers and line text are passed through unchanged.
+            matches = [
+                GrepMatch(
+                    path=mask_local_paths_in_output(match.path, thread_data),
+                    line_number=match.line_number,
+                    line=match.line,
+                )
+                for match in matches
+            ]
+        return _format_grep_results(requested_path, matches, truncated)
+    except SandboxError as e:
+        return f"Error: {e}"
+    except FileNotFoundError:
+        return f"Error: Directory not found: {requested_path}"
+    except NotADirectoryError:
+        return f"Error: Path is not a directory: {requested_path}"
+    except re.error as e:
+        return f"Error: Invalid regex pattern: {e}"
+    except PermissionError:
+        return f"Error: Permission denied: {requested_path}"
+    except Exception as e:
+        return f"Error: Unexpected error searching file contents: {_sanitize_error(e, runtime)}"
+
+
@tool("read_file", parse_docstring=True)
 def read_file_tool(
    runtime: ToolRuntime[ContextT, ThreadState],
@@ -928,8 +1195,9 @@ def read_file_tool(
                path = _resolve_skills_path(path)
            elif _is_acp_workspace_path(path):
                path = _resolve_acp_workspace_path(path, _extract_thread_id_from_thread_data(thread_data))
-            else:
+            elif not _is_custom_mount_path(path):
                path = _resolve_and_validate_user_data_path(path, thread_data)
+            # Custom mount paths are resolved by LocalSandbox._resolve_path()
        content = sandbox.read_file(path)
        if not content:
            return "(empty)"
@@ -977,7 +1245,9 @@ def write_file_tool(
        if is_local_sandbox(runtime):
            thread_data = get_thread_data(runtime)
            validate_local_tool_path(path, thread_data)
-            path = _resolve_and_validate_user_data_path(path, thread_data)
+            if not _is_custom_mount_path(path):
+                path = _resolve_and_validate_user_data_path(path, thread_data)
+            # Custom mount paths are resolved by LocalSandbox._resolve_path()
        with get_file_operation_lock(sandbox, path):
            sandbox.write_file(path, content, append)
        return "OK"
@@ -1019,7 +1289,9 @@ def str_replace_tool(
        if is_local_sandbox(runtime):
            thread_data = get_thread_data(runtime)
            validate_local_tool_path(path, thread_data)
-            path = _resolve_and_validate_user_data_path(path, thread_data)
+            if not _is_custom_mount_path(path):
+                path = _resolve_and_validate_user_data_path(path, thread_data)
+            # Custom mount paths are resolved by LocalSandbox._resolve_path()
        with get_file_operation_lock(sandbox, path):
            content = sandbox.read_file(path)
            if not content: