feat(auth): authentication module with multi-tenant isolation (RFC-001)

Introduce an always-on auth layer with auto-created admin on first boot, multi-tenant isolation for threads/stores, and a full setup/login flow.

Backend:
- JWT access tokens with `ver` field for stale-token rejection; bump on password/email change
- Password hashing, HttpOnly+Secure cookies (Secure derived from request scheme at runtime)
- CSRF middleware covering both REST and LangGraph routes
- IP-based login rate limiting (5 attempts / 5-min lockout) with bounded dict growth and X-Forwarded-For bypass fix
- Multi-worker-safe admin auto-creation (single DB write, WAL once)
- needs_setup + token_version on User model; SQLite schema migration
- Thread/store isolation by owner; orphan thread migration on first admin registration
- thread_id validated as UUID to prevent log injection
- CLI tool to reset admin password
- Decorator-based authz module extracted from auth core

Frontend:
- Login and setup pages with SSR guard for needs_setup flow
- Account settings page (change password / email)
- AuthProvider + route guards; skips redirect when no users registered
- i18n (en-US / zh-CN) for auth surfaces
- Typed auth API client; parseAuthError unwraps FastAPI detail envelope

Infra & tooling:
- Unified `serve.sh` with gateway mode + auto dep install
- Public PyPI uv.toml pin for CI compatibility
- Regenerated uv.lock with public index

Tests:
- HTTP vs HTTPS cookie security tests
- Auth middleware, rate limiter, CSRF, setup flow coverage
2026-05-23 08:25:57 +00:00 · 2026-04-08 00:31:43 +08:00
parent 636053fb6d
commit 27b66d6753
214 changed files with 18830 additions and 1065 deletions
@@ -8,6 +8,14 @@ from deerflow.subagents import get_available_subagent_names
 logger = logging.getLogger(__name__)


+def _get_enabled_skills():
+    try:
+        return list(load_skills(enabled_only=True))
+    except Exception:
+        logger.exception("Failed to load enabled skills for prompt injection")
+        return []
+
+
 def _build_subagent_section(max_concurrent: int) -> str:
    """Build the subagent system prompt section with dynamic concurrency limit.

@@ -386,7 +394,7 @@ def get_skills_prompt_section(available_skills: set[str] | None = None) -> str:
    Returns the <skill_system>...</skill_system> block listing all enabled skills,
    suitable for injection into any agent's system prompt.
    """
-    skills = load_skills(enabled_only=True)
+    skills = _get_enabled_skills()

    try:
        from deerflow.config import get_app_config
@@ -450,7 +458,7 @@ def get_deferred_tools_prompt_section() -> str:

        if not get_app_config().tool_search.enabled:
            return ""
-    except FileNotFoundError:
+    except Exception:
        return ""

    registry = get_deferred_registry()
@@ -246,6 +246,10 @@ def format_memory_for_injection(memory_data: dict[str, Any], max_tokens: int = 2
        if earlier.get("summary"):
            history_sections.append(f"Earlier: {earlier['summary']}")

+        background = history_data.get("longTermBackground", {})
+        if background.get("summary"):
+            history_sections.append(f"Background: {background['summary']}")
+
        if history_sections:
            sections.append("History:\n" + "\n".join(f"- {s}" for s in history_sections))

@@ -21,6 +21,7 @@ class ConversationContext:
    timestamp: datetime = field(default_factory=datetime.utcnow)
    agent_name: str | None = None
    correction_detected: bool = False
+    reinforcement_detected: bool = False


 class MemoryUpdateQueue:
@@ -44,6 +45,7 @@ class MemoryUpdateQueue:
        messages: list[Any],
        agent_name: str | None = None,
        correction_detected: bool = False,
+        reinforcement_detected: bool = False,
    ) -> None:
        """Add a conversation to the update queue.

@@ -52,6 +54,7 @@ class MemoryUpdateQueue:
            messages: The conversation messages.
            agent_name: If provided, memory is stored per-agent. If None, uses global memory.
            correction_detected: Whether recent turns include an explicit correction signal.
+            reinforcement_detected: Whether recent turns include a positive reinforcement signal.
        """
        config = get_memory_config()
        if not config.enabled:
@@ -63,11 +66,13 @@ class MemoryUpdateQueue:
                None,
            )
            merged_correction_detected = correction_detected or (existing_context.correction_detected if existing_context is not None else False)
+            merged_reinforcement_detected = reinforcement_detected or (existing_context.reinforcement_detected if existing_context is not None else False)
            context = ConversationContext(
                thread_id=thread_id,
                messages=messages,
                agent_name=agent_name,
                correction_detected=merged_correction_detected,
+                reinforcement_detected=merged_reinforcement_detected,
            )

            # Check if this thread already has a pending update
@@ -130,6 +135,7 @@ class MemoryUpdateQueue:
                        thread_id=context.thread_id,
                        agent_name=context.agent_name,
                        correction_detected=context.correction_detected,
+                        reinforcement_detected=context.reinforcement_detected,
                    )
                    if success:
                        logger.info("Memory updated successfully for thread %s", context.thread_id)
@@ -246,7 +246,7 @@ def _fact_content_key(content: Any) -> str | None:
    stripped = content.strip()
    if not stripped:
        return None
-    return stripped
+    return stripped.casefold()


 class MemoryUpdater:
@@ -272,6 +272,7 @@ class MemoryUpdater:
        thread_id: str | None = None,
        agent_name: str | None = None,
        correction_detected: bool = False,
+        reinforcement_detected: bool = False,
    ) -> bool:
        """Update memory based on conversation messages.

@@ -280,6 +281,7 @@ class MemoryUpdater:
            thread_id: Optional thread ID for tracking source.
            agent_name: If provided, updates per-agent memory. If None, updates global memory.
            correction_detected: Whether recent turns include an explicit correction signal.
+            reinforcement_detected: Whether recent turns include a positive reinforcement signal.

        Returns:
            True if update was successful, False otherwise.
@@ -310,6 +312,14 @@ class MemoryUpdater:
                    "and record the correct approach as a fact with category "
                    '"correction" and confidence >= 0.95 when appropriate.'
                )
+            if reinforcement_detected:
+                reinforcement_hint = (
+                    "IMPORTANT: Positive reinforcement signals were detected in this conversation. "
+                    "The user explicitly confirmed the agent's approach was correct or helpful. "
+                    "Record the confirmed approach, style, or preference as a fact with category "
+                    '"preference" or "behavior" and confidence >= 0.9 when appropriate.'
+                )
+                correction_hint = (correction_hint + "\n" + reinforcement_hint).strip() if correction_hint else reinforcement_hint

            prompt = MEMORY_UPDATE_PROMPT.format(
                current_memory=json.dumps(current_memory, indent=2),
@@ -441,6 +451,7 @@ def update_memory_from_conversation(
    thread_id: str | None = None,
    agent_name: str | None = None,
    correction_detected: bool = False,
+    reinforcement_detected: bool = False,
 ) -> bool:
    """Convenience function to update memory from a conversation.

@@ -449,9 +460,10 @@ def update_memory_from_conversation(
        thread_id: Optional thread ID.
        agent_name: If provided, updates per-agent memory. If None, updates global memory.
        correction_detected: Whether recent turns include an explicit correction signal.
+        reinforcement_detected: Whether recent turns include a positive reinforcement signal.

    Returns:
        True if successful, False otherwise.
    """
    updater = MemoryUpdater()
-    return updater.update_memory(messages, thread_id, agent_name, correction_detected)
+    return updater.update_memory(messages, thread_id, agent_name, correction_detected, reinforcement_detected)
@@ -182,6 +182,23 @@ class LoopDetectionMiddleware(AgentMiddleware[AgentState]):

        return None, False

+    @staticmethod
+    def _append_text(content: str | list | None, text: str) -> str | list:
+        """Append *text* to AIMessage content, handling str, list, and None.
+
+        When content is a list of content blocks (e.g. Anthropic thinking mode),
+        we append a new ``{"type": "text", ...}`` block instead of concatenating
+        a string to a list, which would raise ``TypeError``.
+        """
+        if content is None:
+            return text
+        if isinstance(content, list):
+            return [*content, {"type": "text", "text": f"\n\n{text}"}]
+        if isinstance(content, str):
+            return content + f"\n\n{text}"
+        # Fallback: coerce unexpected types to str to avoid TypeError
+        return str(content) + f"\n\n{text}"
+
    def _apply(self, state: AgentState, runtime: Runtime) -> dict | None:
        warning, hard_stop = self._track_and_check(state, runtime)

@@ -192,7 +209,7 @@ class LoopDetectionMiddleware(AgentMiddleware[AgentState]):
            stripped_msg = last_msg.model_copy(
                update={
                    "tool_calls": [],
-                    "content": (last_msg.content or "") + f"\n\n{_HARD_STOP_MSG}",
+                    "content": self._append_text(last_msg.content, _HARD_STOP_MSG),
                }
            )
            return {"messages": [stripped_msg]}
@@ -29,6 +29,22 @@ _CORRECTION_PATTERNS = (
    re.compile(r"改用"),
 )

+_REINFORCEMENT_PATTERNS = (
+    re.compile(r"\byes[,.]?\s+(?:exactly|perfect|that(?:'s| is) (?:right|correct|it))\b", re.IGNORECASE),
+    re.compile(r"\bperfect(?:[.!?]|$)", re.IGNORECASE),
+    re.compile(r"\bexactly\s+(?:right|correct)\b", re.IGNORECASE),
+    re.compile(r"\bthat(?:'s| is)\s+(?:exactly\s+)?(?:right|correct|what i (?:wanted|needed|meant))\b", re.IGNORECASE),
+    re.compile(r"\bkeep\s+(?:doing\s+)?that\b", re.IGNORECASE),
+    re.compile(r"\bjust\s+(?:like\s+)?(?:that|this)\b", re.IGNORECASE),
+    re.compile(r"\bthis is (?:great|helpful)\b(?:[.!?]|$)", re.IGNORECASE),
+    re.compile(r"\bthis is what i wanted\b(?:[.!?]|$)", re.IGNORECASE),
+    re.compile(r"对[，,]?\s*就是这样(?:[。！？!?.]|$)"),
+    re.compile(r"完全正确(?:[。！？!?.]|$)"),
+    re.compile(r"(?:对[，,]?\s*)?就是这个意思(?:[。！？!?.]|$)"),
+    re.compile(r"正是我想要的(?:[。！？!?.]|$)"),
+    re.compile(r"继续保持(?:[。！？!?.]|$)"),
+)
+

 class MemoryMiddlewareState(AgentState):
    """Compatible with the `ThreadState` schema."""
@@ -132,6 +148,29 @@ def detect_correction(messages: list[Any]) -> bool:
    return False


+def detect_reinforcement(messages: list[Any]) -> bool:
+    """Detect explicit positive reinforcement signals in recent conversation turns.
+
+    Complements detect_correction() by identifying when the user confirms the
+    agent's approach was correct. This allows the memory system to record what
+    worked well, not just what went wrong.
+
+    The queue keeps only one pending context per thread, so callers pass the
+    latest filtered message list. Checking only recent user turns keeps signal
+    detection conservative while avoiding stale signals from long histories.
+    """
+    recent_user_msgs = [msg for msg in messages[-6:] if getattr(msg, "type", None) == "human"]
+
+    for msg in recent_user_msgs:
+        content = _extract_message_text(msg).strip()
+        if not content:
+            continue
+        if any(pattern.search(content) for pattern in _REINFORCEMENT_PATTERNS):
+            return True
+
+    return False
+
+
 class MemoryMiddleware(AgentMiddleware[MemoryMiddlewareState]):
    """Middleware that queues conversation for memory update after agent execution.

@@ -196,12 +235,14 @@ class MemoryMiddleware(AgentMiddleware[MemoryMiddlewareState]):

        # Queue the filtered conversation for memory update
        correction_detected = detect_correction(filtered_messages)
+        reinforcement_detected = not correction_detected and detect_reinforcement(filtered_messages)
        queue = get_memory_queue()
        queue.add(
            thread_id=thread_id,
            messages=filtered_messages,
            agent_name=self._agent_name,
            correction_detected=correction_detected,
+            reinforcement_detected=reinforcement_detected,
        )

        return None
@@ -101,44 +101,33 @@ class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]):
        return user_msg if user_msg else "New Conversation"

    def _generate_title_result(self, state: TitleMiddlewareState) -> dict | None:
-        """Synchronously generate a title. Returns state update or None."""
+        """Generate a local fallback title without blocking on an LLM call."""
        if not self._should_generate_title(state):
            return None

-        prompt, user_msg = self._build_title_prompt(state)
-        config = get_title_config()
-        model = create_chat_model(name=config.model_name, thinking_enabled=False)
-
-        try:
-            response = model.invoke(prompt)
-            title = self._parse_title(response.content)
-            if not title:
-                title = self._fallback_title(user_msg)
-        except Exception:
-            logger.exception("Failed to generate title (sync)")
-            title = self._fallback_title(user_msg)
-
-        return {"title": title}
+        _, user_msg = self._build_title_prompt(state)
+        return {"title": self._fallback_title(user_msg)}

    async def _agenerate_title_result(self, state: TitleMiddlewareState) -> dict | None:
-        """Asynchronously generate a title. Returns state update or None."""
+        """Generate a title asynchronously and fall back locally on failure."""
        if not self._should_generate_title(state):
            return None

-        prompt, user_msg = self._build_title_prompt(state)
        config = get_title_config()
-        model = create_chat_model(name=config.model_name, thinking_enabled=False)
+        prompt, user_msg = self._build_title_prompt(state)

        try:
+            if config.model_name:
+                model = create_chat_model(name=config.model_name, thinking_enabled=False)
+            else:
+                model = create_chat_model(thinking_enabled=False)
            response = await model.ainvoke(prompt)
            title = self._parse_title(response.content)
-            if not title:
-                title = self._fallback_title(user_msg)
+            if title:
+                return {"title": title}
        except Exception:
-            logger.exception("Failed to generate title (async)")
-            title = self._fallback_title(user_msg)
-
-        return {"title": title}
+            logger.debug("Failed to generate async title; falling back to local title", exc_info=True)
+        return {"title": self._fallback_title(user_msg)}

    @override
    def after_model(self, state: TitleMiddlewareState, runtime: Runtime) -> dict | None:
@@ -138,6 +138,6 @@ def build_subagent_runtime_middlewares(*, lazy_init: bool = True) -> list[AgentM
    """Middlewares shared by subagent runtime before subagent-only middlewares."""
    return _build_runtime_middlewares(
        include_uploads=False,
-        include_dangling_tool_call_patch=False,
+        include_dangling_tool_call_patch=True,
        lazy_init=lazy_init,
    )
@@ -10,10 +10,52 @@ from langchain_core.messages import HumanMessage
 from langgraph.runtime import Runtime

 from deerflow.config.paths import Paths, get_paths
+from deerflow.utils.file_conversion import extract_outline

 logger = logging.getLogger(__name__)


+_OUTLINE_PREVIEW_LINES = 5
+
+
+def _extract_outline_for_file(file_path: Path) -> tuple[list[dict], list[str]]:
+    """Return the document outline and fallback preview for *file_path*.
+
+    Looks for a sibling ``<stem>.md`` file produced by the upload conversion
+    pipeline.
+
+    Returns:
+        (outline, preview) where:
+        - outline: list of ``{title, line}`` dicts (plus optional sentinel).
+          Empty when no headings are found or no .md exists.
+        - preview: first few non-empty lines of the .md, used as a content
+          anchor when outline is empty so the agent has some context.
+          Empty when outline is non-empty (no fallback needed).
+    """
+    md_path = file_path.with_suffix(".md")
+    if not md_path.is_file():
+        return [], []
+
+    outline = extract_outline(md_path)
+    if outline:
+        logger.debug("Extracted %d outline entries from %s", len(outline), file_path.name)
+        return outline, []
+
+    # outline is empty — read the first few non-empty lines as a content preview
+    preview: list[str] = []
+    try:
+        with md_path.open(encoding="utf-8") as f:
+            for line in f:
+                stripped = line.strip()
+                if stripped:
+                    preview.append(stripped)
+                if len(preview) >= _OUTLINE_PREVIEW_LINES:
+                    break
+    except Exception:
+        logger.debug("Failed to read preview lines from %s", md_path, exc_info=True)
+    return [], preview
+
+
 class UploadsMiddlewareState(AgentState):
    """State schema for uploads middleware."""

@@ -39,12 +81,38 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
        super().__init__()
        self._paths = Paths(base_dir) if base_dir else get_paths()

+    def _format_file_entry(self, file: dict, lines: list[str]) -> None:
+        """Append a single file entry (name, size, path, optional outline) to lines."""
+        size_kb = file["size"] / 1024
+        size_str = f"{size_kb:.1f} KB" if size_kb < 1024 else f"{size_kb / 1024:.1f} MB"
+        lines.append(f"- {file['filename']} ({size_str})")
+        lines.append(f"  Path: {file['path']}")
+        outline = file.get("outline") or []
+        if outline:
+            truncated = outline[-1].get("truncated", False)
+            visible = [e for e in outline if not e.get("truncated")]
+            lines.append("  Document outline (use `read_file` with line ranges to read sections):")
+            for entry in visible:
+                lines.append(f"    L{entry['line']}: {entry['title']}")
+            if truncated:
+                lines.append(f"    ... (showing first {len(visible)} headings; use `read_file` to explore further)")
+        else:
+            preview = file.get("outline_preview") or []
+            if preview:
+                lines.append("  No structural headings detected. Document begins with:")
+                for text in preview:
+                    lines.append(f"    > {text}")
+            lines.append("  Use `grep` to search for keywords (e.g. `grep(pattern='keyword', path='/mnt/user-data/uploads/')`).")
+        lines.append("")
+
    def _create_files_message(self, new_files: list[dict], historical_files: list[dict]) -> str:
        """Create a formatted message listing uploaded files.

        Args:
            new_files: Files uploaded in the current message.
            historical_files: Files uploaded in previous messages.
+                Each file dict may contain an optional ``outline`` key — a list of
+                ``{title, line}`` dicts extracted from the converted Markdown file.

        Returns:
            Formatted string inside <uploaded_files> tags.
@@ -55,25 +123,24 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
        lines.append("")
        if new_files:
            for file in new_files:
-                size_kb = file["size"] / 1024
-                size_str = f"{size_kb:.1f} KB" if size_kb < 1024 else f"{size_kb / 1024:.1f} MB"
-                lines.append(f"- {file['filename']} ({size_str})")
-                lines.append(f"  Path: {file['path']}")
-                lines.append("")
+                self._format_file_entry(file, lines)
        else:
            lines.append("(empty)")
+            lines.append("")

        if historical_files:
            lines.append("The following files were uploaded in previous messages and are still available:")
            lines.append("")
            for file in historical_files:
-                size_kb = file["size"] / 1024
-                size_str = f"{size_kb:.1f} KB" if size_kb < 1024 else f"{size_kb / 1024:.1f} MB"
-                lines.append(f"- {file['filename']} ({size_str})")
-                lines.append(f"  Path: {file['path']}")
-                lines.append("")
+                self._format_file_entry(file, lines)

-        lines.append("You can read these files using the `read_file` tool with the paths shown above.")
+        lines.append("To work with these files:")
+        lines.append("- Read from the file first — use the outline line numbers and `read_file` to locate relevant sections.")
+        lines.append("- Use `grep` to search for keywords when you are not sure which section to look at")
+        lines.append("  (e.g. `grep(pattern='revenue', path='/mnt/user-data/uploads/')`).")
+        lines.append("- Use `glob` to find files by name pattern")
+        lines.append("  (e.g. `glob(pattern='**/*.md', path='/mnt/user-data/uploads/')`).")
+        lines.append("- Only fall back to web search if the file content is clearly insufficient to answer the question.")
        lines.append("</uploaded_files>")

        return "\n".join(lines)
@@ -147,6 +214,13 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):

        # Resolve uploads directory for existence checks
        thread_id = (runtime.context or {}).get("thread_id")
+        if thread_id is None:
+            try:
+                from langgraph.config import get_config
+
+                thread_id = get_config().get("configurable", {}).get("thread_id")
+            except RuntimeError:
+                pass  # get_config() raises outside a runnable context (e.g. unit tests)
        uploads_dir = self._paths.sandbox_uploads_dir(thread_id) if thread_id else None

        # Get newly uploaded files from the current message's additional_kwargs.files
@@ -159,15 +233,26 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
            for file_path in sorted(uploads_dir.iterdir()):
                if file_path.is_file() and file_path.name not in new_filenames:
                    stat = file_path.stat()
+                    outline, preview = _extract_outline_for_file(file_path)
                    historical_files.append(
                        {
                            "filename": file_path.name,
                            "size": stat.st_size,
                            "path": f"/mnt/user-data/uploads/{file_path.name}",
                            "extension": file_path.suffix,
+                            "outline": outline,
+                            "outline_preview": preview,
                        }
                    )

+        # Attach outlines to new files as well
+        if uploads_dir:
+            for file in new_files:
+                phys_path = uploads_dir / file["filename"]
+                outline, preview = _extract_outline_for_file(phys_path)
+                file["outline"] = outline
+                file["outline_preview"] = preview
+
        if not new_files and not historical_files:
            return None

@@ -117,6 +117,7 @@ class DeerFlowClient:
        subagent_enabled: bool = False,
        plan_mode: bool = False,
        agent_name: str | None = None,
+        available_skills: set[str] | None = None,
        middlewares: Sequence[AgentMiddleware] | None = None,
    ):
        """Initialize the client.
@@ -133,6 +134,7 @@ class DeerFlowClient:
            subagent_enabled: Enable subagent delegation.
            plan_mode: Enable TodoList middleware for plan mode.
            agent_name: Name of the agent to use.
+            available_skills: Optional set of skill names to make available. If None (default), all scanned skills are available.
            middlewares: Optional list of custom middlewares to inject into the agent.
        """
        if config_path is not None:
@@ -148,6 +150,7 @@ class DeerFlowClient:
        self._subagent_enabled = subagent_enabled
        self._plan_mode = plan_mode
        self._agent_name = agent_name
+        self._available_skills = set(available_skills) if available_skills is not None else None
        self._middlewares = list(middlewares) if middlewares else []

        # Lazy agent — created on first call, recreated when config changes.
@@ -208,6 +211,8 @@ class DeerFlowClient:
            cfg.get("thinking_enabled"),
            cfg.get("is_plan_mode"),
            cfg.get("subagent_enabled"),
+            self._agent_name,
+            frozenset(self._available_skills) if self._available_skills is not None else None,
        )

        if self._agent is not None and self._agent_config_key == key:
@@ -226,6 +231,7 @@ class DeerFlowClient:
                subagent_enabled=subagent_enabled,
                max_concurrent_subagents=max_concurrent_subagents,
                agent_name=self._agent_name,
+                available_skills=self._available_skills,
            ),
            "state_schema": ThreadState,
        }
@@ -7,6 +7,7 @@ import uuid
 from agent_sandbox import Sandbox as AioSandboxClient

 from deerflow.sandbox.sandbox import Sandbox
+from deerflow.sandbox.search import GrepMatch, path_matches, should_ignore_path, truncate_line

 logger = logging.getLogger(__name__)

@@ -135,6 +136,86 @@ class AioSandbox(Sandbox):
                logger.error(f"Failed to write file in sandbox: {e}")
                raise

+    def glob(self, path: str, pattern: str, *, include_dirs: bool = False, max_results: int = 200) -> tuple[list[str], bool]:
+        if not include_dirs:
+            result = self._client.file.find_files(path=path, glob=pattern)
+            files = result.data.files if result.data and result.data.files else []
+            filtered = [file_path for file_path in files if not should_ignore_path(file_path)]
+            truncated = len(filtered) > max_results
+            return filtered[:max_results], truncated
+
+        result = self._client.file.list_path(path=path, recursive=True, show_hidden=False)
+        entries = result.data.files if result.data and result.data.files else []
+        matches: list[str] = []
+        root_path = path.rstrip("/") or "/"
+        root_prefix = root_path if root_path == "/" else f"{root_path}/"
+        for entry in entries:
+            if entry.path != root_path and not entry.path.startswith(root_prefix):
+                continue
+            if should_ignore_path(entry.path):
+                continue
+            rel_path = entry.path[len(root_path) :].lstrip("/")
+            if path_matches(pattern, rel_path):
+                matches.append(entry.path)
+                if len(matches) >= max_results:
+                    return matches, True
+        return matches, False
+
+    def grep(
+        self,
+        path: str,
+        pattern: str,
+        *,
+        glob: str | None = None,
+        literal: bool = False,
+        case_sensitive: bool = False,
+        max_results: int = 100,
+    ) -> tuple[list[GrepMatch], bool]:
+        import re as _re
+
+        regex_source = _re.escape(pattern) if literal else pattern
+        # Validate the pattern locally so an invalid regex raises re.error
+        # (caught by grep_tool's except re.error handler) rather than a
+        # generic remote API error.
+        _re.compile(regex_source, 0 if case_sensitive else _re.IGNORECASE)
+        regex = regex_source if case_sensitive else f"(?i){regex_source}"
+
+        if glob is not None:
+            find_result = self._client.file.find_files(path=path, glob=glob)
+            candidate_paths = find_result.data.files if find_result.data and find_result.data.files else []
+        else:
+            list_result = self._client.file.list_path(path=path, recursive=True, show_hidden=False)
+            entries = list_result.data.files if list_result.data and list_result.data.files else []
+            candidate_paths = [entry.path for entry in entries if not entry.is_directory]
+
+        matches: list[GrepMatch] = []
+        truncated = False
+
+        for file_path in candidate_paths:
+            if should_ignore_path(file_path):
+                continue
+
+            search_result = self._client.file.search_in_file(file=file_path, regex=regex)
+            data = search_result.data
+            if data is None:
+                continue
+
+            line_numbers = data.line_numbers or []
+            matched_lines = data.matches or []
+            for line_number, line in zip(line_numbers, matched_lines):
+                matches.append(
+                    GrepMatch(
+                        path=file_path,
+                        line_number=line_number if isinstance(line_number, int) else 0,
+                        line=truncate_line(line),
+                    )
+                )
+                if len(matches) >= max_results:
+                    truncated = True
+                    return matches, truncated
+
+        return matches, truncated
+
    def update_file(self, path: str, content: bytes) -> None:
        """Update a file with binary content in the sandbox.

@@ -1,5 +1,6 @@
 import logging
 import os
+from contextvars import ContextVar
 from pathlib import Path
 from typing import Any, Self

@@ -10,15 +11,15 @@ from pydantic import BaseModel, ConfigDict, Field
 from deerflow.config.acp_config import load_acp_config_from_dict
 from deerflow.config.checkpointer_config import CheckpointerConfig, load_checkpointer_config_from_dict
 from deerflow.config.extensions_config import ExtensionsConfig
-from deerflow.config.guardrails_config import load_guardrails_config_from_dict
-from deerflow.config.memory_config import load_memory_config_from_dict
+from deerflow.config.guardrails_config import GuardrailsConfig, load_guardrails_config_from_dict
+from deerflow.config.memory_config import MemoryConfig, load_memory_config_from_dict
 from deerflow.config.model_config import ModelConfig
 from deerflow.config.sandbox_config import SandboxConfig
 from deerflow.config.skills_config import SkillsConfig
 from deerflow.config.stream_bridge_config import StreamBridgeConfig, load_stream_bridge_config_from_dict
-from deerflow.config.subagents_config import load_subagents_config_from_dict
-from deerflow.config.summarization_config import load_summarization_config_from_dict
-from deerflow.config.title_config import load_title_config_from_dict
+from deerflow.config.subagents_config import SubagentsAppConfig, load_subagents_config_from_dict
+from deerflow.config.summarization_config import SummarizationConfig, load_summarization_config_from_dict
+from deerflow.config.title_config import TitleConfig, load_title_config_from_dict
 from deerflow.config.token_usage_config import TokenUsageConfig
 from deerflow.config.tool_config import ToolConfig, ToolGroupConfig
 from deerflow.config.tool_search_config import ToolSearchConfig, load_tool_search_config_from_dict
@@ -28,6 +29,13 @@ load_dotenv()
 logger = logging.getLogger(__name__)


+def _default_config_candidates() -> tuple[Path, ...]:
+    """Return deterministic config.yaml locations without relying on cwd."""
+    backend_dir = Path(__file__).resolve().parents[4]
+    repo_root = backend_dir.parent
+    return (backend_dir / "config.yaml", repo_root / "config.yaml")
+
+
 class AppConfig(BaseModel):
    """Config for the DeerFlow application"""

@@ -40,6 +48,11 @@ class AppConfig(BaseModel):
    skills: SkillsConfig = Field(default_factory=SkillsConfig, description="Skills configuration")
    extensions: ExtensionsConfig = Field(default_factory=ExtensionsConfig, description="Extensions configuration (MCP servers and skills state)")
    tool_search: ToolSearchConfig = Field(default_factory=ToolSearchConfig, description="Tool search / deferred loading configuration")
+    title: TitleConfig = Field(default_factory=TitleConfig, description="Automatic title generation configuration")
+    summarization: SummarizationConfig = Field(default_factory=SummarizationConfig, description="Conversation summarization configuration")
+    memory: MemoryConfig = Field(default_factory=MemoryConfig, description="Memory subsystem configuration")
+    subagents: SubagentsAppConfig = Field(default_factory=SubagentsAppConfig, description="Subagent runtime configuration")
+    guardrails: GuardrailsConfig = Field(default_factory=GuardrailsConfig, description="Guardrail middleware configuration")
    model_config = ConfigDict(extra="allow", frozen=False)
    checkpointer: CheckpointerConfig | None = Field(default=None, description="Checkpointer configuration")
    stream_bridge: StreamBridgeConfig | None = Field(default=None, description="Stream bridge configuration")
@@ -51,7 +64,7 @@ class AppConfig(BaseModel):
        Priority:
        1. If provided `config_path` argument, use it.
        2. If provided `DEER_FLOW_CONFIG_PATH` environment variable, use it.
-        3. Otherwise, first check the `config.yaml` in the current directory, then fallback to `config.yaml` in the parent directory.
+        3. Otherwise, search deterministic backend/repository-root defaults from `_default_config_candidates()`.
        """
        if config_path:
            path = Path(config_path)
@@ -64,14 +77,10 @@ class AppConfig(BaseModel):
                raise FileNotFoundError(f"Config file specified by environment variable `DEER_FLOW_CONFIG_PATH` not found at {path}")
            return path
        else:
-            # Check if the config.yaml is in the current directory
-            path = Path(os.getcwd()) / "config.yaml"
-            if not path.exists():
-                # Check if the config.yaml is in the parent directory of CWD
-                path = Path(os.getcwd()).parent / "config.yaml"
-                if not path.exists():
-                    raise FileNotFoundError("`config.yaml` file not found at the current directory nor its parent directory")
-            return path
+            for path in _default_config_candidates():
+                if path.exists():
+                    return path
+            raise FileNotFoundError("`config.yaml` file not found at the default backend or repository root locations")

    @classmethod
    def from_file(cls, config_path: str | None = None) -> Self:
@@ -244,6 +253,8 @@ _app_config: AppConfig | None = None
 _app_config_path: Path | None = None
 _app_config_mtime: float | None = None
 _app_config_is_custom = False
+_current_app_config: ContextVar[AppConfig | None] = ContextVar("deerflow_current_app_config", default=None)
+_current_app_config_stack: ContextVar[tuple[AppConfig | None, ...]] = ContextVar("deerflow_current_app_config_stack", default=())


 def _get_config_mtime(config_path: Path) -> float | None:
@@ -276,6 +287,10 @@ def get_app_config() -> AppConfig:
    """
    global _app_config, _app_config_path, _app_config_mtime

+    runtime_override = _current_app_config.get()
+    if runtime_override is not None:
+        return runtime_override
+
    if _app_config is not None and _app_config_is_custom:
        return _app_config

@@ -337,3 +352,26 @@ def set_app_config(config: AppConfig) -> None:
    _app_config_path = None
    _app_config_mtime = None
    _app_config_is_custom = True
+
+
+def peek_current_app_config() -> AppConfig | None:
+    """Read the context-local AppConfig override without modifying it.
+
+    Returns:
+        The value currently held by ``_current_app_config``, or ``None`` when
+        no override is active in this execution context.
+    """
+    active_override = _current_app_config.get()
+    return active_override
+
+
+def push_current_app_config(config: AppConfig) -> None:
+    """Install *config* as the context-local override, remembering what it replaces.
+
+    The override that was active before the call (possibly ``None``) is
+    appended to the context-local stack so that ``pop_current_app_config``
+    can restore it later.
+
+    Args:
+        config: The AppConfig that ``get_app_config()`` should return while
+            this override is active.
+    """
+    displaced = _current_app_config.get()
+    saved = _current_app_config_stack.get()
+    # The stack is an immutable tuple: a new tuple is stored rather than
+    # mutating the existing one in place.
+    _current_app_config_stack.set((*saved, displaced))
+    _current_app_config.set(config)
+
+
+def pop_current_app_config() -> None:
+    """Undo the most recent ``push_current_app_config`` in this execution context.
+
+    The override that was active before the matching push is restored. When
+    the stack is already empty, the override is simply cleared.
+    """
+    saved = _current_app_config_stack.get()
+    if saved:
+        *remaining, restored = saved
+        _current_app_config_stack.set(tuple(remaining))
+        _current_app_config.set(restored)
+    else:
+        # Unbalanced pop: nothing to restore, fall back to "no override".
+        _current_app_config.set(None)
@@ -80,6 +80,12 @@ class ExtensionsConfig(BaseModel):
        Args:
            config_path: Optional path to extensions config file.

+        Resolution order:
+            1. If provided `config_path` argument, use it.
+            2. If provided `DEER_FLOW_EXTENSIONS_CONFIG_PATH` environment variable, use it.
+            3. Otherwise, search backend/repository-root defaults for
+               `extensions_config.json`, then legacy `mcp_config.json`.
+
        Returns:
            Path to the extensions config file if found, otherwise None.
        """
@@ -94,24 +100,16 @@ class ExtensionsConfig(BaseModel):
                raise FileNotFoundError(f"Extensions config file specified by environment variable `DEER_FLOW_EXTENSIONS_CONFIG_PATH` not found at {path}")
            return path
        else:
-            # Check if the extensions_config.json is in the current directory
-            path = Path(os.getcwd()) / "extensions_config.json"
-            if path.exists():
-                return path
-
-            # Check if the extensions_config.json is in the parent directory of CWD
-            path = Path(os.getcwd()).parent / "extensions_config.json"
-            if path.exists():
-                return path
-
-            # Backward compatibility: check for mcp_config.json
-            path = Path(os.getcwd()) / "mcp_config.json"
-            if path.exists():
-                return path
-
-            path = Path(os.getcwd()).parent / "mcp_config.json"
-            if path.exists():
-                return path
+            backend_dir = Path(__file__).resolve().parents[4]
+            repo_root = backend_dir.parent
+            for path in (
+                backend_dir / "extensions_config.json",
+                repo_root / "extensions_config.json",
+                backend_dir / "mcp_config.json",
+                repo_root / "mcp_config.json",
+            ):
+                if path.exists():
+                    return path

            # Extensions are optional, so return None if not found
            return None
@@ -9,6 +9,12 @@ VIRTUAL_PATH_PREFIX = "/mnt/user-data"
 _SAFE_THREAD_ID_RE = re.compile(r"^[A-Za-z0-9_\-]+$")


+def _default_local_base_dir() -> Path:
+    """Locate the default ``.deer-flow`` state directory from this module's path.
+
+    The location is computed from the resolved path of this file rather than
+    from the current working directory.
+
+    Returns:
+        ``{backend_dir}/.deer-flow`` where ``backend_dir`` is ``parents[4]``
+        of this file.
+    """
+    module_file = Path(__file__).resolve()
+    return module_file.parents[4].joinpath(".deer-flow")
+
+
 def _validate_thread_id(thread_id: str) -> str:
    """Validate a thread ID before using it in filesystem paths."""
    if not _SAFE_THREAD_ID_RE.match(thread_id):
@@ -67,8 +73,7 @@ class Paths:
    BaseDir resolution (in priority order):
        1. Constructor argument `base_dir`
        2. DEER_FLOW_HOME environment variable
-        3. Local dev fallback: cwd/.deer-flow  (when cwd is the backend/ dir)
-        4. Default: $HOME/.deer-flow
+        3. Repo-local fallback derived from this module path: `{backend_dir}/.deer-flow`
    """

    def __init__(self, base_dir: str | Path | None = None) -> None:
@@ -104,11 +109,7 @@ class Paths:
        if env_home := os.getenv("DEER_FLOW_HOME"):
            return Path(env_home).resolve()

-        cwd = Path.cwd()
-        if cwd.name == "backend" or (cwd / "pyproject.toml").exists():
-            return cwd / ".deer-flow"
-
-        return Path.home() / ".deer-flow"
+        return _default_local_base_dir()

    @property
    def memory_file(self) -> Path:
@@ -3,6 +3,11 @@ from pathlib import Path
 from pydantic import BaseModel, Field


+def _default_repo_root() -> Path:
+    """Return the repository root derived from this module's resolved location.
+
+    The result is ``parents[5]`` of this file, so it is independent of the
+    current working directory.
+    """
+    module_file = Path(__file__).resolve()
+    return module_file.parents[5]
+
+
 class SkillsConfig(BaseModel):
    """Configuration for skills system"""

@@ -26,8 +31,8 @@ class SkillsConfig(BaseModel):
            # Use configured path (can be absolute or relative)
            path = Path(self.path)
            if not path.is_absolute():
-                # If relative, resolve from current working directory
-                path = Path.cwd() / path
+                # If relative, resolve from the repo root for deterministic behavior.
+                path = _default_repo_root() / path
            return path.resolve()
        else:
            # Default: ../skills relative to backend directory
@@ -15,6 +15,11 @@ class SubagentOverrideConfig(BaseModel):
        ge=1,
        description="Timeout in seconds for this subagent (None = use global default)",
    )
+    max_turns: int | None = Field(
+        default=None,
+        ge=1,
+        description="Maximum turns for this subagent (None = use global or builtin default)",
+    )


 class SubagentsAppConfig(BaseModel):
@@ -25,6 +30,11 @@ class SubagentsAppConfig(BaseModel):
        ge=1,
        description="Default timeout in seconds for all subagents (default: 900 = 15 minutes)",
    )
+    max_turns: int | None = Field(
+        default=None,
+        ge=1,
+        description="Optional default max-turn override for all subagents (None = keep builtin defaults)",
+    )
    agents: dict[str, SubagentOverrideConfig] = Field(
        default_factory=dict,
        description="Per-agent configuration overrides keyed by agent name",
@@ -44,6 +54,15 @@ class SubagentsAppConfig(BaseModel):
            return override.timeout_seconds
        return self.timeout_seconds

+    def get_max_turns_for(self, agent_name: str, builtin_default: int) -> int:
+        """Resolve the max-turn limit that applies to *agent_name*.
+
+        Precedence, highest first: the per-agent override in ``self.agents``,
+        the app-wide ``self.max_turns``, then *builtin_default*.
+
+        Args:
+            agent_name: Key looked up in ``self.agents``.
+            builtin_default: Value returned when neither level sets ``max_turns``.
+
+        Returns:
+            The first non-``None`` value in the precedence order above.
+        """
+        agent_override = self.agents.get(agent_name)
+        per_agent_limit = None if agent_override is None else agent_override.max_turns
+        for candidate in (per_agent_limit, self.max_turns):
+            if candidate is not None:
+                return candidate
+        return builtin_default
+

 _subagents_config: SubagentsAppConfig = SubagentsAppConfig()

@@ -58,8 +77,26 @@ def load_subagents_config_from_dict(config_dict: dict) -> None:
    global _subagents_config
    _subagents_config = SubagentsAppConfig(**config_dict)

-    overrides_summary = {name: f"{override.timeout_seconds}s" for name, override in _subagents_config.agents.items() if override.timeout_seconds is not None}
+    overrides_summary = {}
+    for name, override in _subagents_config.agents.items():
+        parts = []
+        if override.timeout_seconds is not None:
+            parts.append(f"timeout={override.timeout_seconds}s")
+        if override.max_turns is not None:
+            parts.append(f"max_turns={override.max_turns}")
+        if parts:
+            overrides_summary[name] = ", ".join(parts)
+
    if overrides_summary:
-        logger.info(f"Subagents config loaded: default timeout={_subagents_config.timeout_seconds}s, per-agent overrides={overrides_summary}")
+        logger.info(
+            "Subagents config loaded: default timeout=%ss, default max_turns=%s, per-agent overrides=%s",
+            _subagents_config.timeout_seconds,
+            _subagents_config.max_turns,
+            overrides_summary,
+        )
    else:
-        logger.info(f"Subagents config loaded: default timeout={_subagents_config.timeout_seconds}s, no per-agent overrides")
+        logger.info(
+            "Subagents config loaded: default timeout=%ss, default max_turns=%s, no per-agent overrides",
+            _subagents_config.timeout_seconds,
+            _subagents_config.max_turns,
+        )
@@ -25,6 +25,7 @@ class MemoryStreamBridge(StreamBridge):
        self._maxsize = queue_maxsize
        self._queues: dict[str, asyncio.Queue[StreamEvent]] = {}
        self._counters: dict[str, int] = {}
+        self._dropped_counts: dict[str, int] = {}

    # -- helpers ---------------------------------------------------------------

@@ -32,6 +33,7 @@ class MemoryStreamBridge(StreamBridge):
        if run_id not in self._queues:
            self._queues[run_id] = asyncio.Queue(maxsize=self._maxsize)
            self._counters[run_id] = 0
+            self._dropped_counts[run_id] = 0
        return self._queues[run_id]

    def _next_id(self, run_id: str) -> str:
@@ -48,14 +50,41 @@ class MemoryStreamBridge(StreamBridge):
        try:
            await asyncio.wait_for(queue.put(entry), timeout=_PUBLISH_TIMEOUT)
        except TimeoutError:
-            logger.warning("Stream bridge queue full for run %s — dropping event %s", run_id, event)
+            self._dropped_counts[run_id] = self._dropped_counts.get(run_id, 0) + 1
+            logger.warning(
+                "Stream bridge queue full for run %s — dropping event %s (total dropped: %d)",
+                run_id,
+                event,
+                self._dropped_counts[run_id],
+            )

    async def publish_end(self, run_id: str) -> None:
        queue = self._get_or_create_queue(run_id)
-        try:
-            await asyncio.wait_for(queue.put(END_SENTINEL), timeout=_PUBLISH_TIMEOUT)
-        except TimeoutError:
-            logger.warning("Stream bridge queue full for run %s — dropping END sentinel", run_id)
+
+        # END sentinel is critical — it is the only signal that allows
+        # subscribers to terminate.  If the queue is full we evict the
+        # oldest *regular* events to make room rather than dropping END,
+        # which would cause the SSE connection to hang forever and leak
+        # the queue/counter resources for this run_id.
+        if queue.full():
+            evicted = 0
+            while queue.full():
+                try:
+                    queue.get_nowait()
+                    evicted += 1
+                except asyncio.QueueEmpty:
+                    break  # pragma: no cover – defensive
+            if evicted:
+                logger.warning(
+                    "Stream bridge queue full for run %s — evicted %d event(s) to guarantee END sentinel delivery",
+                    run_id,
+                    evicted,
+                )
+
+        # After eviction the queue is guaranteed to have space, so a
+        # simple non-blocking put is safe.  We still use put() (which
+        # blocks until space is available) as a defensive measure.
+        await queue.put(END_SENTINEL)

    async def subscribe(
        self,
@@ -84,7 +113,18 @@ class MemoryStreamBridge(StreamBridge):
            await asyncio.sleep(delay)
        self._queues.pop(run_id, None)
        self._counters.pop(run_id, None)
+        self._dropped_counts.pop(run_id, None)

    async def close(self) -> None:
        self._queues.clear()
        self._counters.clear()
+        self._dropped_counts.clear()
+
+    def dropped_count(self, run_id: str) -> int:
+        """Report how many events have been dropped for *run_id*.
+
+        Returns:
+            The recorded drop counter, or ``0`` when *run_id* has no entry.
+        """
+        try:
+            return self._dropped_counts[run_id]
+        except KeyError:
+            return 0
+
+    @property
+    def dropped_total(self) -> int:
+        """Sum of the drop counters of every run currently tracked."""
+        total = 0
+        for per_run_drops in self._dropped_counts.values():
+            total += per_run_drops
+        return total
@@ -1,72 +1,6 @@
-import fnmatch
 from pathlib import Path

-IGNORE_PATTERNS = [
-    # Version Control
-    ".git",
-    ".svn",
-    ".hg",
-    ".bzr",
-    # Dependencies
-    "node_modules",
-    "__pycache__",
-    ".venv",
-    "venv",
-    ".env",
-    "env",
-    ".tox",
-    ".nox",
-    ".eggs",
-    "*.egg-info",
-    "site-packages",
-    # Build outputs
-    "dist",
-    "build",
-    ".next",
-    ".nuxt",
-    ".output",
-    ".turbo",
-    "target",
-    "out",
-    # IDE & Editor
-    ".idea",
-    ".vscode",
-    "*.swp",
-    "*.swo",
-    "*~",
-    ".project",
-    ".classpath",
-    ".settings",
-    # OS generated
-    ".DS_Store",
-    "Thumbs.db",
-    "desktop.ini",
-    "*.lnk",
-    # Logs & temp files
-    "*.log",
-    "*.tmp",
-    "*.temp",
-    "*.bak",
-    "*.cache",
-    ".cache",
-    "logs",
-    # Coverage & test artifacts
-    ".coverage",
-    "coverage",
-    ".nyc_output",
-    "htmlcov",
-    ".pytest_cache",
-    ".mypy_cache",
-    ".ruff_cache",
-]
-
-
-def _should_ignore(name: str) -> bool:
-    """Check if a file/directory name matches any ignore pattern."""
-    for pattern in IGNORE_PATTERNS:
-        if fnmatch.fnmatch(name, pattern):
-            return True
-    return False
+from deerflow.sandbox.search import should_ignore_name


 def list_dir(path: str, max_depth: int = 2) -> list[str]:
@@ -95,7 +29,7 @@ def list_dir(path: str, max_depth: int = 2) -> list[str]:

        try:
            for item in current_path.iterdir():
-                if _should_ignore(item.name):
+                if should_ignore_name(item.name):
                    continue

                post_fix = "/" if item.is_dir() else ""
@@ -1,11 +1,23 @@
+import errno
 import ntpath
 import os
 import shutil
 import subprocess
+from dataclasses import dataclass
 from pathlib import Path

 from deerflow.sandbox.local.list_dir import list_dir
 from deerflow.sandbox.sandbox import Sandbox
+from deerflow.sandbox.search import GrepMatch, find_glob_matches, find_grep_matches
+
+
+@dataclass(frozen=True)
+class PathMapping:
+    """A path mapping from a container path to a local path with optional read-only flag.
+
+    Instances are immutable (``frozen=True``).
+    """
+
+    # Path as seen inside the sandbox, e.g. a mount point such as "/mnt/skills".
+    container_path: str
+    # Corresponding path on the local filesystem.
+    local_path: str
+    # When True the mapping is treated as a read-only mount; writable by default.
+    read_only: bool = False


 class LocalSandbox(Sandbox):
@@ -39,17 +51,42 @@ class LocalSandbox(Sandbox):

        return None

-    def __init__(self, id: str, path_mappings: dict[str, str] | None = None):
+    def __init__(self, id: str, path_mappings: list[PathMapping] | None = None):
        """
        Initialize local sandbox with optional path mappings.

        Args:
            id: Sandbox identifier
-            path_mappings: Dictionary mapping container paths to local paths
-                          Example: {"/mnt/skills": "/absolute/path/to/skills"}
+            path_mappings: List of path mappings with optional read-only flag.
+                          Skills directory is read-only by default.
        """
        super().__init__(id)
-        self.path_mappings = path_mappings or {}
+        self.path_mappings = path_mappings or []
+
+    def _is_read_only_path(self, resolved_path: str) -> bool:
+        """Tell whether *resolved_path* falls under a mapping flagged read-only.
+
+        A mapping covers the path when its resolved ``local_path`` equals the
+        resolved target or is a directory prefix of it. With nested mounts the
+        covering mapping with the longest resolved ``local_path`` decides.
+
+        Args:
+            resolved_path: A local filesystem path (already translated out of
+                container space).
+
+        Returns:
+            ``read_only`` of the most specific covering mapping, or ``False``
+            when no mapping covers the path.
+        """
+        target = str(Path(resolved_path).resolve())
+
+        covering: list[tuple[int, PathMapping]] = []
+        for mapping in self.path_mappings:
+            mount_root = str(Path(mapping.local_path).resolve())
+            # Require a separator after the prefix so "/a/b" does not cover "/a/bc".
+            if target == mount_root or target.startswith(mount_root + os.sep):
+                covering.append((len(mount_root), mapping))
+
+        if not covering:
+            return False
+
+        # max() keeps the first of several equally long prefixes, which is the
+        # same winner a strict "longer than best so far" scan would pick.
+        _, most_specific = max(covering, key=lambda entry: entry[0])
+        return most_specific.read_only

    def _resolve_path(self, path: str) -> str:
        """
@@ -64,7 +101,9 @@ class LocalSandbox(Sandbox):
        path_str = str(path)

        # Try each mapping (longest prefix first for more specific matches)
-        for container_path, local_path in sorted(self.path_mappings.items(), key=lambda x: len(x[0]), reverse=True):
+        for mapping in sorted(self.path_mappings, key=lambda m: len(m.container_path), reverse=True):
+            container_path = mapping.container_path
+            local_path = mapping.local_path
            if path_str == container_path or path_str.startswith(container_path + "/"):
                # Replace the container path prefix with local path
                relative = path_str[len(container_path) :].lstrip("/")
@@ -84,15 +123,16 @@ class LocalSandbox(Sandbox):
        Returns:
            Container path if mapping exists, otherwise original path
        """
-        path_str = str(Path(path).resolve())
+        normalized_path = path.replace("\\", "/")
+        path_str = str(Path(normalized_path).resolve())

        # Try each mapping (longest local path first for more specific matches)
-        for container_path, local_path in sorted(self.path_mappings.items(), key=lambda x: len(x[1]), reverse=True):
-            local_path_resolved = str(Path(local_path).resolve())
-            if path_str.startswith(local_path_resolved):
+        for mapping in sorted(self.path_mappings, key=lambda m: len(m.local_path), reverse=True):
+            local_path_resolved = str(Path(mapping.local_path).resolve())
+            if path_str == local_path_resolved or path_str.startswith(local_path_resolved + "/"):
                # Replace the local path prefix with container path
                relative = path_str[len(local_path_resolved) :].lstrip("/")
-                resolved = f"{container_path}/{relative}" if relative else container_path
+                resolved = f"{mapping.container_path}/{relative}" if relative else mapping.container_path
                return resolved

        # No mapping found, return original path
@@ -111,7 +151,7 @@ class LocalSandbox(Sandbox):
        import re

        # Sort mappings by local path length (longest first) for correct prefix matching
-        sorted_mappings = sorted(self.path_mappings.items(), key=lambda x: len(x[1]), reverse=True)
+        sorted_mappings = sorted(self.path_mappings, key=lambda m: len(m.local_path), reverse=True)

        if not sorted_mappings:
            return output
@@ -119,12 +159,11 @@ class LocalSandbox(Sandbox):
        # Create pattern that matches absolute paths
        # Match paths like /Users/... or other absolute paths
        result = output
-        for container_path, local_path in sorted_mappings:
-            local_path_resolved = str(Path(local_path).resolve())
+        for mapping in sorted_mappings:
            # Escape the local path for use in regex
-            escaped_local = re.escape(local_path_resolved)
-            # Match the local path followed by optional path components
-            pattern = re.compile(escaped_local + r"(?:/[^\s\"';&|<>()]*)?")
+            escaped_local = re.escape(str(Path(mapping.local_path).resolve()))
+            # Match the local path followed by optional path components with either separator
+            pattern = re.compile(escaped_local + r"(?:[/\\][^\s\"';&|<>()]*)?")

            def replace_match(match: re.Match) -> str:
                matched_path = match.group(0)
@@ -147,7 +186,7 @@ class LocalSandbox(Sandbox):
        import re

        # Sort mappings by length (longest first) for correct prefix matching
-        sorted_mappings = sorted(self.path_mappings.items(), key=lambda x: len(x[0]), reverse=True)
+        sorted_mappings = sorted(self.path_mappings, key=lambda m: len(m.container_path), reverse=True)

        # Build regex pattern to match all container paths
        # Match container path followed by optional path components
@@ -157,7 +196,7 @@ class LocalSandbox(Sandbox):
        # Create pattern that matches any of the container paths.
        # The lookahead (?=/|$|...) ensures we only match at a path-segment boundary,
        # preventing /mnt/skills from matching inside /mnt/skills-extra.
-        patterns = [re.escape(container_path) + r"(?=/|$|[\s\"';&|<>()])(?:/[^\s\"';&|<>()]*)?" for container_path, _ in sorted_mappings]
+        patterns = [re.escape(m.container_path) + r"(?=/|$|[\s\"';&|<>()])(?:/[^\s\"';&|<>()]*)?" for m in sorted_mappings]
        pattern = re.compile("|".join(f"({p})" for p in patterns))

        def replace_match(match: re.Match) -> str:
@@ -248,6 +287,8 @@ class LocalSandbox(Sandbox):

    def write_file(self, path: str, content: str, append: bool = False) -> None:
        resolved_path = self._resolve_path(path)
+        if self._is_read_only_path(resolved_path):
+            raise OSError(errno.EROFS, "Read-only file system", path)
        try:
            dir_path = os.path.dirname(resolved_path)
            if dir_path:
@@ -259,8 +300,43 @@ class LocalSandbox(Sandbox):
            # Re-raise with the original path for clearer error messages, hiding internal resolved paths
            raise type(e)(e.errno, e.strerror, path) from None

+    def glob(self, path: str, pattern: str, *, include_dirs: bool = False, max_results: int = 200) -> tuple[list[str], bool]:
+        """Run a glob search under *path* and report hits as container paths.
+
+        *path* is translated to a local path, the search is delegated to
+        ``find_glob_matches``, and every hit is translated back with
+        ``_reverse_resolve_path``.
+
+        Returns:
+            ``(matches, truncated)`` exactly as produced by
+            ``find_glob_matches``, with the matches reverse-resolved.
+        """
+        search_root = Path(self._resolve_path(path))
+        local_hits, truncated = find_glob_matches(search_root, pattern, include_dirs=include_dirs, max_results=max_results)
+        container_hits = list(map(self._reverse_resolve_path, local_hits))
+        return container_hits, truncated
+
+    def grep(
+        self,
+        path: str,
+        pattern: str,
+        *,
+        glob: str | None = None,
+        literal: bool = False,
+        case_sensitive: bool = False,
+        max_results: int = 100,
+    ) -> tuple[list[GrepMatch], bool]:
+        """Run a content search under *path* and report hits with container paths.
+
+        *path* is translated to a local path and the search is delegated to
+        ``find_grep_matches`` (``glob`` is forwarded as ``glob_pattern``).
+        Each hit is rebuilt with its ``path`` reverse-resolved; line number
+        and line text are carried over unchanged.
+
+        Returns:
+            ``(matches, truncated)`` as produced by ``find_grep_matches``,
+            with the match paths reverse-resolved.
+        """
+        search_root = Path(self._resolve_path(path))
+        search_options = {
+            "glob_pattern": glob,
+            "literal": literal,
+            "case_sensitive": case_sensitive,
+            "max_results": max_results,
+        }
+        local_hits, truncated = find_grep_matches(search_root, pattern, **search_options)
+
+        container_hits: list[GrepMatch] = []
+        for hit in local_hits:
+            container_path = self._reverse_resolve_path(hit.path)
+            container_hits.append(GrepMatch(path=container_path, line_number=hit.line_number, line=hit.line))
+        return container_hits, truncated
+
    def update_file(self, path: str, content: bytes) -> None:
        resolved_path = self._resolve_path(path)
+        if self._is_read_only_path(resolved_path):
+            raise OSError(errno.EROFS, "Read-only file system", path)
        try:
            dir_path = os.path.dirname(resolved_path)
            if dir_path:
@@ -1,6 +1,7 @@
 import logging
+from pathlib import Path

-from deerflow.sandbox.local.local_sandbox import LocalSandbox
+from deerflow.sandbox.local.local_sandbox import LocalSandbox, PathMapping
 from deerflow.sandbox.sandbox import Sandbox
 from deerflow.sandbox.sandbox_provider import SandboxProvider

@@ -14,16 +15,17 @@ class LocalSandboxProvider(SandboxProvider):
        """Initialize the local sandbox provider with path mappings."""
        self._path_mappings = self._setup_path_mappings()

-    def _setup_path_mappings(self) -> dict[str, str]:
+    def _setup_path_mappings(self) -> list[PathMapping]:
        """
        Setup path mappings for local sandbox.

-        Maps container paths to actual local paths, including skills directory.
+        Maps container paths to actual local paths, including skills directory
+        and any custom mounts configured in config.yaml.

        Returns:
-            Dictionary of path mappings
+            List of path mappings
        """
-        mappings = {}
+        mappings: list[PathMapping] = []

        # Map skills container path to local skills directory
        try:
@@ -35,10 +37,63 @@ class LocalSandboxProvider(SandboxProvider):

            # Only add mapping if skills directory exists
            if skills_path.exists():
-                mappings[container_path] = str(skills_path)
+                mappings.append(
+                    PathMapping(
+                        container_path=container_path,
+                        local_path=str(skills_path),
+                        read_only=True,  # Skills directory is always read-only
+                    )
+                )
+
+            # Map custom mounts from sandbox config
+            _RESERVED_CONTAINER_PREFIXES = [container_path, "/mnt/acp-workspace", "/mnt/user-data"]
+            sandbox_config = config.sandbox
+            if sandbox_config and sandbox_config.mounts:
+                for mount in sandbox_config.mounts:
+                    host_path = Path(mount.host_path)
+                    container_path = mount.container_path.rstrip("/") or "/"
+
+                    if not host_path.is_absolute():
+                        logger.warning(
+                            "Mount host_path must be absolute, skipping: %s -> %s",
+                            mount.host_path,
+                            mount.container_path,
+                        )
+                        continue
+
+                    if not container_path.startswith("/"):
+                        logger.warning(
+                            "Mount container_path must be absolute, skipping: %s -> %s",
+                            mount.host_path,
+                            mount.container_path,
+                        )
+                        continue
+
+                    # Reject mounts that conflict with reserved container paths
+                    if any(container_path == p or container_path.startswith(p + "/") for p in _RESERVED_CONTAINER_PREFIXES):
+                        logger.warning(
+                            "Mount container_path conflicts with reserved prefix, skipping: %s",
+                            mount.container_path,
+                        )
+                        continue
+                    # Ensure the host path exists before adding mapping
+                    if host_path.exists():
+                        mappings.append(
+                            PathMapping(
+                                container_path=container_path,
+                                local_path=str(host_path.resolve()),
+                                read_only=mount.read_only,
+                            )
+                        )
+                    else:
+                        logger.warning(
+                            "Mount host_path does not exist, skipping: %s -> %s",
+                            mount.host_path,
+                            mount.container_path,
+                        )
        except Exception as e:
            # Log but don't fail if config loading fails
-            logger.warning("Could not setup skills path mapping: %s", e, exc_info=True)
+            logger.warning("Could not setup path mappings: %s", e, exc_info=True)

        return mappings

@@ -1,5 +1,7 @@
 from abc import ABC, abstractmethod

+from deerflow.sandbox.search import GrepMatch
+

 class Sandbox(ABC):
    """Abstract base class for sandbox environments"""
@@ -61,6 +63,25 @@ class Sandbox(ABC):
        """
        pass

+    @abstractmethod
+    def glob(self, path: str, pattern: str, *, include_dirs: bool = False, max_results: int = 200) -> tuple[list[str], bool]:
+        """Find paths that match a glob pattern under a root directory.
+
+        Args:
+            path: Root directory to search under.
+            pattern: Glob pattern to match.
+            include_dirs: Keyword-only flag, off by default.
+                NOTE(review): presumably controls whether directories are
+                reported alongside files — confirm against implementations.
+            max_results: Keyword-only limit, 200 by default.
+                NOTE(review): presumably caps the number of returned paths — confirm.
+
+        Returns:
+            A tuple of the matched paths and a boolean.
+            NOTE(review): the local sandbox implementation names the boolean
+            ``truncated``; presumably it signals that results were cut off — confirm.
+        """
+        pass
+
+    @abstractmethod
+    def grep(
+        self,
+        path: str,
+        pattern: str,
+        *,
+        glob: str | None = None,
+        literal: bool = False,
+        case_sensitive: bool = False,
+        max_results: int = 100,
+    ) -> tuple[list[GrepMatch], bool]:
+        """Search for matches inside text files under a directory.
+
+        Args:
+            path: Directory to search under.
+            pattern: Text to search for.
+            glob: Optional glob pattern; ``None`` by default.
+                NOTE(review): presumably restricts which files are searched — confirm.
+            literal: Off by default.
+                NOTE(review): presumably treats ``pattern`` as plain text rather
+                than a regular expression — confirm against implementations.
+            case_sensitive: Off by default, i.e. the default search is not
+                flagged as case-sensitive.
+            max_results: Keyword-only limit, 100 by default.
+                NOTE(review): presumably caps the number of returned matches — confirm.
+
+        Returns:
+            A tuple of ``GrepMatch`` records and a boolean.
+            NOTE(review): the local sandbox implementation names the boolean
+            ``truncated``; presumably it signals that results were cut off — confirm.
+        """
+        pass
+
    @abstractmethod
    def update_file(self, path: str, content: bytes) -> None:
        """Update a file with binary content.
@@ -0,0 +1,210 @@
+import fnmatch
+import os
+import re
+from dataclasses import dataclass
+from pathlib import Path, PurePosixPath
+
+# fnmatch-style patterns for file and directory names that the search helpers
+# skip. Each pattern is compared against a single path segment (see
+# should_ignore_name), never against a full path.
+IGNORE_PATTERNS = [
+    ".git",
+    ".svn",
+    ".hg",
+    ".bzr",
+    "node_modules",
+    "__pycache__",
+    ".venv",
+    "venv",
+    ".env",
+    "env",
+    ".tox",
+    ".nox",
+    ".eggs",
+    "*.egg-info",
+    "site-packages",
+    "dist",
+    "build",
+    ".next",
+    ".nuxt",
+    ".output",
+    ".turbo",
+    "target",
+    "out",
+    ".idea",
+    ".vscode",
+    "*.swp",
+    "*.swo",
+    "*~",
+    ".project",
+    ".classpath",
+    ".settings",
+    ".DS_Store",
+    "Thumbs.db",
+    "desktop.ini",
+    "*.lnk",
+    "*.log",
+    "*.tmp",
+    "*.temp",
+    "*.bak",
+    "*.cache",
+    ".cache",
+    "logs",
+    ".coverage",
+    "coverage",
+    ".nyc_output",
+    "htmlcov",
+    ".pytest_cache",
+    ".mypy_cache",
+    ".ruff_cache",
+]
+
+# Default for find_grep_matches(max_file_size=...): larger files are skipped.
+DEFAULT_MAX_FILE_SIZE_BYTES = 1_000_000
+# Default maximum length of a line summary (see truncate_line).
+DEFAULT_LINE_SUMMARY_LENGTH = 200
+
+
+@dataclass(frozen=True)
+class GrepMatch:
+    """One matching line produced by a content search."""
+
+    # Path of the file that contains the match.
+    path: str
+    # Line number of the match inside that file (find_grep_matches counts from 1).
+    line_number: int
+    # Text of the matching line (find_grep_matches stores it via truncate_line).
+    line: str
+
+
+def should_ignore_name(name: str) -> bool:
+    """Return True when a single file or directory name matches any ignore pattern."""
+    return any(fnmatch.fnmatch(name, candidate) for candidate in IGNORE_PATTERNS)
+
+
+def should_ignore_path(path: str) -> bool:
+    """Return True when any segment of ``path`` is an ignored name.
+
+    Backslashes are treated as separators too, and empty segments (leading,
+    trailing or doubled separators) are skipped.
+    """
+    segments = path.replace("\\", "/").split("/")
+    for segment in segments:
+        if segment and should_ignore_name(segment):
+            return True
+    return False
+
+
+def path_matches(pattern: str, rel_path: str) -> bool:
+    """Return True when a POSIX-style relative path matches a glob pattern.
+
+    The pattern is tried as given first. A pattern starting with ``**/`` is
+    additionally tried without that prefix, so ``**/*.py`` also matches a file
+    that sits directly in the root.
+    """
+    candidate = PurePosixPath(rel_path)
+    alternatives = [pattern]
+    if pattern.startswith("**/"):
+        alternatives.append(pattern[3:])
+    # any() short-circuits, so the stripped form is only tried after the
+    # pattern as written has failed to match.
+    return any(candidate.match(alternative) for alternative in alternatives)
+
+
+def truncate_line(line: str, max_chars: int = DEFAULT_LINE_SUMMARY_LENGTH) -> str:
+    """Strip the line ending and shorten the line to at most ``max_chars`` characters.
+
+    Args:
+        line: The raw line, possibly ending in ``\\n`` and/or ``\\r``.
+        max_chars: Maximum length of the returned string.
+
+    Returns:
+        The line unchanged when it already fits. Otherwise a prefix followed by
+        ``...`` so that the total length equals ``max_chars``. When ``max_chars``
+        leaves no room for any text before the ellipsis (3 or less), a plain
+        prefix of that length is returned instead.
+    """
+    line = line.rstrip("\n\r")
+    if len(line) <= max_chars:
+        return line
+    if max_chars <= 3:
+        # A negative slice bound would count from the end of the line and make
+        # the result longer than max_chars, so hard-cut instead.
+        return line[: max(max_chars, 0)]
+    return line[: max_chars - 3] + "..."
+
+
+def is_binary_file(path: Path, sample_size: int = 8192) -> bool:
+    """Return True when the file looks binary or cannot be read.
+
+    Only the first ``sample_size`` bytes are inspected; a NUL byte in that
+    sample marks the file as binary. Any ``OSError`` while opening or reading
+    is reported as binary as well.
+    """
+    try:
+        with path.open("rb") as stream:
+            head = stream.read(sample_size)
+    except OSError:
+        return True
+    return b"\0" in head
+
+
+def find_glob_matches(root: Path, pattern: str, *, include_dirs: bool = False, max_results: int = 200) -> tuple[list[str], bool]:
+    """Walk ``root`` and collect paths whose root-relative form matches ``pattern``.
+
+    Args:
+        root: Directory to search; it is resolved before walking.
+        pattern: Glob pattern matched against each root-relative POSIX path.
+        include_dirs: Also report matching directories (listed before the files
+            of the same parent directory).
+        max_results: Stop as soon as this many paths have been collected.
+
+    Returns:
+        ``(paths, truncated)`` where ``truncated`` is True when the walk stopped
+        because ``max_results`` was reached.
+
+    Raises:
+        FileNotFoundError: ``root`` does not exist.
+        NotADirectoryError: ``root`` is not a directory.
+    """
+    base = root.resolve()
+    if not base.exists():
+        raise FileNotFoundError(base)
+    if not base.is_dir():
+        raise NotADirectoryError(base)
+
+    found: list[str] = []
+    for walk_dir, subdirs, filenames in os.walk(base):
+        # Prune ignored directories in place so os.walk never descends into them.
+        subdirs[:] = [entry for entry in subdirs if not should_ignore_name(entry)]
+        here = Path(walk_dir)
+        # base is already resolved and os.walk builds walk_dir beneath it, so
+        # relative_to() needs no extra resolve() per directory.
+        relative_here = here.relative_to(base)
+
+        candidates = list(subdirs) if include_dirs else []
+        candidates.extend(entry for entry in filenames if not should_ignore_name(entry))
+
+        for entry in candidates:
+            if not path_matches(pattern, (relative_here / entry).as_posix()):
+                continue
+            found.append(str(here / entry))
+            if len(found) >= max_results:
+                return found, True
+
+    return found, False
+
+
+def find_grep_matches(
+    root: Path,
+    pattern: str,
+    *,
+    glob_pattern: str | None = None,
+    literal: bool = False,
+    case_sensitive: bool = False,
+    max_results: int = 100,
+    max_file_size: int = DEFAULT_MAX_FILE_SIZE_BYTES,
+    line_summary_length: int = DEFAULT_LINE_SUMMARY_LENGTH,
+) -> tuple[list[GrepMatch], bool]:
+    """Search text files under ``root`` for lines matching ``pattern``.
+
+    Args:
+        root: Directory to search; it is resolved before walking.
+        pattern: Regular expression, or a plain string when ``literal`` is True.
+        glob_pattern: Optional glob filter applied to each root-relative file path.
+        literal: Escape ``pattern`` so it is matched verbatim.
+        case_sensitive: When False (the default) matching ignores case.
+        max_results: Stop as soon as this many matches have been collected.
+        max_file_size: Files larger than this many bytes are skipped.
+        line_summary_length: Maximum length of each reported line; lines longer
+            than ten times this value are not searched at all.
+
+    Returns:
+        ``(matches, truncated)`` where ``truncated`` is True when the search
+        stopped because ``max_results`` was reached.
+
+    Raises:
+        FileNotFoundError: ``root`` does not exist.
+        NotADirectoryError: ``root`` is not a directory.
+        re.error: ``pattern`` is not a valid regular expression.
+    """
+    matches: list[GrepMatch] = []
+    truncated = False
+    root = root.resolve()
+
+    if not root.exists():
+        raise FileNotFoundError(root)
+    if not root.is_dir():
+        raise NotADirectoryError(root)
+
+    # Compile once, before walking, so an invalid pattern fails immediately.
+    regex_source = re.escape(pattern) if literal else pattern
+    flags = 0 if case_sensitive else re.IGNORECASE
+    regex = re.compile(regex_source, flags)
+
+    # Skip lines longer than this to prevent ReDoS on minified / no-newline files.
+    _max_line_chars = line_summary_length * 10
+
+    for current_root, dirs, files in os.walk(root):
+        # Prune ignored directories in place so os.walk never descends into them.
+        dirs[:] = [name for name in dirs if not should_ignore_name(name)]
+        rel_dir = Path(current_root).relative_to(root)
+
+        for name in files:
+            if should_ignore_name(name):
+                continue
+
+            candidate_path = Path(current_root) / name
+            rel_path = (rel_dir / name).as_posix()
+
+            if glob_pattern is not None and not path_matches(glob_pattern, rel_path):
+                continue
+
+            try:
+                # Symlinked files are never read.
+                if candidate_path.is_symlink():
+                    continue
+                file_path = candidate_path.resolve()
+                # Defensive: ignore anything that resolves outside the search root.
+                if not file_path.is_relative_to(root):
+                    continue
+                if file_path.stat().st_size > max_file_size or is_binary_file(file_path):
+                    continue
+                # Undecodable bytes are replaced rather than aborting the file.
+                with file_path.open(encoding="utf-8", errors="replace") as handle:
+                    for line_number, line in enumerate(handle, start=1):
+                        if len(line) > _max_line_chars:
+                            continue
+                        if regex.search(line):
+                            matches.append(
+                                GrepMatch(
+                                    path=str(file_path),
+                                    line_number=line_number,
+                                    line=truncate_line(line, line_summary_length),
+                                )
+                            )
+                            if len(matches) >= max_results:
+                                truncated = True
+                                return matches, truncated
+            except OSError:
+                # Unreadable or vanished files are skipped; matches already
+                # collected from this file are kept.
+                continue
+
+    return matches, truncated
@@ -7,6 +7,7 @@ from langchain.tools import ToolRuntime, tool
 from langgraph.typing import ContextT

 from deerflow.agents.thread_state import ThreadDataState, ThreadState
+from deerflow.config import get_app_config
 from deerflow.config.paths import VIRTUAL_PATH_PREFIX
 from deerflow.sandbox.exceptions import (
    SandboxError,
@@ -16,6 +17,7 @@ from deerflow.sandbox.exceptions import (
 from deerflow.sandbox.file_operation_lock import get_file_operation_lock
 from deerflow.sandbox.sandbox import Sandbox
 from deerflow.sandbox.sandbox_provider import get_sandbox_provider
+from deerflow.sandbox.search import GrepMatch
 from deerflow.sandbox.security import LOCAL_HOST_BASH_DISABLED_MESSAGE, is_host_bash_allowed

 _ABSOLUTE_PATH_PATTERN = re.compile(r"(?<![:\w])(?<!:/)/(?:[^\s\"'`;&|<>()]+)")
@@ -31,6 +33,10 @@ _LOCAL_BASH_SYSTEM_PATH_PREFIXES = (

 _DEFAULT_SKILLS_CONTAINER_PATH = "/mnt/skills"
 _ACP_WORKSPACE_VIRTUAL_PATH = "/mnt/acp-workspace"
+_DEFAULT_GLOB_MAX_RESULTS = 200
+_MAX_GLOB_MAX_RESULTS = 1000
+_DEFAULT_GREP_MAX_RESULTS = 100
+_MAX_GREP_MAX_RESULTS = 500


 def _get_skills_container_path() -> str:
@@ -113,6 +119,54 @@ def _is_acp_workspace_path(path: str) -> bool:
    return path == _ACP_WORKSPACE_VIRTUAL_PATH or path.startswith(f"{_ACP_WORKSPACE_VIRTUAL_PATH}/")


+def _get_custom_mounts():
+    """Return the custom volume mounts declared in the sandbox config.
+
+    The list is memoised on the function object after the first successful
+    config load. When loading fails, an empty list is returned and nothing is
+    memoised, so a later call can retry once the config is available.
+    """
+    memo = getattr(_get_custom_mounts, "_cached", None)
+    if memo is not None:
+        return memo
+
+    try:
+        from pathlib import Path
+
+        from deerflow.config import get_app_config
+
+        config = get_app_config()
+        if config.sandbox and config.sandbox.mounts:
+            # Keep only mounts whose host_path exists, consistent with
+            # LocalSandboxProvider._setup_path_mappings(), which filters the
+            # same way.
+            usable = [entry for entry in config.sandbox.mounts if Path(entry.host_path).exists()]
+        else:
+            usable = []
+    except Exception:
+        # Config not loadable yet: answer "no mounts" for now without caching.
+        return []
+
+    _get_custom_mounts._cached = usable  # type: ignore[attr-defined]
+    return usable
+
+
+def _is_custom_mount_path(path: str) -> bool:
+    """Return True when ``path`` equals, or lies below, a custom mount's container_path."""
+    return any(
+        path == mount.container_path or path.startswith(f"{mount.container_path}/")
+        for mount in _get_custom_mounts()
+    )
+
+
+def _get_custom_mount_for_path(path: str):
+    """Return the mount whose container_path is the longest prefix of ``path``.
+
+    Returns None when no configured mount covers the path. When several mounts
+    share the longest container_path length, the first one configured wins.
+    """
+    covering = [
+        mount
+        for mount in _get_custom_mounts()
+        if path == mount.container_path or path.startswith(f"{mount.container_path}/")
+    ]
+    # max() returns the first maximal element, which keeps the earliest mount on ties.
+    return max(covering, key=lambda mount: len(mount.container_path), default=None)
+
+
 def _extract_thread_id_from_thread_data(thread_data: "ThreadDataState | None") -> str | None:
    """Extract thread_id from thread_data by inspecting workspace_path.

@@ -245,16 +299,84 @@ def _get_mcp_allowed_paths() -> list[str]:
    return allowed_paths


+def _get_tool_config_int(name: str, key: str, default: int) -> int:
+    """Read an integer option from a tool's extra config, falling back to ``default``.
+
+    Args:
+        name: Tool name passed to ``get_tool_config``.
+        key: Name of the extra config field to read.
+        default: Value returned when the tool has no config, the key is absent,
+            the value is not an integer, or the config cannot be loaded.
+
+    Returns:
+        The configured integer, or ``default``.
+    """
+    try:
+        tool_config = get_app_config().get_tool_config(name)
+        # Both the tool config and its model_extra may be None.
+        extra = getattr(tool_config, "model_extra", None) or {}
+        value = extra.get(key)
+        # bool is a subclass of int: a boolean config value (e.g. `true`)
+        # must not be read as the integer 1.
+        if isinstance(value, int) and not isinstance(value, bool):
+            return value
+    except Exception:
+        # Deliberate best effort: a broken or missing config must not break
+        # the tool call, so fall through to the default.
+        pass
+    return default
+
+
+def _clamp_max_results(value: int, *, default: int, upper_bound: int) -> int:
+    """Return ``default`` for non-positive values, otherwise ``value`` capped at ``upper_bound``."""
+    return min(value, upper_bound) if value > 0 else default
+
+
+def _resolve_max_results(name: str, requested: int, *, default: int, upper_bound: int) -> int:
+    """Combine the caller's limit with the tool's configured ``max_results``.
+
+    Both values are clamped independently (non-positive becomes ``default``,
+    nothing exceeds ``upper_bound``) and the smaller one is returned.
+    """
+    caller_limit = _clamp_max_results(requested, default=default, upper_bound=upper_bound)
+    raw_config_limit = _get_tool_config_int(name, "max_results", default)
+    config_limit = _clamp_max_results(raw_config_limit, default=default, upper_bound=upper_bound)
+    return min(caller_limit, config_limit)
+
+
+def _resolve_local_read_path(path: str, thread_data: ThreadDataState) -> str:
+    """Validate a virtual path for read access and map it for the local sandbox.
+
+    Args:
+        path: Virtual path supplied by the tool caller.
+        thread_data: Thread data of the current thread.
+
+    Returns:
+        The resolved skills, ACP-workspace or user-data path. Custom mount
+        paths are returned unchanged.
+
+    Raises:
+        PermissionError: Propagated from ``validate_local_tool_path`` when the
+            path is not allowed.
+    """
+    validate_local_tool_path(path, thread_data, read_only=True)
+    if _is_skills_path(path):
+        return _resolve_skills_path(path)
+    if _is_acp_workspace_path(path):
+        return _resolve_acp_workspace_path(path, _extract_thread_id_from_thread_data(thread_data))
+    if _is_custom_mount_path(path):
+        # Same handling as ls/read_file: custom mount paths are resolved by
+        # LocalSandbox._resolve_path(), so they must not go through the
+        # user-data resolver.
+        # NOTE(review): assumes LocalSandbox.glob/grep resolve container paths
+        # the same way list_dir/read_file do — confirm.
+        return path
+    return _resolve_and_validate_user_data_path(path, thread_data)
+
+
+def _format_glob_results(root_path: str, matches: list[str], truncated: bool) -> str:
+    """Render glob matches as a numbered, newline-separated report.
+
+    The first line states the number of paths (with a "showing first N" note
+    when truncated); a closing hint is appended when the list was truncated.
+    """
+    if not matches:
+        return f"No files matched under {root_path}"
+
+    count = len(matches)
+    header = f"Found {count} paths under {root_path}"
+    if truncated:
+        header += f" (showing first {count})"
+
+    numbered = [f"{position}. {entry}" for position, entry in enumerate(matches, start=1)]
+    footer = ["Results truncated. Narrow the path or pattern to see fewer matches."] if truncated else []
+    return "\n".join([header, *numbered, *footer])
+
+
+def _format_grep_results(root_path: str, matches: list[GrepMatch], truncated: bool) -> str:
+    """Render grep matches as ``path:line_number: line`` entries below a header line.
+
+    The first line states the number of matches (with a "showing first N" note
+    when truncated); a closing hint is appended when the list was truncated.
+    """
+    if not matches:
+        return f"No matches found under {root_path}"
+
+    count = len(matches)
+    header = f"Found {count} matches under {root_path}"
+    if truncated:
+        header += f" (showing first {count})"
+
+    entries = [f"{hit.path}:{hit.line_number}: {hit.line}" for hit in matches]
+    footer = ["Results truncated. Narrow the path or add a glob filter."] if truncated else []
+    return "\n".join([header, *entries, *footer])
+
+
 def _path_variants(path: str) -> set[str]:
    return {path, path.replace("\\", "/"), path.replace("/", "\\")}


+def _path_separator_for_style(path: str) -> str:
+    """Return the backslash only for paths that use backslashes and no forward slash; "/" otherwise."""
+    uses_only_backslashes = "\\" in path and "/" not in path
+    if uses_only_backslashes:
+        return "\\"
+    return "/"
+
+
 def _join_path_preserving_style(base: str, relative: str) -> str:
+    """Append ``relative`` to ``base`` using the separator style of ``base``.
+
+    An empty ``relative`` returns ``base`` unchanged. Otherwise separators in
+    ``relative`` are converted to the style of ``base``, leading separators of
+    ``relative`` and trailing separators of ``base`` are dropped, and the two
+    parts are joined with exactly one separator.
+    """
     if not relative:
         return base
-    if "/" in base and "\\" not in base:
-        return f"{base.rstrip('/')}/{relative}"
-    return str(Path(base) / relative)
+    separator = _path_separator_for_style(base)
+    # Replace the "other" separator, then drop leading separators of either kind.
+    normalized_relative = relative.replace("\\" if separator == "/" else "/", separator).lstrip("/\\")
+    stripped_base = base.rstrip("/\\")
+    return f"{stripped_base}{separator}{normalized_relative}"


 def _sanitize_error(error: Exception, runtime: "ToolRuntime[ContextT, ThreadState] | None" = None) -> str:
@@ -299,7 +421,10 @@ def replace_virtual_path(path: str, thread_data: ThreadDataState | None) -> str:
            return actual_base
        if path.startswith(f"{virtual_base}/"):
            rest = path[len(virtual_base) :].lstrip("/")
-            return _join_path_preserving_style(actual_base, rest)
+            result = _join_path_preserving_style(actual_base, rest)
+            if path.endswith("/") and not result.endswith(("/", "\\")):
+                result += _path_separator_for_style(actual_base)
+            return result

    return path

@@ -379,6 +504,8 @@ def mask_local_paths_in_output(output: str, thread_data: ThreadDataState | None)

            result = pattern.sub(replace_acp, result)

+    # Custom mount host paths are masked by LocalSandbox._reverse_resolve_paths_in_output()
+
    # Mask user-data host paths
    if thread_data is None:
        return result
@@ -427,6 +554,7 @@ def validate_local_tool_path(path: str, thread_data: ThreadDataState | None, *,
      - ``/mnt/user-data/*``  — always allowed (read + write)
      - ``/mnt/skills/*``     — allowed only when *read_only* is True
      - ``/mnt/acp-workspace/*`` — allowed only when *read_only* is True
+      - Custom mount paths (from config.yaml) — respects per-mount ``read_only`` flag

    Args:
        path: The virtual path to validate.
@@ -458,7 +586,14 @@ def validate_local_tool_path(path: str, thread_data: ThreadDataState | None, *,
    if path.startswith(f"{VIRTUAL_PATH_PREFIX}/"):
        return

-    raise PermissionError(f"Only paths under {VIRTUAL_PATH_PREFIX}/, {_get_skills_container_path()}/, or {_ACP_WORKSPACE_VIRTUAL_PATH}/ are allowed")
+    # Custom mount paths — respect read_only config
+    if _is_custom_mount_path(path):
+        mount = _get_custom_mount_for_path(path)
+        if mount and mount.read_only and not read_only:
+            raise PermissionError(f"Write access to read-only mount is not allowed: {path}")
+        return
+
+    raise PermissionError(f"Only paths under {VIRTUAL_PATH_PREFIX}/, {_get_skills_container_path()}/, {_ACP_WORKSPACE_VIRTUAL_PATH}/, or configured mount paths are allowed")


 def _validate_resolved_user_data_path(resolved: Path, thread_data: ThreadDataState) -> None:
@@ -508,9 +643,10 @@ def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState
    boundary and must not be treated as isolation from the host filesystem.

    In local mode, commands must use virtual paths under /mnt/user-data for
-    user data access. Skills paths under /mnt/skills and ACP workspace paths
-    under /mnt/acp-workspace are allowed (path-traversal checks only; write
-    prevention for bash commands is not enforced here).
+    user data access. Skills paths under /mnt/skills, ACP workspace paths
+    under /mnt/acp-workspace, and custom mount container paths (configured in
+    config.yaml) are allowed (path-traversal checks only; write prevention
+    for bash commands is not enforced here).
    A small allowlist of common system path prefixes is kept for executable
    and device references (e.g. /bin/sh, /dev/null).
    """
@@ -545,6 +681,11 @@ def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState
            _reject_path_traversal(absolute_path)
            continue

+        # Allow custom mount container paths
+        if _is_custom_mount_path(absolute_path):
+            _reject_path_traversal(absolute_path)
+            continue
+
        if any(absolute_path == prefix.rstrip("/") or absolute_path.startswith(prefix) for prefix in _LOCAL_BASH_SYSTEM_PATH_PREFIXES):
            continue

@@ -589,6 +730,8 @@ def replace_virtual_paths_in_command(command: str, thread_data: ThreadDataState

        result = acp_pattern.sub(replace_acp_match, result)

+    # Custom mount paths are resolved by LocalSandbox._resolve_paths_in_command()
+
    # Replace user-data paths
    if VIRTUAL_PATH_PREFIX in result and thread_data is not None:
        pattern = re.compile(rf"{re.escape(VIRTUAL_PATH_PREFIX)}(/[^\s\"';&|<>()]*)?")
@@ -666,7 +809,8 @@ def sandbox_from_runtime(runtime: ToolRuntime[ContextT, ThreadState] | None = No
    if sandbox is None:
        raise SandboxNotFoundError(f"Sandbox with ID '{sandbox_id}' not found", sandbox_id=sandbox_id)

-    runtime.context["sandbox_id"] = sandbox_id  # Ensure sandbox_id is in context for downstream use
+    if runtime.context is not None:
+        runtime.context["sandbox_id"] = sandbox_id  # Ensure sandbox_id is in context for downstream use
    return sandbox


@@ -701,7 +845,8 @@ def ensure_sandbox_initialized(runtime: ToolRuntime[ContextT, ThreadState] | Non
        if sandbox_id is not None:
            sandbox = get_sandbox_provider().get(sandbox_id)
            if sandbox is not None:
-                runtime.context["sandbox_id"] = sandbox_id  # Ensure sandbox_id is in context for releasing in after_agent
+                if runtime.context is not None:
+                    runtime.context["sandbox_id"] = sandbox_id  # Ensure sandbox_id is in context for releasing in after_agent
                return sandbox
            # Sandbox was released, fall through to acquire new one

@@ -723,7 +868,8 @@ def ensure_sandbox_initialized(runtime: ToolRuntime[ContextT, ThreadState] | Non
    if sandbox is None:
        raise SandboxNotFoundError("Sandbox not found after acquisition", sandbox_id=sandbox_id)

-    runtime.context["sandbox_id"] = sandbox_id  # Ensure sandbox_id is in context for releasing in after_agent
+    if runtime.context is not None:
+        runtime.context["sandbox_id"] = sandbox_id  # Ensure sandbox_id is in context for releasing in after_agent
    return sandbox


@@ -885,8 +1031,9 @@ def ls_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, path:
                path = _resolve_skills_path(path)
            elif _is_acp_workspace_path(path):
                path = _resolve_acp_workspace_path(path, _extract_thread_id_from_thread_data(thread_data))
-            else:
+            elif not _is_custom_mount_path(path):
                path = _resolve_and_validate_user_data_path(path, thread_data)
+            # Custom mount paths are resolved by LocalSandbox._resolve_path()
        children = sandbox.list_dir(path)
        if not children:
            return "(empty)"
@@ -901,6 +1048,126 @@ def ls_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, path:
        return f"Error: Unexpected error listing directory: {_sanitize_error(e, runtime)}"


+@tool("glob", parse_docstring=True)
+def glob_tool(
+    runtime: ToolRuntime[ContextT, ThreadState],
+    description: str,
+    pattern: str,
+    path: str,
+    include_dirs: bool = False,
+    max_results: int = _DEFAULT_GLOB_MAX_RESULTS,
+) -> str:
+    """Find files or directories that match a glob pattern under a root directory.
+
+    Args:
+        description: Explain why you are searching for these paths in short words. ALWAYS PROVIDE THIS PARAMETER FIRST.
+        pattern: The glob pattern to match relative to the root path, for example `**/*.py`.
+        path: The **absolute** root directory to search under.
+        include_dirs: Whether matching directories should also be returned. Default is False.
+        max_results: Maximum number of paths to return. Default is 200.
+    """
+    # Remember the caller's path before the try block: the error handlers below
+    # report it, and sandbox / thread-directory setup can raise before any
+    # statement inside the try has had a chance to bind it.
+    requested_path = path
+    try:
+        sandbox = ensure_sandbox_initialized(runtime)
+        ensure_thread_directories_exist(runtime)
+        effective_max_results = _resolve_max_results(
+            "glob",
+            max_results,
+            default=_DEFAULT_GLOB_MAX_RESULTS,
+            upper_bound=_MAX_GLOB_MAX_RESULTS,
+        )
+        thread_data = None
+        if is_local_sandbox(runtime):
+            thread_data = get_thread_data(runtime)
+            if thread_data is None:
+                raise SandboxRuntimeError("Thread data not available for local sandbox")
+            # Local mode: validate the virtual path and map it for the sandbox.
+            path = _resolve_local_read_path(path, thread_data)
+        matches, truncated = sandbox.glob(path, pattern, include_dirs=include_dirs, max_results=effective_max_results)
+        if thread_data is not None:
+            # Run each returned path through the local-path masking helper
+            # before it is shown to the caller.
+            matches = [mask_local_paths_in_output(match, thread_data) for match in matches]
+        return _format_glob_results(requested_path, matches, truncated)
+    except SandboxError as e:
+        return f"Error: {e}"
+    except FileNotFoundError:
+        return f"Error: Directory not found: {requested_path}"
+    except NotADirectoryError:
+        return f"Error: Path is not a directory: {requested_path}"
+    except PermissionError:
+        return f"Error: Permission denied: {requested_path}"
+    except Exception as e:
+        return f"Error: Unexpected error searching paths: {_sanitize_error(e, runtime)}"
+
+
+@tool("grep", parse_docstring=True)
+def grep_tool(
+    runtime: ToolRuntime[ContextT, ThreadState],
+    description: str,
+    pattern: str,
+    path: str,
+    glob: str | None = None,
+    literal: bool = False,
+    case_sensitive: bool = False,
+    max_results: int = _DEFAULT_GREP_MAX_RESULTS,
+) -> str:
+    """Search for matching lines inside text files under a root directory.
+
+    Args:
+        description: Explain why you are searching file contents in short words. ALWAYS PROVIDE THIS PARAMETER FIRST.
+        pattern: The string or regex pattern to search for.
+        path: The **absolute** root directory to search under.
+        glob: Optional glob filter for candidate files, for example `**/*.py`.
+        literal: Whether to treat `pattern` as a plain string. Default is False.
+        case_sensitive: Whether matching is case-sensitive. Default is False.
+        max_results: Maximum number of matching lines to return. Default is 100.
+    """
+    # Remember the caller's path before the try block: the error handlers below
+    # report it, and sandbox / thread-directory setup can raise before any
+    # statement inside the try has had a chance to bind it.
+    requested_path = path
+    try:
+        sandbox = ensure_sandbox_initialized(runtime)
+        ensure_thread_directories_exist(runtime)
+        effective_max_results = _resolve_max_results(
+            "grep",
+            max_results,
+            default=_DEFAULT_GREP_MAX_RESULTS,
+            upper_bound=_MAX_GREP_MAX_RESULTS,
+        )
+        thread_data = None
+        if is_local_sandbox(runtime):
+            thread_data = get_thread_data(runtime)
+            if thread_data is None:
+                raise SandboxRuntimeError("Thread data not available for local sandbox")
+            # Local mode: validate the virtual path and map it for the sandbox.
+            path = _resolve_local_read_path(path, thread_data)
+        matches, truncated = sandbox.grep(
+            path,
+            pattern,
+            glob=glob,
+            literal=literal,
+            case_sensitive=case_sensitive,
+            max_results=effective_max_results,
+        )
+        if thread_data is not None:
+            # Only the path is masked; the matched line text is passed through
+            # unchanged.
+            matches = [
+                GrepMatch(
+                    path=mask_local_paths_in_output(match.path, thread_data),
+                    line_number=match.line_number,
+                    line=match.line,
+                )
+                for match in matches
+            ]
+        return _format_grep_results(requested_path, matches, truncated)
+    except SandboxError as e:
+        return f"Error: {e}"
+    except FileNotFoundError:
+        return f"Error: Directory not found: {requested_path}"
+    except NotADirectoryError:
+        return f"Error: Path is not a directory: {requested_path}"
+    except re.error as e:
+        return f"Error: Invalid regex pattern: {e}"
+    except PermissionError:
+        return f"Error: Permission denied: {requested_path}"
+    except Exception as e:
+        return f"Error: Unexpected error searching file contents: {_sanitize_error(e, runtime)}"
+
+
@tool("read_file", parse_docstring=True)
 def read_file_tool(
    runtime: ToolRuntime[ContextT, ThreadState],
@@ -928,8 +1195,9 @@ def read_file_tool(
                path = _resolve_skills_path(path)
            elif _is_acp_workspace_path(path):
                path = _resolve_acp_workspace_path(path, _extract_thread_id_from_thread_data(thread_data))
-            else:
+            elif not _is_custom_mount_path(path):
                path = _resolve_and_validate_user_data_path(path, thread_data)
+            # Custom mount paths are resolved by LocalSandbox._resolve_path()
        content = sandbox.read_file(path)
        if not content:
            return "(empty)"
@@ -977,7 +1245,9 @@ def write_file_tool(
        if is_local_sandbox(runtime):
            thread_data = get_thread_data(runtime)
            validate_local_tool_path(path, thread_data)
-            path = _resolve_and_validate_user_data_path(path, thread_data)
+            if not _is_custom_mount_path(path):
+                path = _resolve_and_validate_user_data_path(path, thread_data)
+            # Custom mount paths are resolved by LocalSandbox._resolve_path()
        with get_file_operation_lock(sandbox, path):
            sandbox.write_file(path, content, append)
        return "OK"
@@ -1019,7 +1289,9 @@ def str_replace_tool(
        if is_local_sandbox(runtime):
            thread_data = get_thread_data(runtime)
            validate_local_tool_path(path, thread_data)
-            path = _resolve_and_validate_user_data_path(path, thread_data)
+            if not _is_custom_mount_path(path):
+                path = _resolve_and_validate_user_data_path(path, thread_data)
+            # Custom mount paths are resolved by LocalSandbox._resolve_path()
        with get_file_operation_lock(sandbox, path):
            content = sandbox.read_file(path)
            if not content:
@@ -43,5 +43,5 @@ You have access to the sandbox environment:
    tools=["bash", "ls", "read_file", "write_file", "str_replace"],  # Sandbox tools only
    disallowed_tools=["task", "ask_clarification", "present_files"],
    model="inherit",
-    max_turns=30,
+    max_turns=60,
 )
@@ -44,5 +44,5 @@ You have access to the same sandbox environment as the parent agent:
    tools=None,  # Inherit all tools from parent
    disallowed_tools=["task", "ask_clarification", "present_files"],  # Prevent nesting and clarification
    model="inherit",
-    max_turns=50,
+    max_turns=100,
 )
@@ -28,9 +28,27 @@ def get_subagent_config(name: str) -> SubagentConfig | None:

    app_config = get_subagents_app_config()
    effective_timeout = app_config.get_timeout_for(name)
+    effective_max_turns = app_config.get_max_turns_for(name, config.max_turns)
+
+    overrides = {}
    if effective_timeout != config.timeout_seconds:
-        logger.debug(f"Subagent '{name}': timeout overridden by config.yaml ({config.timeout_seconds}s -> {effective_timeout}s)")
-        config = replace(config, timeout_seconds=effective_timeout)
+        logger.debug(
+            "Subagent '%s': timeout overridden by config.yaml (%ss -> %ss)",
+            name,
+            config.timeout_seconds,
+            effective_timeout,
+        )
+        overrides["timeout_seconds"] = effective_timeout
+    if effective_max_turns != config.max_turns:
+        logger.debug(
+            "Subagent '%s': max_turns overridden by config.yaml (%s -> %s)",
+            name,
+            config.max_turns,
+            effective_max_turns,
+        )
+        overrides["max_turns"] = effective_max_turns
+    if overrides:
+        config = replace(config, **overrides)

    return config

@@ -57,6 +57,42 @@ def _build_mcp_servers() -> dict[str, dict[str, Any]]:
    return build_servers_config(ExtensionsConfig.from_file())


+def _build_acp_mcp_servers() -> list[dict[str, Any]]:
+    """Build the ACP ``mcpServers`` payload for ``new_session``.
+
+    DeerFlow's MCP helper yields a name -> config mapping for the LangChain MCP
+    adapter, whereas the ACP client expects a list of server objects. This
+    function converts every enabled server into that list form.
+
+    Raises:
+        ValueError: A server lacks the field its transport requires
+            (``command`` for stdio, ``url`` for http/sse) or declares an
+            unsupported transport type.
+    """
+    from deerflow.config.extensions_config import ExtensionsConfig
+
+    def _as_name_value_pairs(mapping) -> list[dict[str, Any]]:
+        # ACP carries env vars and headers as lists of {"name", "value"} objects.
+        return [{"name": key, "value": value} for key, value in mapping.items()]
+
+    wire_servers: list[dict[str, Any]] = []
+    for name, server_config in ExtensionsConfig.from_file().get_enabled_mcp_servers().items():
+        transport_type = server_config.type or "stdio"
+        if transport_type not in ("stdio", "http", "sse"):
+            raise ValueError(f"MCP server '{name}' has unsupported transport type: {transport_type}")
+
+        entry: dict[str, Any] = {"name": name, "type": transport_type}
+        if transport_type == "stdio":
+            if not server_config.command:
+                raise ValueError(f"MCP server '{name}' with stdio transport requires 'command' field")
+            entry["command"] = server_config.command
+            entry["args"] = server_config.args
+            entry["env"] = _as_name_value_pairs(server_config.env)
+        else:
+            if not server_config.url:
+                raise ValueError(f"MCP server '{name}' with {transport_type} transport requires 'url' field")
+            entry["url"] = server_config.url
+            entry["headers"] = _as_name_value_pairs(server_config.headers)
+
+        wire_servers.append(entry)
+
+    return wire_servers
+
+
 def _build_permission_response(options: list[Any], *, auto_approve: bool) -> Any:
    """Build an ACP permission response.

@@ -173,7 +209,15 @@ def build_invoke_acp_agent_tool(agents: dict) -> BaseTool:
        cmd = agent_config.command
        args = agent_config.args or []
        physical_cwd = _get_work_dir(thread_id)
-        mcp_servers = _build_mcp_servers()
+        try:
+            mcp_servers = _build_acp_mcp_servers()
+        except ValueError as exc:
+            logger.warning(
+                "Invalid MCP server configuration for ACP agent '%s'; continuing without MCP servers: %s",
+                agent,
+                exc,
+            )
+            mcp_servers = []
        agent_env: dict[str, str] | None = None
        if agent_config.env:
            agent_env = {k: (os.environ.get(v[1:], "") if v.startswith("$") else v) for k, v in agent_config.env.items()}
@@ -1,10 +1,22 @@
 """File conversion utilities.

-Converts document files (PDF, PPT, Excel, Word) to Markdown using markitdown.
+Converts document files (PDF, PPT, Excel, Word) to Markdown.
+
+PDF conversion strategy (auto mode):
+  1. Try pymupdf4llm if installed — better heading detection, faster on most files.
+  2. If output is suspiciously short (< _MIN_CHARS_PER_PAGE chars/page, or < 200 chars
+     total when page count is unavailable), treat as image-based and fall back to MarkItDown.
+  3. If pymupdf4llm is not installed, use MarkItDown directly (existing behaviour).
+
+Large files (> _ASYNC_THRESHOLD_BYTES) are converted in a thread pool via
+asyncio.to_thread() to avoid blocking the event loop (fixes #1569).
+
 No FastAPI or HTTP dependencies — pure utility functions.
 """

+import asyncio
 import logging
+import re
 from pathlib import Path

 logger = logging.getLogger(__name__)
@@ -20,28 +32,278 @@ CONVERTIBLE_EXTENSIONS = {
    ".docx",
 }

+# Files larger than this threshold are converted in a background thread.
+# Small files complete in < 1s synchronously; spawning a thread adds unnecessary
+# scheduling overhead for them.
+_ASYNC_THRESHOLD_BYTES = 1 * 1024 * 1024  # 1 MB
+
+# If pymupdf4llm produces fewer characters *per page* than this threshold,
+# the PDF is likely image-based or encrypted — fall back to MarkItDown.
+# Rationale: normal text PDFs yield 200-2000 chars/page; image-based PDFs
+# yield close to 0. 50 chars/page gives a wide safety margin.
+# Falls back to absolute 200-char check when page count is unavailable.
+_MIN_CHARS_PER_PAGE = 50
+
+
+def _pymupdf_output_too_sparse(text: str, file_path: Path) -> bool:
+    """Decide whether pymupdf4llm output is too thin to be trusted.
+
+    The stripped character count is divided by the PDF's page count and
+    compared with ``_MIN_CHARS_PER_PAGE``.  When no positive page count can be
+    obtained (pymupdf missing, file cannot be opened, zero pages) an absolute
+    200-character floor is applied instead.
+
+    Args:
+        text: Markdown produced by pymupdf4llm.
+        file_path: The PDF the text was extracted from.
+
+    Returns:
+        True when the output looks like an image-based / failed extraction.
+    """
+    stripped_len = len(text.strip())
+    page_count = 0  # 0 doubles as "page count unknown"
+    handle = None
+    try:
+        import pymupdf
+
+        handle = pymupdf.open(str(file_path))
+        page_count = len(handle)
+    except Exception:
+        # Best effort only: any failure just means the page count is unknown.
+        page_count = 0
+    finally:
+        if handle is not None:
+            try:
+                handle.close()
+            except Exception:
+                pass
+
+    if page_count > 0:
+        return (stripped_len / page_count) < _MIN_CHARS_PER_PAGE
+    # Page count unavailable: fall back to the absolute threshold.
+    return stripped_len < 200
+
+
+def _convert_pdf_with_pymupdf4llm(file_path: Path) -> str | None:
+    """Try to turn a PDF into markdown via the optional pymupdf4llm package.
+
+    Args:
+        file_path: Path to the PDF.
+
+    Returns:
+        The markdown text, or None when pymupdf4llm cannot be imported or its
+        conversion raises (the failure is logged with a traceback).
+    """
+    try:
+        import pymupdf4llm
+    except ImportError:
+        # Optional dependency is absent: signal "unavailable" without logging.
+        markdown = None
+    else:
+        try:
+            markdown = pymupdf4llm.to_markdown(str(file_path))
+        except Exception:
+            logger.exception("pymupdf4llm failed to convert %s; falling back to MarkItDown", file_path.name)
+            markdown = None
+    return markdown
+
+
+def _convert_with_markitdown(file_path: Path) -> str:
+    """Run MarkItDown on *file_path* and return the extracted markdown text.
+
+    MarkItDown is imported inside the function, so the package is only loaded
+    when a conversion is actually requested.
+    """
+    from markitdown import MarkItDown
+
+    conversion = MarkItDown().convert(str(file_path))
+    return conversion.text_content
+
+
+def _do_convert(file_path: Path, pdf_converter: str) -> str:
+    """Convert *file_path* to markdown text synchronously.
+
+    Invoked either inline or through ``asyncio.to_thread``.
+
+    Args:
+        file_path: Path to the file.
+        pdf_converter: "auto" | "pymupdf4llm" | "markitdown"
+
+    Returns:
+        The markdown text of the document.
+    """
+    # Non-PDF inputs, and PDFs pinned to MarkItDown, skip pymupdf4llm entirely.
+    if file_path.suffix.lower() != ".pdf" or pdf_converter == "markitdown":
+        return _convert_with_markitdown(file_path)
+
+    # "auto" or explicit "pymupdf4llm": try pymupdf4llm first.
+    markdown = _convert_pdf_with_pymupdf4llm(file_path)
+    if markdown is None:
+        # pymupdf4llm is not installed, or its conversion failed.
+        return _convert_with_markitdown(file_path)
+
+    # Explicit mode takes the output as-is regardless of length; auto mode
+    # accepts it only when the chars-per-page density does not look like an
+    # image-based PDF.
+    if pdf_converter == "pymupdf4llm" or not _pymupdf_output_too_sparse(markdown, file_path):
+        return markdown
+
+    logger.warning(
+        "pymupdf4llm produced only %d chars for %s (likely image-based PDF); falling back to MarkItDown",
+        len(markdown.strip()),
+        file_path.name,
+    )
+    return _convert_with_markitdown(file_path)
+

 async def convert_file_to_markdown(file_path: Path) -> Path | None:
-    """Convert a file to markdown using markitdown.
+    """Convert a supported document file to Markdown.
+
+    PDF files are handled with a two-converter strategy (see module docstring).
+    Large files (> 1 MB) are offloaded to a thread pool to avoid blocking the
+    event loop.

    Args:
        file_path: Path to the file to convert.

    Returns:
-        Path to the markdown file if conversion was successful, None otherwise.
+        Path to the generated .md file, or None if conversion failed.
    """
    try:
-        from markitdown import MarkItDown
+        pdf_converter = _get_pdf_converter()
+        file_size = file_path.stat().st_size

-        md = MarkItDown()
-        result = md.convert(str(file_path))
+        # Only files above the size threshold are handed to a worker thread;
+        # smaller ones are converted inline on the calling (event loop) thread.
+        if file_size > _ASYNC_THRESHOLD_BYTES:
+            text = await asyncio.to_thread(_do_convert, file_path, pdf_converter)
+        else:
+            text = _do_convert(file_path, pdf_converter)

-        # Save as .md file with same name
        md_path = file_path.with_suffix(".md")
+        # NOTE(review): this write is not wrapped in to_thread, so it runs on
+        # the event loop thread regardless of file size — confirm acceptable.
-        md_path.write_text(result.text_content, encoding="utf-8")
+        md_path.write_text(text, encoding="utf-8")

-        logger.info(f"Converted {file_path.name} to markdown: {md_path.name}")
+        logger.info("Converted %s to markdown: %s (%d chars)", file_path.name, md_path.name, len(text))
        return md_path
    except Exception as e:
-        logger.error(f"Failed to convert {file_path.name} to markdown: {e}")
+        # Any failure inside the try (config lookup, stat, conversion, write)
+        # is logged without a traceback and reported to the caller as None.
+        logger.error("Failed to convert %s to markdown: %s", file_path.name, e)
        return None
+
+
+# Regex for bold-only lines that look like section headings.
+# Targets SEC filing structural headings that pymupdf4llm renders as **bold**
+# rather than # Markdown headings (because they use same font size as body text,
+# distinguished only by bold+caps formatting).
+#
+# Pattern requires ALL of:
+#   1. Entire line is a single **...** block (no surrounding prose)
+#   2. Starts with a recognised structural keyword:
+#      - ITEM / PART / SECTION (with optional number/letter after)
+#      - SCHEDULE, EXHIBIT, APPENDIX, ANNEX, CHAPTER
+#      All-caps addresses, boilerplate ("CURRENT REPORT", "SIGNATURES",
+#      "WASHINGTON, DC 20549") do NOT start with these keywords and are excluded.
+#
+# Chinese headings (第三节...) are already captured as standard # headings
+# by pymupdf4llm, so they don't need this pattern.
+_BOLD_HEADING_RE = re.compile(r"^\*\*((ITEM|PART|SECTION|SCHEDULE|EXHIBIT|APPENDIX|ANNEX|CHAPTER)\b[A-Z0-9 .,\-]*)\*\*\s*$")
+
+# Regex for split-bold headings produced by pymupdf4llm when a heading spans
+# multiple text spans in the PDF (e.g. section number and title are separate spans).
+# Matches lines like:  **1** **Introduction**  or  **3.2** **Multi-Head Attention**
+# Requirements:
+#   1. Entire line consists only of **...** blocks separated by whitespace (no prose)
+#   2. First block is a section number: a leading digit or uppercase letter followed by digits and dots (e.g. "1", "3.2", "A.1")
+#   3. Second block must not be purely numeric/punctuation — excludes financial table
+#      headers like **2023** **2022** **2021** while allowing non-ASCII titles such as
+#      **1** **概述** or accented words (negative lookahead instead of [A-Za-z])
+#   4. At most two additional blocks (four total) with [^*]+ (no * inside) to keep
+#      the regex linear and avoid ReDoS on attacker-controlled content
+_SPLIT_BOLD_HEADING_RE = re.compile(r"^\*\*[\dA-Z][\d\.]*\*\*\s+\*\*(?!\d[\d\s.,\-–—/:()%]*\*\*)[^*]+\*\*(?:\s+\*\*[^*]+\*\*){0,2}\s*$")
+
+# Maximum number of outline entries injected into the agent context.
+# Keeps prompt size bounded even for very long documents.
+MAX_OUTLINE_ENTRIES = 50
+
+_ALLOWED_PDF_CONVERTERS = {"auto", "pymupdf4llm", "markitdown"}
+
+
+def _clean_bold_title(raw: str) -> str:
+    """Normalise a title string that may contain pymupdf4llm bold artefacts.
+
+    pymupdf4llm sometimes emits adjacent bold spans as ``**A** **B**`` instead
+    of a single ``**A B**`` block.  This helper merges those fragments and then
+    strips the ``**...**`` wrapper when the whole string is one bold block, so
+    the caller gets plain text.  Strings holding several bold blocks separated
+    by prose are returned unchanged (after merging) rather than half-stripped.
+
+    Args:
+        raw: Title text, possibly wrapped in Markdown bold markers.
+
+    Returns:
+        The cleaned title.
+
+    Examples::
+
+        "**Overview**"                       → "Overview"
+        "**UNITED STATES** **SECURITIES**"   → "UNITED STATES SECURITIES"
+        "plain text"                         → "plain text"  (unchanged)
+        "**A** and **B**"                    → "**A** and **B**"  (unchanged)
+    """
+    # Merge adjacent bold spans: "** **" → " "
+    merged = re.sub(r"\*\*\s*\*\*", " ", raw).strip()
+    # Strip the wrapper only when it encloses a single bold block.  Under
+    # fullmatch the lazy ``.+?`` still backtracks across inner markers, so
+    # without the "**" check "**A** and **B**" would come back as the
+    # mangled "A** and **B".
+    if m := re.fullmatch(r"\*\*(.+?)\*\*", merged, re.DOTALL):
+        inner = m.group(1)
+        if "**" not in inner:
+            return inner.strip()
+    return merged
+
+
+def _outline_title(stripped: str) -> str | None:
+    """Return the heading title carried by a non-empty, stripped line, or None."""
+    # Style 1: standard Markdown heading
+    if stripped.startswith("#"):
+        return _clean_bold_title(stripped.lstrip("#").strip()) or None
+    # Style 2: single bold block with SEC structural keyword
+    if m := _BOLD_HEADING_RE.match(stripped):
+        return m.group(1).strip() or None
+    # Style 3: split-bold heading — **<num>** **<title>**
+    # Regex already enforces max 4 blocks and non-numeric second block.
+    if _SPLIT_BOLD_HEADING_RE.match(stripped):
+        return " ".join(re.findall(r"\*\*([^*]+)\*\*", stripped)) or None
+    return None
+
+
+def extract_outline(md_path: Path) -> list[dict]:
+    """Extract document outline (headings) from a Markdown file.
+
+    Recognises three heading styles produced by pymupdf4llm:
+
+    1. Standard Markdown headings: lines starting with one or more '#'.
+       Inline ``**...**`` wrappers and adjacent bold spans (``** **``) are
+       cleaned so the title is plain text.
+
+    2. Bold-only structural headings: ``**ITEM 1. BUSINESS**``, ``**PART II**``,
+       etc.  SEC filings use bold+caps for section headings with the same font
+       size as body text, so pymupdf4llm cannot promote them to # headings.
+
+    3. Split-bold headings: ``**1** **Introduction**``, ``**3.2** **Attention**``.
+       pymupdf4llm emits these when the section number and title text are
+       separate spans in the underlying PDF (common in academic papers).
+
+    Args:
+        md_path: Path to the .md file.
+
+    Returns:
+        List of dicts with keys: title (str), line (int, 1-based).
+        When the outline is truncated at MAX_OUTLINE_ENTRIES, a sentinel entry
+        ``{"truncated": True}`` is appended as the last element so callers can
+        render a "showing first N headings" hint without re-scanning the file.
+        The sentinel is only added when a further heading actually exists past
+        the cap; a document with exactly MAX_OUTLINE_ENTRIES headings is
+        returned without it.
+        Returns an empty list if the file cannot be read or has no headings.
+    """
+    outline: list[dict] = []
+    try:
+        with md_path.open(encoding="utf-8") as f:
+            for lineno, line in enumerate(f, 1):
+                stripped = line.strip()
+                if not stripped:
+                    continue
+
+                title = _outline_title(stripped)
+                if title is None:
+                    continue
+
+                if len(outline) >= MAX_OUTLINE_ENTRIES:
+                    # Another heading exists beyond the cap, so the outline
+                    # really is truncated; stop scanning here.
+                    outline.append({"truncated": True})
+                    break
+                outline.append({"title": title, "line": lineno})
+    except Exception:
+        # Best effort: an unreadable file yields no outline rather than an error.
+        return []
+
+    return outline
+
+
+def _get_pdf_converter() -> str:
+    """Resolve the ``pdf_converter`` setting from the app config.
+
+    The configured value is stripped, lower-cased and checked against
+    ``_ALLOWED_PDF_CONVERTERS`` so that spellings such as 'AUTO' or
+    'MarkItDown' in config.yaml are accepted and unknown values are rejected
+    with a warning.
+
+    Returns:
+        The normalised converter name, or 'auto' when the config cannot be
+        loaded, has no ``uploads`` section, or holds an unrecognised value.
+    """
+    try:
+        from deerflow.config.app_config import get_app_config
+
+        uploads_cfg = getattr(get_app_config(), "uploads", None)
+        if uploads_cfg is None:
+            return "auto"
+
+        configured = str(getattr(uploads_cfg, "pdf_converter", "auto")).strip().lower()
+        if configured in _ALLOWED_PDF_CONVERTERS:
+            return configured
+
+        logger.warning("Invalid pdf_converter value %r; falling back to 'auto'", configured)
+        return "auto"
+    except Exception:
+        # Config lookup is best effort; any failure means the default applies.
+        return "auto"
@@ -9,16 +9,17 @@ dependencies = [
    "dotenv>=0.9.9",
    "httpx>=0.28.0",
    "kubernetes>=30.0.0",
-    "langchain>=1.2.3",
+    "langchain>=1.2.3,<1.2.10",
    "langchain-anthropic>=1.3.4",
    "langchain-deepseek>=1.0.1",
    "langchain-mcp-adapters>=0.1.0",
    "langchain-openai>=1.1.7",
    "langfuse>=3.4.1",
    "langgraph>=1.0.6,<1.0.10",
+    "langgraph-prebuilt>=1.0.6,<1.0.9",
    "langgraph-api>=0.7.0,<0.8.0",
    "langgraph-cli>=0.4.14",
-    "langgraph-runtime-inmem>=0.22.1",
+    "langgraph-runtime-inmem>=0.22.1,<0.27.0",
    "markdownify>=1.2.2",
    "markitdown[all,xlsx]>=0.0.1a2",
    "pydantic>=2.12.5",
@@ -34,6 +35,9 @@ dependencies = [
    "langgraph-sdk>=0.1.51",
 ]

+[project.optional-dependencies]
+pymupdf = ["pymupdf4llm>=0.0.17"]
+
 [build-system]
 requires = ["hatchling"]
 build-backend = "hatchling.build"