mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-10 01:15:58 +00:00
d9f4724950
* feat(tool-search): add hash-scoped promoted state to ThreadState * feat(tool-search): add immutable DeferredToolCatalog with stable hash * feat(tool-search): add build_deferred_tool_setup + Command-writing tool_search * refactor(tool-search): replace deferred-tool ContextVar with closures + graph state (#3272) Build the deferred catalog + tool_search tool per agent from the policy-filtered tool list (after skill allowed-tools), pass deferred_names + catalog_hash explicitly to DeferredToolFilterMiddleware and the prompt, and record promotions in ThreadState.promoted (scoped by catalog_hash) via a Command-returning tool_search. Removes DeferredToolRegistry and the _registry_var ContextVar so deferral no longer depends on build/execute sharing an async context. MCP tools are tagged with metadata[deerflow_mcp]; client.py assembles deferral the same way. Catalog is built AFTER tool-policy filtering (no policy-excluded tool can leak via tool_search) and assembly is fail-closed. Migrate tests off the deleted registry APIs; delete the obsolete ContextVar-based #2884 regression (re-covered by state-based tests in a follow-up). * test(tool-search): lock tool_search promotion into next model turn via graph state * test(tool-search): cross-context, policy-leak, fail-closed, #2884 isolation regressions * test(tool-search): align real-LLM e2e with closure-based deferred setup * docs: update DeferredToolFilterMiddleware description for closure+state design * style(tests): drop unused import in test_deferred_setup (ruff) * test(tool-search): harden merge_promoted + replace tautological catalog test From independent code review: - merge_promoted: use existing.get("catalog_hash") so a forward-incompatible or externally-injected persisted promoted dict triggers a replace instead of a KeyError crash; add regression test for the malformed-existing case. - test_deferred_catalog: replace the `== [] or True` tautology (a test that could never fail) with a deterministic invalid-regex->literal-fallback check (positive match on calc + negative empty match). - DeferredToolCatalog: comment why frozen-without-slots is required for the cached_property hash/names fields (adding slots=True would break them). * fix(tool-search): read tool_search.enabled from self._app_config in client DeerFlowClient._ensure_agent called get_app_config() directly to read tool_search.enabled, but the client already resolves and stores its config as self._app_config at construction (and uses it everywhere else). The bare call re-resolves config from disk at agent-build time, which raises FileNotFoundError in environments without a config.yaml (CI) — test_client.py's fixture only patches get_app_config during __init__, so the later call hit the real loader. Use self._app_config, matching the rest of the client. * test(tool-search): lock tool_search post-policy append ordering tool_search is appended after skill-allowlist filtering, so the allowlist can no longer deny it by name. Lock the intended contract: it only appears when allowed MCP tools survive the filter, and its catalog (derived from the already policy-filtered list) can never expose a denied tool. Addresses the ordering observation from the Copilot review on #3342.
96 lines
3.2 KiB
Python
96 lines
3.2 KiB
Python
from typing import Annotated, NotRequired, TypedDict
|
|
|
|
from langchain.agents import AgentState
|
|
|
|
|
|
class SandboxState(TypedDict):
|
|
sandbox_id: NotRequired[str | None]
|
|
|
|
|
|
class ThreadDataState(TypedDict):
|
|
workspace_path: NotRequired[str | None]
|
|
uploads_path: NotRequired[str | None]
|
|
outputs_path: NotRequired[str | None]
|
|
|
|
|
|
class ViewedImageData(TypedDict):
|
|
base64: str
|
|
mime_type: str
|
|
|
|
|
|
def merge_artifacts(existing: list[str] | None, new: list[str] | None) -> list[str]:
|
|
"""Reducer for artifacts list - merges and deduplicates artifacts."""
|
|
if existing is None:
|
|
return new or []
|
|
if new is None:
|
|
return existing
|
|
# Use dict.fromkeys to deduplicate while preserving order
|
|
return list(dict.fromkeys(existing + new))
|
|
|
|
|
|
def merge_viewed_images(existing: dict[str, ViewedImageData] | None, new: dict[str, ViewedImageData] | None) -> dict[str, ViewedImageData]:
|
|
"""Reducer for viewed_images dict - merges image dictionaries.
|
|
|
|
Special case: If new is an empty dict {}, it clears the existing images.
|
|
This allows middlewares to clear the viewed_images state after processing.
|
|
"""
|
|
if existing is None:
|
|
return new or {}
|
|
if new is None:
|
|
return existing
|
|
# Special case: empty dict means clear all viewed images
|
|
if len(new) == 0:
|
|
return {}
|
|
# Merge dictionaries, new values override existing ones for same keys
|
|
return {**existing, **new}
|
|
|
|
|
|
def merge_todos(existing: list | None, new: list | None) -> list | None:
|
|
"""Reducer for todos list - keeps the last non-None value.
|
|
|
|
Semantics:
|
|
- If `new` is None (node didn't touch todos), preserve `existing`.
|
|
- If `new` is provided (even empty list), it represents an explicit
|
|
update and wins over `existing`.
|
|
"""
|
|
if new is None:
|
|
return existing
|
|
return new
|
|
|
|
|
|
class PromotedTools(TypedDict):
|
|
catalog_hash: str
|
|
names: list[str]
|
|
|
|
|
|
def merge_promoted(existing: PromotedTools | None, new: PromotedTools | None) -> PromotedTools | None:
|
|
"""Reducer for deferred-tool promotions, scoped by catalog hash.
|
|
|
|
- new None/empty -> preserve existing (node didn't touch promotions).
|
|
- catalog_hash changed -> replace wholesale, dropping stale names (prevents a
|
|
persisted bare name from exposing a different tool after catalog drift).
|
|
- same catalog_hash -> union names, dedupe, preserve order.
|
|
"""
|
|
if not new:
|
|
return existing
|
|
if existing is None or existing.get("catalog_hash") != new["catalog_hash"]:
|
|
return {
|
|
"catalog_hash": new["catalog_hash"],
|
|
"names": list(dict.fromkeys(new["names"])),
|
|
}
|
|
return {
|
|
"catalog_hash": existing["catalog_hash"],
|
|
"names": list(dict.fromkeys(existing["names"] + new["names"])),
|
|
}
|
|
|
|
|
|
class ThreadState(AgentState):
|
|
sandbox: NotRequired[SandboxState | None]
|
|
thread_data: NotRequired[ThreadDataState | None]
|
|
title: NotRequired[str | None]
|
|
artifacts: Annotated[list[str], merge_artifacts]
|
|
todos: Annotated[list | None, merge_todos]
|
|
uploaded_files: NotRequired[list[dict] | None]
|
|
viewed_images: Annotated[dict[str, ViewedImageData], merge_viewed_images] # image_path -> {base64, mime_type}
|
|
promoted: Annotated[PromotedTools | None, merge_promoted]
|