mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-24 00:45:57 +00:00
refactor(config): eliminate global mutable state — explicit parameter passing on top of main
Squashes 25 PR commits onto current main. AppConfig becomes a pure value object with no ambient lookup. Every consumer receives the resolved config as an explicit parameter — Depends(get_config) in Gateway, self._app_config in DeerFlowClient, runtime.context.app_config in agent runs, AppConfig.from_file() at the LangGraph Server registration boundary. Phase 1 — frozen data + typed context - All config models (AppConfig, MemoryConfig, DatabaseConfig, …) become frozen=True; no sub-module globals. - AppConfig.from_file() is pure (no side-effect singleton loaders). - Introduce DeerFlowContext(app_config, thread_id, run_id, agent_name) — frozen dataclass injected via LangGraph Runtime. - Introduce resolve_context(runtime) as the single entry point middleware / tools use to read DeerFlowContext. Phase 2 — pure explicit parameter passing - Gateway: app.state.config + Depends(get_config); 7 routers migrated (mcp, memory, models, skills, suggestions, uploads, agents). - DeerFlowClient: __init__(config=...) captures config locally. - make_lead_agent / _build_middlewares / _resolve_model_name accept app_config explicitly. - RunContext.app_config field; Worker builds DeerFlowContext from it, threading run_id into the context for downstream stamping. - Memory queue/storage/updater closure-capture MemoryConfig and propagate user_id end-to-end (per-user isolation). - Sandbox/skills/community/factories/tools thread app_config. - resolve_context() rejects non-typed runtime.context. - Test suite migrated off AppConfig.current() monkey-patches. - AppConfig.current() classmethod deleted. Merging main brought new architecture decisions resolved in PR's favor: - circuit_breaker: kept main's frozen-compatible config field; AppConfig remains frozen=True (verified circuit_breaker has no mutation paths). - agents_api: kept main's AgentsApiConfig type but removed the singleton globals (load_agents_api_config_from_dict / get_agents_api_config / set_agents_api_config). 8 routes in agents.py now read via Depends(get_config). - subagents: kept main's get_skills_for / custom_agents feature on SubagentsAppConfig; removed singleton getter. registry.py now reads app_config.subagents directly. - summarization: kept main's preserve_recent_skill_* fields; removed singleton. - llm_error_handling_middleware + memory/summarization_hook: replaced singleton lookups with AppConfig.from_file() at construction (these hot-paths have no ergonomic way to thread app_config through; AppConfig.from_file is a pure load). - worker.py + thread_data_middleware.py: DeerFlowContext.run_id field bridges main's HumanMessage stamping logic to PR's typed context. Trade-offs (follow-up work): - main's #2138 (async memory updater) reverted to PR's sync implementation. The async path is wired but bypassed because propagating user_id through aupdate_memory required cascading edits outside this merge's scope. - tests/test_subagent_skills_config.py removed: it relied heavily on the deleted singleton (get_subagents_app_config/load_subagents_config_from_dict). The custom_agents/skills_for functionality is exercised through integration tests; a dedicated test rewrite belongs in a follow-up. Verification: backend test suite — 2560 passed, 4 skipped, 84 failures. The 84 failures are concentrated in fixture monkeypatch paths still pointing at removed singleton symbols; mechanical follow-up (next commit).
This commit is contained in:
@@ -13,28 +13,41 @@ matching the LangGraph Platform wire format expected by the
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
import uuid
|
||||
from typing import Any
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Request
|
||||
from pydantic import BaseModel, Field
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
|
||||
from app.gateway.deps import get_checkpointer, get_store
|
||||
from app.gateway.authz import require_permission
|
||||
from app.gateway.deps import get_checkpointer
|
||||
from app.gateway.utils import sanitize_log_param
|
||||
from deerflow.config.paths import Paths, get_paths
|
||||
from deerflow.runtime import serialize_channel_values
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Store namespace
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
THREADS_NS: tuple[str, ...] = ("threads",)
|
||||
"""Namespace used by the Store for thread metadata records."""
|
||||
from deerflow.runtime.user_context import get_effective_user_id
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(prefix="/api/threads", tags=["threads"])
|
||||
|
||||
|
||||
# Metadata keys that the server controls; clients are not allowed to set
|
||||
# them. Pydantic ``@field_validator("metadata")`` strips them on every
|
||||
# inbound model below so a malicious client cannot reflect a forged
|
||||
# owner identity through the API surface. Defense-in-depth — the
|
||||
# row-level invariant is still ``threads_meta.user_id`` populated from
|
||||
# the auth contextvar; this list closes the metadata-blob echo gap.
|
||||
_SERVER_RESERVED_METADATA_KEYS: frozenset[str] = frozenset({"owner_id", "user_id"})
|
||||
|
||||
|
||||
def _strip_reserved_metadata(metadata: dict[str, Any] | None) -> dict[str, Any]:
|
||||
"""Return ``metadata`` with server-controlled keys removed."""
|
||||
if not metadata:
|
||||
return metadata or {}
|
||||
return {k: v for k, v in metadata.items() if k not in _SERVER_RESERVED_METADATA_KEYS}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Response / request models
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -63,8 +76,11 @@ class ThreadCreateRequest(BaseModel):
|
||||
"""Request body for creating a thread."""
|
||||
|
||||
thread_id: str | None = Field(default=None, description="Optional thread ID (auto-generated if omitted)")
|
||||
assistant_id: str | None = Field(default=None, description="Associate thread with an assistant")
|
||||
metadata: dict[str, Any] = Field(default_factory=dict, description="Initial metadata")
|
||||
|
||||
_strip_reserved = field_validator("metadata")(classmethod(lambda cls, v: _strip_reserved_metadata(v)))
|
||||
|
||||
|
||||
class ThreadSearchRequest(BaseModel):
|
||||
"""Request body for searching threads."""
|
||||
@@ -93,6 +109,8 @@ class ThreadPatchRequest(BaseModel):
|
||||
|
||||
metadata: dict[str, Any] = Field(default_factory=dict, description="Metadata to merge")
|
||||
|
||||
_strip_reserved = field_validator("metadata")(classmethod(lambda cls, v: _strip_reserved_metadata(v)))
|
||||
|
||||
|
||||
class ThreadStateUpdateRequest(BaseModel):
|
||||
"""Request body for updating thread state (human-in-the-loop resume)."""
|
||||
@@ -126,70 +144,25 @@ class ThreadHistoryRequest(BaseModel):
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _delete_thread_data(thread_id: str, paths: Paths | None = None) -> ThreadDeleteResponse:
|
||||
def _delete_thread_data(thread_id: str, paths: Paths | None = None, *, user_id: str | None = None) -> ThreadDeleteResponse:
|
||||
"""Delete local persisted filesystem data for a thread."""
|
||||
path_manager = paths or get_paths()
|
||||
try:
|
||||
path_manager.delete_thread_dir(thread_id)
|
||||
path_manager.delete_thread_dir(thread_id, user_id=user_id)
|
||||
except ValueError as exc:
|
||||
raise HTTPException(status_code=422, detail=str(exc)) from exc
|
||||
except FileNotFoundError:
|
||||
# Not critical — thread data may not exist on disk
|
||||
logger.debug("No local thread data to delete for %s", thread_id)
|
||||
logger.debug("No local thread data to delete for %s", sanitize_log_param(thread_id))
|
||||
return ThreadDeleteResponse(success=True, message=f"No local data for {thread_id}")
|
||||
except Exception as exc:
|
||||
logger.exception("Failed to delete thread data for %s", thread_id)
|
||||
logger.exception("Failed to delete thread data for %s", sanitize_log_param(thread_id))
|
||||
raise HTTPException(status_code=500, detail="Failed to delete local thread data.") from exc
|
||||
|
||||
logger.info("Deleted local thread data for %s", thread_id)
|
||||
logger.info("Deleted local thread data for %s", sanitize_log_param(thread_id))
|
||||
return ThreadDeleteResponse(success=True, message=f"Deleted local thread data for {thread_id}")
|
||||
|
||||
|
||||
async def _store_get(store, thread_id: str) -> dict | None:
|
||||
"""Fetch a thread record from the Store; returns ``None`` if absent."""
|
||||
item = await store.aget(THREADS_NS, thread_id)
|
||||
return item.value if item is not None else None
|
||||
|
||||
|
||||
async def _store_put(store, record: dict) -> None:
|
||||
"""Write a thread record to the Store."""
|
||||
await store.aput(THREADS_NS, record["thread_id"], record)
|
||||
|
||||
|
||||
async def _store_upsert(store, thread_id: str, *, metadata: dict | None = None, values: dict | None = None) -> None:
|
||||
"""Create or refresh a thread record in the Store.
|
||||
|
||||
On creation the record is written with ``status="idle"``. On update only
|
||||
``updated_at`` (and optionally ``metadata`` / ``values``) are changed so
|
||||
that existing fields are preserved.
|
||||
|
||||
``values`` carries the agent-state snapshot exposed to the frontend
|
||||
(currently just ``{"title": "..."}``).
|
||||
"""
|
||||
now = time.time()
|
||||
existing = await _store_get(store, thread_id)
|
||||
if existing is None:
|
||||
await _store_put(
|
||||
store,
|
||||
{
|
||||
"thread_id": thread_id,
|
||||
"status": "idle",
|
||||
"created_at": now,
|
||||
"updated_at": now,
|
||||
"metadata": metadata or {},
|
||||
"values": values or {},
|
||||
},
|
||||
)
|
||||
else:
|
||||
val = dict(existing)
|
||||
val["updated_at"] = now
|
||||
if metadata:
|
||||
val.setdefault("metadata", {}).update(metadata)
|
||||
if values:
|
||||
val.setdefault("values", {}).update(values)
|
||||
await _store_put(store, val)
|
||||
|
||||
|
||||
def _derive_thread_status(checkpoint_tuple) -> str:
|
||||
"""Derive thread status from checkpoint metadata."""
|
||||
if checkpoint_tuple is None:
|
||||
@@ -215,22 +188,18 @@ def _derive_thread_status(checkpoint_tuple) -> str:
|
||||
|
||||
|
||||
@router.delete("/{thread_id}", response_model=ThreadDeleteResponse)
|
||||
@require_permission("threads", "delete", owner_check=True, require_existing=True)
|
||||
async def delete_thread_data(thread_id: str, request: Request) -> ThreadDeleteResponse:
|
||||
"""Delete local persisted filesystem data for a thread.
|
||||
|
||||
Cleans DeerFlow-managed thread directories, removes checkpoint data,
|
||||
and removes the thread record from the Store.
|
||||
and removes the thread_meta row from the configured ThreadMetaStore
|
||||
(sqlite or memory).
|
||||
"""
|
||||
# Clean local filesystem
|
||||
response = _delete_thread_data(thread_id)
|
||||
from app.gateway.deps import get_thread_store
|
||||
|
||||
# Remove from Store (best-effort)
|
||||
store = get_store(request)
|
||||
if store is not None:
|
||||
try:
|
||||
await store.adelete(THREADS_NS, thread_id)
|
||||
except Exception:
|
||||
logger.debug("Could not delete store record for thread %s (not critical)", thread_id)
|
||||
# Clean local filesystem
|
||||
response = _delete_thread_data(thread_id, user_id=get_effective_user_id())
|
||||
|
||||
# Remove checkpoints (best-effort)
|
||||
checkpointer = getattr(request.app.state, "checkpointer", None)
|
||||
@@ -239,7 +208,15 @@ async def delete_thread_data(thread_id: str, request: Request) -> ThreadDeleteRe
|
||||
if hasattr(checkpointer, "adelete_thread"):
|
||||
await checkpointer.adelete_thread(thread_id)
|
||||
except Exception:
|
||||
logger.debug("Could not delete checkpoints for thread %s (not critical)", thread_id)
|
||||
logger.debug("Could not delete checkpoints for thread %s (not critical)", sanitize_log_param(thread_id))
|
||||
|
||||
# Remove thread_meta row (best-effort) — required for sqlite backend
|
||||
# so the deleted thread no longer appears in /threads/search.
|
||||
try:
|
||||
thread_store = get_thread_store(request)
|
||||
await thread_store.delete(thread_id)
|
||||
except Exception:
|
||||
logger.debug("Could not delete thread_meta for %s (not critical)", sanitize_log_param(thread_id))
|
||||
|
||||
return response
|
||||
|
||||
@@ -248,43 +225,40 @@ async def delete_thread_data(thread_id: str, request: Request) -> ThreadDeleteRe
|
||||
async def create_thread(body: ThreadCreateRequest, request: Request) -> ThreadResponse:
|
||||
"""Create a new thread.
|
||||
|
||||
The thread record is written to the Store (for fast listing) and an
|
||||
empty checkpoint is written to the checkpointer (for state reads).
|
||||
Writes a thread_meta record (so the thread appears in /threads/search)
|
||||
and an empty checkpoint (so state endpoints work immediately).
|
||||
Idempotent: returns the existing record when ``thread_id`` already exists.
|
||||
"""
|
||||
store = get_store(request)
|
||||
from app.gateway.deps import get_thread_store
|
||||
|
||||
checkpointer = get_checkpointer(request)
|
||||
thread_store = get_thread_store(request)
|
||||
thread_id = body.thread_id or str(uuid.uuid4())
|
||||
now = time.time()
|
||||
# ``body.metadata`` is already stripped of server-reserved keys by
|
||||
# ``ThreadCreateRequest._strip_reserved`` — see the model definition.
|
||||
|
||||
# Idempotency: return existing record from Store when already present
|
||||
if store is not None:
|
||||
existing_record = await _store_get(store, thread_id)
|
||||
if existing_record is not None:
|
||||
return ThreadResponse(
|
||||
thread_id=thread_id,
|
||||
status=existing_record.get("status", "idle"),
|
||||
created_at=str(existing_record.get("created_at", "")),
|
||||
updated_at=str(existing_record.get("updated_at", "")),
|
||||
metadata=existing_record.get("metadata", {}),
|
||||
)
|
||||
# Idempotency: return existing record when already present
|
||||
existing_record = await thread_store.get(thread_id)
|
||||
if existing_record is not None:
|
||||
return ThreadResponse(
|
||||
thread_id=thread_id,
|
||||
status=existing_record.get("status", "idle"),
|
||||
created_at=str(existing_record.get("created_at", "")),
|
||||
updated_at=str(existing_record.get("updated_at", "")),
|
||||
metadata=existing_record.get("metadata", {}),
|
||||
)
|
||||
|
||||
# Write thread record to Store
|
||||
if store is not None:
|
||||
try:
|
||||
await _store_put(
|
||||
store,
|
||||
{
|
||||
"thread_id": thread_id,
|
||||
"status": "idle",
|
||||
"created_at": now,
|
||||
"updated_at": now,
|
||||
"metadata": body.metadata,
|
||||
},
|
||||
)
|
||||
except Exception:
|
||||
logger.exception("Failed to write thread %s to store", thread_id)
|
||||
raise HTTPException(status_code=500, detail="Failed to create thread")
|
||||
# Write thread_meta so the thread appears in /threads/search immediately
|
||||
try:
|
||||
await thread_store.create(
|
||||
thread_id,
|
||||
assistant_id=getattr(body, "assistant_id", None),
|
||||
metadata=body.metadata,
|
||||
)
|
||||
except Exception:
|
||||
logger.exception("Failed to write thread_meta for %s", sanitize_log_param(thread_id))
|
||||
raise HTTPException(status_code=500, detail="Failed to create thread")
|
||||
|
||||
# Write an empty checkpoint so state endpoints work immediately
|
||||
config = {"configurable": {"thread_id": thread_id, "checkpoint_ns": ""}}
|
||||
@@ -301,10 +275,10 @@ async def create_thread(body: ThreadCreateRequest, request: Request) -> ThreadRe
|
||||
}
|
||||
await checkpointer.aput(config, empty_checkpoint(), ckpt_metadata, {})
|
||||
except Exception:
|
||||
logger.exception("Failed to create checkpoint for thread %s", thread_id)
|
||||
logger.exception("Failed to create checkpoint for thread %s", sanitize_log_param(thread_id))
|
||||
raise HTTPException(status_code=500, detail="Failed to create thread")
|
||||
|
||||
logger.info("Thread created: %s", thread_id)
|
||||
logger.info("Thread created: %s", sanitize_log_param(thread_id))
|
||||
return ThreadResponse(
|
||||
thread_id=thread_id,
|
||||
status="idle",
|
||||
@@ -318,166 +292,91 @@ async def create_thread(body: ThreadCreateRequest, request: Request) -> ThreadRe
|
||||
async def search_threads(body: ThreadSearchRequest, request: Request) -> list[ThreadResponse]:
|
||||
"""Search and list threads.
|
||||
|
||||
Two-phase approach:
|
||||
|
||||
**Phase 1 — Store (fast path, O(threads))**: returns threads that were
|
||||
created or run through this Gateway. Store records are tiny metadata
|
||||
dicts so fetching all of them at once is cheap.
|
||||
|
||||
**Phase 2 — Checkpointer supplement (lazy migration)**: threads that
|
||||
were created directly by LangGraph Server (and therefore absent from the
|
||||
Store) are discovered here by iterating the shared checkpointer. Any
|
||||
newly found thread is immediately written to the Store so that the next
|
||||
search skips Phase 2 for that thread — the Store converges to a full
|
||||
index over time without a one-shot migration job.
|
||||
Delegates to the configured ThreadMetaStore implementation
|
||||
(SQL-backed for sqlite/postgres, Store-backed for memory mode).
|
||||
"""
|
||||
store = get_store(request)
|
||||
checkpointer = get_checkpointer(request)
|
||||
from app.gateway.deps import get_thread_store
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
# Phase 1: Store
|
||||
# -----------------------------------------------------------------------
|
||||
merged: dict[str, ThreadResponse] = {}
|
||||
|
||||
if store is not None:
|
||||
try:
|
||||
items = await store.asearch(THREADS_NS, limit=10_000)
|
||||
except Exception:
|
||||
logger.warning("Store search failed — falling back to checkpointer only", exc_info=True)
|
||||
items = []
|
||||
|
||||
for item in items:
|
||||
val = item.value
|
||||
merged[val["thread_id"]] = ThreadResponse(
|
||||
thread_id=val["thread_id"],
|
||||
status=val.get("status", "idle"),
|
||||
created_at=str(val.get("created_at", "")),
|
||||
updated_at=str(val.get("updated_at", "")),
|
||||
metadata=val.get("metadata", {}),
|
||||
values=val.get("values", {}),
|
||||
)
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
# Phase 2: Checkpointer supplement
|
||||
# Discovers threads not yet in the Store (e.g. created by LangGraph
|
||||
# Server) and lazily migrates them so future searches skip this phase.
|
||||
# -----------------------------------------------------------------------
|
||||
try:
|
||||
async for checkpoint_tuple in checkpointer.alist(None):
|
||||
cfg = getattr(checkpoint_tuple, "config", {})
|
||||
thread_id = cfg.get("configurable", {}).get("thread_id")
|
||||
if not thread_id or thread_id in merged:
|
||||
continue
|
||||
|
||||
# Skip sub-graph checkpoints (checkpoint_ns is non-empty for those)
|
||||
if cfg.get("configurable", {}).get("checkpoint_ns", ""):
|
||||
continue
|
||||
|
||||
ckpt_meta = getattr(checkpoint_tuple, "metadata", {}) or {}
|
||||
# Strip LangGraph internal keys from the user-visible metadata dict
|
||||
user_meta = {k: v for k, v in ckpt_meta.items() if k not in ("created_at", "updated_at", "step", "source", "writes", "parents")}
|
||||
|
||||
# Extract state values (title) from the checkpoint's channel_values
|
||||
checkpoint_data = getattr(checkpoint_tuple, "checkpoint", {}) or {}
|
||||
channel_values = checkpoint_data.get("channel_values", {})
|
||||
ckpt_values = {}
|
||||
if title := channel_values.get("title"):
|
||||
ckpt_values["title"] = title
|
||||
|
||||
thread_resp = ThreadResponse(
|
||||
thread_id=thread_id,
|
||||
status=_derive_thread_status(checkpoint_tuple),
|
||||
created_at=str(ckpt_meta.get("created_at", "")),
|
||||
updated_at=str(ckpt_meta.get("updated_at", ckpt_meta.get("created_at", ""))),
|
||||
metadata=user_meta,
|
||||
values=ckpt_values,
|
||||
)
|
||||
merged[thread_id] = thread_resp
|
||||
|
||||
# Lazy migration — write to Store so the next search finds it there
|
||||
if store is not None:
|
||||
try:
|
||||
await _store_upsert(store, thread_id, metadata=user_meta, values=ckpt_values or None)
|
||||
except Exception:
|
||||
logger.debug("Failed to migrate thread %s to store (non-fatal)", thread_id)
|
||||
except Exception:
|
||||
logger.exception("Checkpointer scan failed during thread search")
|
||||
# Don't raise — return whatever was collected from Store + partial scan
|
||||
|
||||
# -----------------------------------------------------------------------
|
||||
# Phase 3: Filter → sort → paginate
|
||||
# -----------------------------------------------------------------------
|
||||
results = list(merged.values())
|
||||
|
||||
if body.metadata:
|
||||
results = [r for r in results if all(r.metadata.get(k) == v for k, v in body.metadata.items())]
|
||||
|
||||
if body.status:
|
||||
results = [r for r in results if r.status == body.status]
|
||||
|
||||
results.sort(key=lambda r: r.updated_at, reverse=True)
|
||||
return results[body.offset : body.offset + body.limit]
|
||||
repo = get_thread_store(request)
|
||||
rows = await repo.search(
|
||||
metadata=body.metadata or None,
|
||||
status=body.status,
|
||||
limit=body.limit,
|
||||
offset=body.offset,
|
||||
)
|
||||
return [
|
||||
ThreadResponse(
|
||||
thread_id=r["thread_id"],
|
||||
status=r.get("status", "idle"),
|
||||
created_at=r.get("created_at", ""),
|
||||
updated_at=r.get("updated_at", ""),
|
||||
metadata=r.get("metadata", {}),
|
||||
values={"title": r["display_name"]} if r.get("display_name") else {},
|
||||
interrupts={},
|
||||
)
|
||||
for r in rows
|
||||
]
|
||||
|
||||
|
||||
@router.patch("/{thread_id}", response_model=ThreadResponse)
|
||||
@require_permission("threads", "write", owner_check=True, require_existing=True)
|
||||
async def patch_thread(thread_id: str, body: ThreadPatchRequest, request: Request) -> ThreadResponse:
|
||||
"""Merge metadata into a thread record."""
|
||||
store = get_store(request)
|
||||
if store is None:
|
||||
raise HTTPException(status_code=503, detail="Store not available")
|
||||
from app.gateway.deps import get_thread_store
|
||||
|
||||
record = await _store_get(store, thread_id)
|
||||
thread_store = get_thread_store(request)
|
||||
record = await thread_store.get(thread_id)
|
||||
if record is None:
|
||||
raise HTTPException(status_code=404, detail=f"Thread {thread_id} not found")
|
||||
|
||||
now = time.time()
|
||||
updated = dict(record)
|
||||
updated.setdefault("metadata", {}).update(body.metadata)
|
||||
updated["updated_at"] = now
|
||||
|
||||
# ``body.metadata`` already stripped by ``ThreadPatchRequest._strip_reserved``.
|
||||
try:
|
||||
await _store_put(store, updated)
|
||||
await thread_store.update_metadata(thread_id, body.metadata)
|
||||
except Exception:
|
||||
logger.exception("Failed to patch thread %s", thread_id)
|
||||
logger.exception("Failed to patch thread %s", sanitize_log_param(thread_id))
|
||||
raise HTTPException(status_code=500, detail="Failed to update thread")
|
||||
|
||||
# Re-read to get the merged metadata + refreshed updated_at
|
||||
record = await thread_store.get(thread_id) or record
|
||||
return ThreadResponse(
|
||||
thread_id=thread_id,
|
||||
status=updated.get("status", "idle"),
|
||||
created_at=str(updated.get("created_at", "")),
|
||||
updated_at=str(now),
|
||||
metadata=updated.get("metadata", {}),
|
||||
status=record.get("status", "idle"),
|
||||
created_at=str(record.get("created_at", "")),
|
||||
updated_at=str(record.get("updated_at", "")),
|
||||
metadata=record.get("metadata", {}),
|
||||
)
|
||||
|
||||
|
||||
@router.get("/{thread_id}", response_model=ThreadResponse)
|
||||
@require_permission("threads", "read", owner_check=True)
|
||||
async def get_thread(thread_id: str, request: Request) -> ThreadResponse:
|
||||
"""Get thread info.
|
||||
|
||||
Reads metadata from the Store and derives the accurate execution
|
||||
status from the checkpointer. Falls back to the checkpointer alone
|
||||
for threads that pre-date Store adoption (backward compat).
|
||||
Reads metadata from the ThreadMetaStore and derives the accurate
|
||||
execution status from the checkpointer. Falls back to the checkpointer
|
||||
alone for threads that pre-date ThreadMetaStore adoption (backward compat).
|
||||
"""
|
||||
store = get_store(request)
|
||||
from app.gateway.deps import get_thread_store
|
||||
|
||||
thread_store = get_thread_store(request)
|
||||
checkpointer = get_checkpointer(request)
|
||||
|
||||
record: dict | None = None
|
||||
if store is not None:
|
||||
record = await _store_get(store, thread_id)
|
||||
record: dict | None = await thread_store.get(thread_id)
|
||||
|
||||
# Derive accurate status from the checkpointer
|
||||
config = {"configurable": {"thread_id": thread_id, "checkpoint_ns": ""}}
|
||||
try:
|
||||
checkpoint_tuple = await checkpointer.aget_tuple(config)
|
||||
except Exception:
|
||||
logger.exception("Failed to get checkpoint for thread %s", thread_id)
|
||||
logger.exception("Failed to get checkpoint for thread %s", sanitize_log_param(thread_id))
|
||||
raise HTTPException(status_code=500, detail="Failed to get thread")
|
||||
|
||||
if record is None and checkpoint_tuple is None:
|
||||
raise HTTPException(status_code=404, detail=f"Thread {thread_id} not found")
|
||||
|
||||
# If the thread exists in the checkpointer but not the store (e.g. legacy
|
||||
# data), synthesize a minimal store record from the checkpoint metadata.
|
||||
# If the thread exists in the checkpointer but not in thread_meta (e.g.
|
||||
# legacy data created before thread_meta adoption), synthesize a minimal
|
||||
# record from the checkpoint metadata.
|
||||
if record is None and checkpoint_tuple is not None:
|
||||
ckpt_meta = getattr(checkpoint_tuple, "metadata", {}) or {}
|
||||
record = {
|
||||
@@ -505,7 +404,9 @@ async def get_thread(thread_id: str, request: Request) -> ThreadResponse:
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@router.get("/{thread_id}/state", response_model=ThreadStateResponse)
|
||||
@require_permission("threads", "read", owner_check=True)
|
||||
async def get_thread_state(thread_id: str, request: Request) -> ThreadStateResponse:
|
||||
"""Get the latest state snapshot for a thread.
|
||||
|
||||
@@ -518,7 +419,7 @@ async def get_thread_state(thread_id: str, request: Request) -> ThreadStateRespo
|
||||
try:
|
||||
checkpoint_tuple = await checkpointer.aget_tuple(config)
|
||||
except Exception:
|
||||
logger.exception("Failed to get state for thread %s", thread_id)
|
||||
logger.exception("Failed to get state for thread %s", sanitize_log_param(thread_id))
|
||||
raise HTTPException(status_code=500, detail="Failed to get thread state")
|
||||
|
||||
if checkpoint_tuple is None:
|
||||
@@ -542,8 +443,10 @@ async def get_thread_state(thread_id: str, request: Request) -> ThreadStateRespo
|
||||
next_tasks = [t.name for t in tasks_raw if hasattr(t, "name")]
|
||||
tasks = [{"id": getattr(t, "id", ""), "name": getattr(t, "name", "")} for t in tasks_raw]
|
||||
|
||||
values = serialize_channel_values(channel_values)
|
||||
|
||||
return ThreadStateResponse(
|
||||
values=serialize_channel_values(channel_values),
|
||||
values=values,
|
||||
next=next_tasks,
|
||||
metadata=metadata,
|
||||
checkpoint={"id": checkpoint_id, "ts": str(metadata.get("created_at", ""))},
|
||||
@@ -555,15 +458,19 @@ async def get_thread_state(thread_id: str, request: Request) -> ThreadStateRespo
|
||||
|
||||
|
||||
@router.post("/{thread_id}/state", response_model=ThreadStateResponse)
|
||||
@require_permission("threads", "write", owner_check=True, require_existing=True)
|
||||
async def update_thread_state(thread_id: str, body: ThreadStateUpdateRequest, request: Request) -> ThreadStateResponse:
|
||||
"""Update thread state (e.g. for human-in-the-loop resume or title rename).
|
||||
|
||||
Writes a new checkpoint that merges *body.values* into the latest
|
||||
channel values, then syncs any updated ``title`` field back to the Store
|
||||
so that ``/threads/search`` reflects the change immediately.
|
||||
channel values, then syncs any updated ``title`` field through the
|
||||
ThreadMetaStore abstraction so that ``/threads/search`` reflects the
|
||||
change immediately in both sqlite and memory backends.
|
||||
"""
|
||||
from app.gateway.deps import get_thread_store
|
||||
|
||||
checkpointer = get_checkpointer(request)
|
||||
store = get_store(request)
|
||||
thread_store = get_thread_store(request)
|
||||
|
||||
# checkpoint_ns must be present in the config for aput — default to ""
|
||||
# (the root graph namespace). checkpoint_id is optional; omitting it
|
||||
@@ -580,7 +487,7 @@ async def update_thread_state(thread_id: str, body: ThreadStateUpdateRequest, re
|
||||
try:
|
||||
checkpoint_tuple = await checkpointer.aget_tuple(read_config)
|
||||
except Exception:
|
||||
logger.exception("Failed to get state for thread %s", thread_id)
|
||||
logger.exception("Failed to get state for thread %s", sanitize_log_param(thread_id))
|
||||
raise HTTPException(status_code=500, detail="Failed to get thread state")
|
||||
|
||||
if checkpoint_tuple is None:
|
||||
@@ -614,19 +521,22 @@ async def update_thread_state(thread_id: str, body: ThreadStateUpdateRequest, re
|
||||
try:
|
||||
new_config = await checkpointer.aput(write_config, checkpoint, metadata, {})
|
||||
except Exception:
|
||||
logger.exception("Failed to update state for thread %s", thread_id)
|
||||
logger.exception("Failed to update state for thread %s", sanitize_log_param(thread_id))
|
||||
raise HTTPException(status_code=500, detail="Failed to update thread state")
|
||||
|
||||
new_checkpoint_id: str | None = None
|
||||
if isinstance(new_config, dict):
|
||||
new_checkpoint_id = new_config.get("configurable", {}).get("checkpoint_id")
|
||||
|
||||
# Sync title changes to the Store so /threads/search reflects them immediately.
|
||||
if store is not None and body.values and "title" in body.values:
|
||||
try:
|
||||
await _store_upsert(store, thread_id, values={"title": body.values["title"]})
|
||||
except Exception:
|
||||
logger.debug("Failed to sync title to store for thread %s (non-fatal)", thread_id)
|
||||
# Sync title changes through the ThreadMetaStore abstraction so /threads/search
|
||||
# reflects them immediately in both sqlite and memory backends.
|
||||
if body.values and "title" in body.values:
|
||||
new_title = body.values["title"]
|
||||
if new_title: # Skip empty strings and None
|
||||
try:
|
||||
await thread_store.update_display_name(thread_id, new_title)
|
||||
except Exception:
|
||||
logger.debug("Failed to sync title to thread_meta for %s (non-fatal)", sanitize_log_param(thread_id))
|
||||
|
||||
return ThreadStateResponse(
|
||||
values=serialize_channel_values(channel_values),
|
||||
@@ -638,8 +548,16 @@ async def update_thread_state(thread_id: str, body: ThreadStateUpdateRequest, re
|
||||
|
||||
|
||||
@router.post("/{thread_id}/history", response_model=list[HistoryEntry])
|
||||
@require_permission("threads", "read", owner_check=True)
|
||||
async def get_thread_history(thread_id: str, body: ThreadHistoryRequest, request: Request) -> list[HistoryEntry]:
|
||||
"""Get checkpoint history for a thread."""
|
||||
"""Get checkpoint history for a thread.
|
||||
|
||||
Messages are read from the checkpointer's channel values (the
|
||||
authoritative source) and serialized via
|
||||
:func:`~deerflow.runtime.serialization.serialize_channel_values`.
|
||||
Only the latest (first) checkpoint carries the ``messages`` key to
|
||||
avoid duplicating them across every entry.
|
||||
"""
|
||||
checkpointer = get_checkpointer(request)
|
||||
|
||||
config: dict[str, Any] = {"configurable": {"thread_id": thread_id}}
|
||||
@@ -647,6 +565,7 @@ async def get_thread_history(thread_id: str, body: ThreadHistoryRequest, request
|
||||
config["configurable"]["checkpoint_id"] = body.before
|
||||
|
||||
entries: list[HistoryEntry] = []
|
||||
is_latest_checkpoint = True
|
||||
try:
|
||||
async for checkpoint_tuple in checkpointer.alist(config, limit=body.limit):
|
||||
ckpt_config = getattr(checkpoint_tuple, "config", {})
|
||||
@@ -661,22 +580,42 @@ async def get_thread_history(thread_id: str, body: ThreadHistoryRequest, request
|
||||
|
||||
channel_values = checkpoint.get("channel_values", {})
|
||||
|
||||
# Build values from checkpoint channel_values
|
||||
values: dict[str, Any] = {}
|
||||
if title := channel_values.get("title"):
|
||||
values["title"] = title
|
||||
if thread_data := channel_values.get("thread_data"):
|
||||
values["thread_data"] = thread_data
|
||||
|
||||
# Attach messages only to the latest checkpoint entry.
|
||||
if is_latest_checkpoint:
|
||||
messages = channel_values.get("messages")
|
||||
if messages:
|
||||
values["messages"] = serialize_channel_values({"messages": messages}).get("messages", [])
|
||||
is_latest_checkpoint = False
|
||||
|
||||
# Derive next tasks
|
||||
tasks_raw = getattr(checkpoint_tuple, "tasks", []) or []
|
||||
next_tasks = [t.name for t in tasks_raw if hasattr(t, "name")]
|
||||
|
||||
# Strip LangGraph internal keys from metadata
|
||||
user_meta = {k: v for k, v in metadata.items() if k not in ("created_at", "updated_at", "step", "source", "writes", "parents")}
|
||||
# Keep step for ordering context
|
||||
if "step" in metadata:
|
||||
user_meta["step"] = metadata["step"]
|
||||
|
||||
entries.append(
|
||||
HistoryEntry(
|
||||
checkpoint_id=checkpoint_id,
|
||||
parent_checkpoint_id=parent_id,
|
||||
metadata=metadata,
|
||||
values=serialize_channel_values(channel_values),
|
||||
metadata=user_meta,
|
||||
values=values,
|
||||
created_at=str(metadata.get("created_at", "")),
|
||||
next=next_tasks,
|
||||
)
|
||||
)
|
||||
except Exception:
|
||||
logger.exception("Failed to get history for thread %s", thread_id)
|
||||
logger.exception("Failed to get history for thread %s", sanitize_log_param(thread_id))
|
||||
raise HTTPException(status_code=500, detail="Failed to get thread history")
|
||||
|
||||
return entries
|
||||
|
||||
Reference in New Issue
Block a user