mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-24 08:55:59 +00:00
feat(persistence): add ORM models, repositories, DB/JSONL event stores, RunJournal, and API endpoints
Phase 2-B: run persistence + event storage + token tracking.

- ORM models: RunRow (with token fields), ThreadMetaRow, RunEventRow
- RunRepository implements RunStore ABC via SQLAlchemy ORM
- ThreadMetaRepository with owner access control
- DbRunEventStore with trace content truncation and cursor pagination
- JsonlRunEventStore with per-run files and seq recovery from disk
- RunJournal (BaseCallbackHandler) captures LLM/tool/lifecycle events, accumulates token usage by caller type, buffers and flushes to store
- RunManager now accepts optional RunStore for persistent backing
- Worker creates RunJournal, writes human_message, injects callbacks
- Gateway deps use factory functions (RunRepository when DB available)
- New endpoints: messages, run messages, run events, token-usage
- ThreadCreateRequest gains assistant_id field
- 92 tests pass (33 new), zero regressions

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
"""In-memory run registry."""
|
||||
"""In-memory run registry with optional persistent RunStore backing."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
@@ -7,9 +7,13 @@ import logging
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC, datetime
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from .schemas import DisconnectMode, RunStatus
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from deerflow.runtime.runs.store.base import RunStore
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@@ -38,11 +42,17 @@ class RunRecord:
|
||||
|
||||
|
||||
class RunManager:
|
||||
"""In-memory run registry. All mutations are protected by an asyncio lock."""
|
||||
"""In-memory run registry with optional persistent RunStore backing.
|
||||
|
||||
def __init__(self) -> None:
|
||||
All mutations are protected by an asyncio lock. When a ``store`` is
|
||||
provided, serializable metadata is also persisted to the store so
|
||||
that run history survives process restarts.
|
||||
"""
|
||||
|
||||
def __init__(self, store: RunStore | None = None) -> None:
|
||||
self._runs: dict[str, RunRecord] = {}
|
||||
self._lock = asyncio.Lock()
|
||||
self._store = store
|
||||
|
||||
async def create(
|
||||
self,
|
||||
@@ -71,6 +81,20 @@ class RunManager:
|
||||
)
|
||||
async with self._lock:
|
||||
self._runs[run_id] = record
|
||||
if self._store is not None:
|
||||
try:
|
||||
await self._store.put(
|
||||
run_id,
|
||||
thread_id=thread_id,
|
||||
assistant_id=assistant_id,
|
||||
status=RunStatus.pending.value,
|
||||
multitask_strategy=multitask_strategy,
|
||||
metadata=metadata or {},
|
||||
kwargs=kwargs or {},
|
||||
created_at=now,
|
||||
)
|
||||
except Exception:
|
||||
logger.warning("Failed to persist run %s to store", run_id, exc_info=True)
|
||||
logger.info("Run created: run_id=%s thread_id=%s", run_id, thread_id)
|
||||
return record
|
||||
|
||||
@@ -96,6 +120,11 @@ class RunManager:
|
||||
record.updated_at = _now_iso()
|
||||
if error is not None:
|
||||
record.error = error
|
||||
if self._store is not None:
|
||||
try:
|
||||
await self._store.update_status(run_id, status.value, error=error)
|
||||
except Exception:
|
||||
logger.warning("Failed to persist status update for run %s", run_id, exc_info=True)
|
||||
logger.info("Run %s -> %s", run_id, status.value)
|
||||
|
||||
async def cancel(self, run_id: str, *, action: str = "interrupt") -> bool:
|
||||
@@ -185,6 +214,21 @@ class RunManager:
|
||||
)
|
||||
self._runs[run_id] = record
|
||||
|
||||
if self._store is not None:
|
||||
try:
|
||||
await self._store.put(
|
||||
run_id,
|
||||
thread_id=thread_id,
|
||||
assistant_id=assistant_id,
|
||||
status=RunStatus.pending.value,
|
||||
multitask_strategy=multitask_strategy,
|
||||
metadata=metadata or {},
|
||||
kwargs=kwargs or {},
|
||||
created_at=now,
|
||||
)
|
||||
except Exception:
|
||||
logger.warning("Failed to persist run %s to store", run_id, exc_info=True)
|
||||
|
||||
logger.info("Run created: run_id=%s thread_id=%s", run_id, thread_id)
|
||||
return record
|
||||
|
||||
|
||||
@@ -45,6 +45,8 @@ async def run_agent(
|
||||
stream_subgraphs: bool = False,
|
||||
interrupt_before: list[str] | Literal["*"] | None = None,
|
||||
interrupt_after: list[str] | Literal["*"] | None = None,
|
||||
event_store: Any | None = None,
|
||||
run_events_config: Any | None = None,
|
||||
) -> None:
|
||||
"""Execute an agent in the background, publishing events to *bridge*."""
|
||||
|
||||
@@ -52,6 +54,30 @@ async def run_agent(
|
||||
thread_id = record.thread_id
|
||||
requested_modes: set[str] = set(stream_modes or ["values"])
|
||||
|
||||
# Initialize RunJournal for event capture
|
||||
journal = None
|
||||
if event_store is not None:
|
||||
from deerflow.runtime.journal import RunJournal
|
||||
|
||||
journal = RunJournal(
|
||||
run_id=run_id,
|
||||
thread_id=thread_id,
|
||||
event_store=event_store,
|
||||
track_token_usage=getattr(run_events_config, "track_token_usage", True),
|
||||
)
|
||||
|
||||
# Write human_message event
|
||||
user_input = _extract_user_input(graph_input)
|
||||
if user_input:
|
||||
await event_store.put(
|
||||
thread_id=thread_id,
|
||||
run_id=run_id,
|
||||
event_type="human_message",
|
||||
category="message",
|
||||
content=user_input,
|
||||
)
|
||||
journal.set_first_human_message(user_input)
|
||||
|
||||
# Track whether "events" was requested but skipped
|
||||
if "events" in requested_modes:
|
||||
logger.info(
|
||||
@@ -92,6 +118,10 @@ async def run_agent(
|
||||
runtime = Runtime(context={"thread_id": thread_id}, store=store)
|
||||
config.setdefault("configurable", {})["__pregel_runtime"] = runtime
|
||||
|
||||
# Inject RunJournal as a callback
|
||||
if journal is not None:
|
||||
config.setdefault("callbacks", []).append(journal)
|
||||
|
||||
runnable_config = RunnableConfig(**config)
|
||||
agent = agent_factory(config=runnable_config)
|
||||
|
||||
@@ -206,6 +236,13 @@ async def run_agent(
|
||||
)
|
||||
|
||||
finally:
|
||||
# Flush any buffered journal events
|
||||
if journal is not None:
|
||||
try:
|
||||
await journal.flush()
|
||||
except Exception:
|
||||
logger.warning("Failed to flush journal for run %s", run_id, exc_info=True)
|
||||
|
||||
await bridge.publish_end(run_id)
|
||||
asyncio.create_task(bridge.cleanup(run_id, delay=60))
|
||||
|
||||
@@ -227,6 +264,23 @@ def _lg_mode_to_sse_event(mode: str) -> str:
|
||||
return mode
|
||||
|
||||
|
||||
def _extract_user_input(graph_input: dict) -> str:
|
||||
"""Extract user input text from graph_input for event recording."""
|
||||
messages = graph_input.get("messages")
|
||||
if not messages:
|
||||
return ""
|
||||
# Take the last message (usually the user's input)
|
||||
last = messages[-1] if isinstance(messages, list) else messages
|
||||
if isinstance(last, str):
|
||||
return last
|
||||
if hasattr(last, "content"):
|
||||
content = last.content
|
||||
return content if isinstance(content, str) else str(content)
|
||||
if isinstance(last, dict):
|
||||
return str(last.get("content", ""))
|
||||
return ""
|
||||
|
||||
|
||||
def _unpack_stream_item(
|
||||
item: Any,
|
||||
lg_modes: list[str],
|
||||
|
||||
Reference in New Issue
Block a user