feat(persistence): add ORM models, repositories, DB/JSONL event stores, RunJournal, and API endpoints

Phase 2-B: run persistence + event storage + token tracking.

- ORM models: RunRow (with token fields), ThreadMetaRow, RunEventRow
- RunRepository implements RunStore ABC via SQLAlchemy ORM
- ThreadMetaRepository with owner access control
- DbRunEventStore with trace content truncation and cursor pagination
- JsonlRunEventStore with per-run files and seq recovery from disk
- RunJournal (BaseCallbackHandler) captures LLM/tool/lifecycle events,
  accumulates token usage by caller type, buffers and flushes to store
- RunManager now accepts optional RunStore for persistent backing
- Worker creates RunJournal, writes human_message, injects callbacks
- Gateway deps use factory functions (RunRepository when DB available)
- New endpoints: messages, run messages, run events, token-usage
- ThreadCreateRequest gains assistant_id field
- 92 tests pass (33 new), zero regressions

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
rayhpeng
2026-04-02 19:03:38 +08:00
parent 23eacf9533
commit e3179cd54d
21 changed files with 1946 additions and 29 deletions
@@ -1,4 +1,4 @@
"""In-memory run registry."""
"""In-memory run registry with optional persistent RunStore backing."""
from __future__ import annotations
@@ -7,9 +7,13 @@ import logging
import uuid
from dataclasses import dataclass, field
from datetime import UTC, datetime
from typing import TYPE_CHECKING
from .schemas import DisconnectMode, RunStatus
if TYPE_CHECKING:
from deerflow.runtime.runs.store.base import RunStore
logger = logging.getLogger(__name__)
@@ -38,11 +42,17 @@ class RunRecord:
class RunManager:
"""In-memory run registry. All mutations are protected by an asyncio lock."""
"""In-memory run registry with optional persistent RunStore backing.
def __init__(self) -> None:
All mutations are protected by an asyncio lock. When a ``store`` is
provided, serializable metadata is also persisted to the store so
that run history survives process restarts.
"""
def __init__(self, store: RunStore | None = None) -> None:
self._runs: dict[str, RunRecord] = {}
self._lock = asyncio.Lock()
self._store = store
async def create(
self,
@@ -71,6 +81,20 @@ class RunManager:
)
async with self._lock:
self._runs[run_id] = record
if self._store is not None:
try:
await self._store.put(
run_id,
thread_id=thread_id,
assistant_id=assistant_id,
status=RunStatus.pending.value,
multitask_strategy=multitask_strategy,
metadata=metadata or {},
kwargs=kwargs or {},
created_at=now,
)
except Exception:
logger.warning("Failed to persist run %s to store", run_id, exc_info=True)
logger.info("Run created: run_id=%s thread_id=%s", run_id, thread_id)
return record
@@ -96,6 +120,11 @@ class RunManager:
record.updated_at = _now_iso()
if error is not None:
record.error = error
if self._store is not None:
try:
await self._store.update_status(run_id, status.value, error=error)
except Exception:
logger.warning("Failed to persist status update for run %s", run_id, exc_info=True)
logger.info("Run %s -> %s", run_id, status.value)
async def cancel(self, run_id: str, *, action: str = "interrupt") -> bool:
@@ -185,6 +214,21 @@ class RunManager:
)
self._runs[run_id] = record
if self._store is not None:
try:
await self._store.put(
run_id,
thread_id=thread_id,
assistant_id=assistant_id,
status=RunStatus.pending.value,
multitask_strategy=multitask_strategy,
metadata=metadata or {},
kwargs=kwargs or {},
created_at=now,
)
except Exception:
logger.warning("Failed to persist run %s to store", run_id, exc_info=True)
logger.info("Run created: run_id=%s thread_id=%s", run_id, thread_id)
return record
@@ -45,6 +45,8 @@ async def run_agent(
stream_subgraphs: bool = False,
interrupt_before: list[str] | Literal["*"] | None = None,
interrupt_after: list[str] | Literal["*"] | None = None,
event_store: Any | None = None,
run_events_config: Any | None = None,
) -> None:
"""Execute an agent in the background, publishing events to *bridge*."""
@@ -52,6 +54,30 @@ async def run_agent(
thread_id = record.thread_id
requested_modes: set[str] = set(stream_modes or ["values"])
# Initialize RunJournal for event capture
journal = None
if event_store is not None:
from deerflow.runtime.journal import RunJournal
journal = RunJournal(
run_id=run_id,
thread_id=thread_id,
event_store=event_store,
track_token_usage=getattr(run_events_config, "track_token_usage", True),
)
# Write human_message event
user_input = _extract_user_input(graph_input)
if user_input:
await event_store.put(
thread_id=thread_id,
run_id=run_id,
event_type="human_message",
category="message",
content=user_input,
)
journal.set_first_human_message(user_input)
# Track whether "events" was requested but skipped
if "events" in requested_modes:
logger.info(
@@ -92,6 +118,10 @@ async def run_agent(
runtime = Runtime(context={"thread_id": thread_id}, store=store)
config.setdefault("configurable", {})["__pregel_runtime"] = runtime
# Inject RunJournal as a callback
if journal is not None:
config.setdefault("callbacks", []).append(journal)
runnable_config = RunnableConfig(**config)
agent = agent_factory(config=runnable_config)
@@ -206,6 +236,13 @@ async def run_agent(
)
finally:
# Flush any buffered journal events
if journal is not None:
try:
await journal.flush()
except Exception:
logger.warning("Failed to flush journal for run %s", run_id, exc_info=True)
await bridge.publish_end(run_id)
asyncio.create_task(bridge.cleanup(run_id, delay=60))
@@ -227,6 +264,23 @@ def _lg_mode_to_sse_event(mode: str) -> str:
return mode
def _extract_user_input(graph_input: dict) -> str:
"""Extract user input text from graph_input for event recording."""
messages = graph_input.get("messages")
if not messages:
return ""
# Take the last message (usually the user's input)
last = messages[-1] if isinstance(messages, list) else messages
if isinstance(last, str):
return last
if hasattr(last, "content"):
content = last.content
return content if isinstance(content, str) else str(content)
if isinstance(last, dict):
return str(last.get("content", ""))
return ""
def _unpack_stream_item(
item: Any,
lg_modes: list[str],