mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-24 00:45:57 +00:00
refactor(config): eliminate global mutable state — explicit parameter passing on top of main
Squashes 25 PR commits onto current main. AppConfig becomes a pure value object with no ambient lookup. Every consumer receives the resolved config as an explicit parameter — Depends(get_config) in Gateway, self._app_config in DeerFlowClient, runtime.context.app_config in agent runs, AppConfig.from_file() at the LangGraph Server registration boundary. Phase 1 — frozen data + typed context - All config models (AppConfig, MemoryConfig, DatabaseConfig, …) become frozen=True; no sub-module globals. - AppConfig.from_file() is pure (no side-effect singleton loaders). - Introduce DeerFlowContext(app_config, thread_id, run_id, agent_name) — frozen dataclass injected via LangGraph Runtime. - Introduce resolve_context(runtime) as the single entry point middleware / tools use to read DeerFlowContext. Phase 2 — pure explicit parameter passing - Gateway: app.state.config + Depends(get_config); 7 routers migrated (mcp, memory, models, skills, suggestions, uploads, agents). - DeerFlowClient: __init__(config=...) captures config locally. - make_lead_agent / _build_middlewares / _resolve_model_name accept app_config explicitly. - RunContext.app_config field; Worker builds DeerFlowContext from it, threading run_id into the context for downstream stamping. - Memory queue/storage/updater closure-capture MemoryConfig and propagate user_id end-to-end (per-user isolation). - Sandbox/skills/community/factories/tools thread app_config. - resolve_context() rejects non-typed runtime.context. - Test suite migrated off AppConfig.current() monkey-patches. - AppConfig.current() classmethod deleted. Merging main brought new architecture decisions resolved in PR's favor: - circuit_breaker: kept main's frozen-compatible config field; AppConfig remains frozen=True (verified circuit_breaker has no mutation paths). - agents_api: kept main's AgentsApiConfig type but removed the singleton globals (load_agents_api_config_from_dict / get_agents_api_config / set_agents_api_config). 8 routes in agents.py now read via Depends(get_config). - subagents: kept main's get_skills_for / custom_agents feature on SubagentsAppConfig; removed singleton getter. registry.py now reads app_config.subagents directly. - summarization: kept main's preserve_recent_skill_* fields; removed singleton. - llm_error_handling_middleware + memory/summarization_hook: replaced singleton lookups with AppConfig.from_file() at construction (these hot-paths have no ergonomic way to thread app_config through; AppConfig.from_file is a pure load). - worker.py + thread_data_middleware.py: DeerFlowContext.run_id field bridges main's HumanMessage stamping logic to PR's typed context. Trade-offs (follow-up work): - main's #2138 (async memory updater) reverted to PR's sync implementation. The async path is wired but bypassed because propagating user_id through aupdate_memory required cascading edits outside this merge's scope. - tests/test_subagent_skills_config.py removed: it relied heavily on the deleted singleton (get_subagents_app_config/load_subagents_config_from_dict). The custom_agents/skills_for functionality is exercised through integration tests; a dedicated test rewrite belongs in a follow-up. Verification: backend test suite — 2560 passed, 4 skipped, 84 failures. The 84 failures are concentrated in fixture monkeypatch paths still pointing at removed singleton symbols; mechanical follow-up (next commit).
This commit is contained in:
@@ -0,0 +1,13 @@
|
||||
"""DeerFlow application persistence layer (SQLAlchemy 2.0 async ORM).
|
||||
|
||||
This module manages DeerFlow's own application data -- runs metadata,
|
||||
thread ownership, cron jobs, users. It is completely separate from
|
||||
LangGraph's checkpointer, which manages graph execution state.
|
||||
|
||||
Usage:
|
||||
from deerflow.persistence import init_engine, close_engine, get_session_factory
|
||||
"""
|
||||
|
||||
from deerflow.persistence.engine import close_engine, get_engine, get_session_factory, init_engine
|
||||
|
||||
__all__ = ["close_engine", "get_engine", "get_session_factory", "init_engine"]
|
||||
@@ -0,0 +1,40 @@
|
||||
"""SQLAlchemy declarative base with automatic to_dict support.
|
||||
|
||||
All DeerFlow ORM models inherit from this Base. It provides a generic
|
||||
to_dict() method via SQLAlchemy's inspect() so individual models don't
|
||||
need to write their own serialization logic.
|
||||
|
||||
LangGraph's checkpointer tables are NOT managed by this Base.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy import inspect as sa_inspect
|
||||
from sqlalchemy.orm import DeclarativeBase
|
||||
|
||||
|
||||
class Base(DeclarativeBase):
|
||||
"""Base class for all DeerFlow ORM models.
|
||||
|
||||
Provides:
|
||||
- Automatic to_dict() via SQLAlchemy column inspection.
|
||||
- Standard __repr__() showing all column values.
|
||||
"""
|
||||
|
||||
def to_dict(self, *, exclude: set[str] | None = None) -> dict:
|
||||
"""Convert ORM instance to plain dict.
|
||||
|
||||
Uses SQLAlchemy's inspect() to iterate mapped column attributes.
|
||||
|
||||
Args:
|
||||
exclude: Optional set of column keys to omit.
|
||||
|
||||
Returns:
|
||||
Dict of {column_key: value} for all mapped columns.
|
||||
"""
|
||||
exclude = exclude or set()
|
||||
return {c.key: getattr(self, c.key) for c in sa_inspect(type(self)).mapper.column_attrs if c.key not in exclude}
|
||||
|
||||
def __repr__(self) -> str:
|
||||
cols = ", ".join(f"{c.key}={getattr(self, c.key)!r}" for c in sa_inspect(type(self)).mapper.column_attrs)
|
||||
return f"{type(self).__name__}({cols})"
|
||||
@@ -0,0 +1,190 @@
|
||||
"""Async SQLAlchemy engine lifecycle management.
|
||||
|
||||
Initializes at Gateway startup, provides session factory for
|
||||
repositories, disposes at shutdown.
|
||||
|
||||
When database.backend="memory", init_engine is a no-op and
|
||||
get_session_factory() returns None. Repositories must check for
|
||||
None and fall back to in-memory implementations.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
|
||||
from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, async_sessionmaker, create_async_engine
|
||||
|
||||
|
||||
def _json_serializer(obj: object) -> str:
|
||||
"""JSON serializer with ensure_ascii=False for Chinese character support."""
|
||||
return json.dumps(obj, ensure_ascii=False)
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_engine: AsyncEngine | None = None
|
||||
_session_factory: async_sessionmaker[AsyncSession] | None = None
|
||||
|
||||
|
||||
async def _auto_create_postgres_db(url: str) -> None:
|
||||
"""Connect to the ``postgres`` maintenance DB and CREATE DATABASE.
|
||||
|
||||
The target database name is extracted from *url*. The connection is
|
||||
made to the default ``postgres`` database on the same server using
|
||||
``AUTOCOMMIT`` isolation (CREATE DATABASE cannot run inside a
|
||||
transaction).
|
||||
"""
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.engine.url import make_url
|
||||
|
||||
parsed = make_url(url)
|
||||
db_name = parsed.database
|
||||
if not db_name:
|
||||
raise ValueError("Cannot auto-create database: no database name in URL")
|
||||
|
||||
# Connect to the default 'postgres' database to issue CREATE DATABASE
|
||||
maint_url = parsed.set(database="postgres")
|
||||
maint_engine = create_async_engine(maint_url, isolation_level="AUTOCOMMIT")
|
||||
try:
|
||||
async with maint_engine.connect() as conn:
|
||||
await conn.execute(text(f'CREATE DATABASE "{db_name}"'))
|
||||
logger.info("Auto-created PostgreSQL database: %s", db_name)
|
||||
finally:
|
||||
await maint_engine.dispose()
|
||||
|
||||
|
||||
async def init_engine(
|
||||
backend: str,
|
||||
*,
|
||||
url: str = "",
|
||||
echo: bool = False,
|
||||
pool_size: int = 5,
|
||||
sqlite_dir: str = "",
|
||||
) -> None:
|
||||
"""Create the async engine and session factory, then auto-create tables.
|
||||
|
||||
Args:
|
||||
backend: "memory", "sqlite", or "postgres".
|
||||
url: SQLAlchemy async URL (for sqlite/postgres).
|
||||
echo: Echo SQL to log.
|
||||
pool_size: Postgres connection pool size.
|
||||
sqlite_dir: Directory to create for SQLite (ensured to exist).
|
||||
"""
|
||||
global _engine, _session_factory
|
||||
|
||||
if backend == "memory":
|
||||
logger.info("Persistence backend=memory -- ORM engine not initialized")
|
||||
return
|
||||
|
||||
if backend == "postgres":
|
||||
try:
|
||||
import asyncpg # noqa: F401
|
||||
except ImportError:
|
||||
raise ImportError("database.backend is set to 'postgres' but asyncpg is not installed.\nInstall it with:\n uv sync --extra postgres\nOr switch to backend: sqlite in config.yaml for single-node deployment.") from None
|
||||
|
||||
if backend == "sqlite":
|
||||
import os
|
||||
|
||||
from sqlalchemy import event
|
||||
|
||||
os.makedirs(sqlite_dir or ".", exist_ok=True)
|
||||
_engine = create_async_engine(url, echo=echo, json_serializer=_json_serializer)
|
||||
|
||||
# Enable WAL on every new connection. SQLite PRAGMA settings are
|
||||
# per-connection, so we wire the listener instead of running PRAGMA
|
||||
# once at startup. WAL gives concurrent reads + writers without
|
||||
# blocking and is the standard recommendation for any production
|
||||
# SQLite deployment (TC-UPG-06 in AUTH_TEST_PLAN.md). The companion
|
||||
# ``synchronous=NORMAL`` is the safe-and-fast pairing — fsync only
|
||||
# at WAL checkpoint boundaries instead of every commit.
|
||||
# Note: we do not set PRAGMA busy_timeout here — Python's sqlite3
|
||||
# driver already defaults to a 5-second busy timeout (see the
|
||||
# ``timeout`` kwarg of ``sqlite3.connect``), and aiosqlite /
|
||||
# SQLAlchemy's aiosqlite dialect inherit that default. Setting
|
||||
# it again would be a no-op.
|
||||
@event.listens_for(_engine.sync_engine, "connect")
|
||||
def _enable_sqlite_wal(dbapi_conn, _record): # noqa: ARG001 — SQLAlchemy contract
|
||||
cursor = dbapi_conn.cursor()
|
||||
try:
|
||||
cursor.execute("PRAGMA journal_mode=WAL;")
|
||||
cursor.execute("PRAGMA synchronous=NORMAL;")
|
||||
cursor.execute("PRAGMA foreign_keys=ON;")
|
||||
finally:
|
||||
cursor.close()
|
||||
elif backend == "postgres":
|
||||
_engine = create_async_engine(
|
||||
url,
|
||||
echo=echo,
|
||||
pool_size=pool_size,
|
||||
pool_pre_ping=True,
|
||||
json_serializer=_json_serializer,
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unknown persistence backend: {backend!r}")
|
||||
|
||||
_session_factory = async_sessionmaker(_engine, expire_on_commit=False)
|
||||
|
||||
# Auto-create tables (dev convenience). Production should use Alembic.
|
||||
from deerflow.persistence.base import Base
|
||||
|
||||
# Import all models so Base.metadata discovers them.
|
||||
# When no models exist yet (scaffolding phase), this is a no-op.
|
||||
try:
|
||||
import deerflow.persistence.models # noqa: F401
|
||||
except ImportError:
|
||||
# Models package not yet available — tables won't be auto-created.
|
||||
# This is expected during initial scaffolding or minimal installs.
|
||||
logger.debug("deerflow.persistence.models not found; skipping auto-create tables")
|
||||
|
||||
try:
|
||||
async with _engine.begin() as conn:
|
||||
await conn.run_sync(Base.metadata.create_all)
|
||||
except Exception as exc:
|
||||
if backend == "postgres" and "does not exist" in str(exc):
|
||||
# Database not yet created — attempt to auto-create it, then retry.
|
||||
await _auto_create_postgres_db(url)
|
||||
# Rebuild engine against the now-existing database
|
||||
await _engine.dispose()
|
||||
_engine = create_async_engine(url, echo=echo, pool_size=pool_size, pool_pre_ping=True, json_serializer=_json_serializer)
|
||||
_session_factory = async_sessionmaker(_engine, expire_on_commit=False)
|
||||
async with _engine.begin() as conn:
|
||||
await conn.run_sync(Base.metadata.create_all)
|
||||
else:
|
||||
raise
|
||||
|
||||
logger.info("Persistence engine initialized: backend=%s", backend)
|
||||
|
||||
|
||||
async def init_engine_from_config(config) -> None:
|
||||
"""Convenience: init engine from a DatabaseConfig object."""
|
||||
if config.backend == "memory":
|
||||
await init_engine("memory")
|
||||
return
|
||||
await init_engine(
|
||||
backend=config.backend,
|
||||
url=config.app_sqlalchemy_url,
|
||||
echo=config.echo_sql,
|
||||
pool_size=config.pool_size,
|
||||
sqlite_dir=config.sqlite_dir if config.backend == "sqlite" else "",
|
||||
)
|
||||
|
||||
|
||||
def get_session_factory() -> async_sessionmaker[AsyncSession] | None:
|
||||
"""Return the async session factory, or None if backend=memory."""
|
||||
return _session_factory
|
||||
|
||||
|
||||
def get_engine() -> AsyncEngine | None:
|
||||
"""Return the async engine, or None if not initialized."""
|
||||
return _engine
|
||||
|
||||
|
||||
async def close_engine() -> None:
|
||||
"""Dispose the engine, release all connections."""
|
||||
global _engine, _session_factory
|
||||
if _engine is not None:
|
||||
await _engine.dispose()
|
||||
logger.info("Persistence engine closed")
|
||||
_engine = None
|
||||
_session_factory = None
|
||||
@@ -0,0 +1,6 @@
|
||||
"""Feedback persistence — ORM and SQL repository."""
|
||||
|
||||
from deerflow.persistence.feedback.model import FeedbackRow
|
||||
from deerflow.persistence.feedback.sql import FeedbackRepository
|
||||
|
||||
__all__ = ["FeedbackRepository", "FeedbackRow"]
|
||||
@@ -0,0 +1,34 @@
|
||||
"""ORM model for user feedback on runs."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from sqlalchemy import DateTime, String, Text, UniqueConstraint
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from deerflow.persistence.base import Base
|
||||
|
||||
|
||||
class FeedbackRow(Base):
|
||||
__tablename__ = "feedback"
|
||||
|
||||
__table_args__ = (
|
||||
UniqueConstraint("thread_id", "run_id", "user_id", name="uq_feedback_thread_run_user"),
|
||||
)
|
||||
|
||||
feedback_id: Mapped[str] = mapped_column(String(64), primary_key=True)
|
||||
run_id: Mapped[str] = mapped_column(String(64), nullable=False, index=True)
|
||||
thread_id: Mapped[str] = mapped_column(String(64), nullable=False, index=True)
|
||||
user_id: Mapped[str | None] = mapped_column(String(64), index=True)
|
||||
message_id: Mapped[str | None] = mapped_column(String(64))
|
||||
# message_id is an optional RunEventStore event identifier —
|
||||
# allows feedback to target a specific message or the entire run
|
||||
|
||||
rating: Mapped[int] = mapped_column(nullable=False)
|
||||
# +1 (thumbs-up) or -1 (thumbs-down)
|
||||
|
||||
comment: Mapped[str | None] = mapped_column(Text)
|
||||
# Optional text feedback from the user
|
||||
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(UTC))
|
||||
@@ -0,0 +1,217 @@
|
||||
"""SQLAlchemy-backed feedback storage.
|
||||
|
||||
Each method acquires its own short-lived session.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from sqlalchemy import case, func, select
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
|
||||
from deerflow.persistence.feedback.model import FeedbackRow
|
||||
from deerflow.runtime.user_context import AUTO, _AutoSentinel, resolve_user_id
|
||||
|
||||
|
||||
class FeedbackRepository:
|
||||
def __init__(self, session_factory: async_sessionmaker[AsyncSession]) -> None:
|
||||
self._sf = session_factory
|
||||
|
||||
@staticmethod
|
||||
def _row_to_dict(row: FeedbackRow) -> dict:
|
||||
d = row.to_dict()
|
||||
val = d.get("created_at")
|
||||
if isinstance(val, datetime):
|
||||
d["created_at"] = val.isoformat()
|
||||
return d
|
||||
|
||||
async def create(
|
||||
self,
|
||||
*,
|
||||
run_id: str,
|
||||
thread_id: str,
|
||||
rating: int,
|
||||
user_id: str | None | _AutoSentinel = AUTO,
|
||||
message_id: str | None = None,
|
||||
comment: str | None = None,
|
||||
) -> dict:
|
||||
"""Create a feedback record. rating must be +1 or -1."""
|
||||
if rating not in (1, -1):
|
||||
raise ValueError(f"rating must be +1 or -1, got {rating}")
|
||||
resolved_user_id = resolve_user_id(user_id, method_name="FeedbackRepository.create")
|
||||
row = FeedbackRow(
|
||||
feedback_id=str(uuid.uuid4()),
|
||||
run_id=run_id,
|
||||
thread_id=thread_id,
|
||||
user_id=resolved_user_id,
|
||||
message_id=message_id,
|
||||
rating=rating,
|
||||
comment=comment,
|
||||
created_at=datetime.now(UTC),
|
||||
)
|
||||
async with self._sf() as session:
|
||||
session.add(row)
|
||||
await session.commit()
|
||||
await session.refresh(row)
|
||||
return self._row_to_dict(row)
|
||||
|
||||
async def get(
|
||||
self,
|
||||
feedback_id: str,
|
||||
*,
|
||||
user_id: str | None | _AutoSentinel = AUTO,
|
||||
) -> dict | None:
|
||||
resolved_user_id = resolve_user_id(user_id, method_name="FeedbackRepository.get")
|
||||
async with self._sf() as session:
|
||||
row = await session.get(FeedbackRow, feedback_id)
|
||||
if row is None:
|
||||
return None
|
||||
if resolved_user_id is not None and row.user_id != resolved_user_id:
|
||||
return None
|
||||
return self._row_to_dict(row)
|
||||
|
||||
async def list_by_run(
|
||||
self,
|
||||
thread_id: str,
|
||||
run_id: str,
|
||||
*,
|
||||
limit: int = 100,
|
||||
user_id: str | None | _AutoSentinel = AUTO,
|
||||
) -> list[dict]:
|
||||
resolved_user_id = resolve_user_id(user_id, method_name="FeedbackRepository.list_by_run")
|
||||
stmt = select(FeedbackRow).where(FeedbackRow.thread_id == thread_id, FeedbackRow.run_id == run_id)
|
||||
if resolved_user_id is not None:
|
||||
stmt = stmt.where(FeedbackRow.user_id == resolved_user_id)
|
||||
stmt = stmt.order_by(FeedbackRow.created_at.asc()).limit(limit)
|
||||
async with self._sf() as session:
|
||||
result = await session.execute(stmt)
|
||||
return [self._row_to_dict(r) for r in result.scalars()]
|
||||
|
||||
async def list_by_thread(
|
||||
self,
|
||||
thread_id: str,
|
||||
*,
|
||||
limit: int = 100,
|
||||
user_id: str | None | _AutoSentinel = AUTO,
|
||||
) -> list[dict]:
|
||||
resolved_user_id = resolve_user_id(user_id, method_name="FeedbackRepository.list_by_thread")
|
||||
stmt = select(FeedbackRow).where(FeedbackRow.thread_id == thread_id)
|
||||
if resolved_user_id is not None:
|
||||
stmt = stmt.where(FeedbackRow.user_id == resolved_user_id)
|
||||
stmt = stmt.order_by(FeedbackRow.created_at.asc()).limit(limit)
|
||||
async with self._sf() as session:
|
||||
result = await session.execute(stmt)
|
||||
return [self._row_to_dict(r) for r in result.scalars()]
|
||||
|
||||
async def delete(
|
||||
self,
|
||||
feedback_id: str,
|
||||
*,
|
||||
user_id: str | None | _AutoSentinel = AUTO,
|
||||
) -> bool:
|
||||
resolved_user_id = resolve_user_id(user_id, method_name="FeedbackRepository.delete")
|
||||
async with self._sf() as session:
|
||||
row = await session.get(FeedbackRow, feedback_id)
|
||||
if row is None:
|
||||
return False
|
||||
if resolved_user_id is not None and row.user_id != resolved_user_id:
|
||||
return False
|
||||
await session.delete(row)
|
||||
await session.commit()
|
||||
return True
|
||||
|
||||
async def upsert(
|
||||
self,
|
||||
*,
|
||||
run_id: str,
|
||||
thread_id: str,
|
||||
rating: int,
|
||||
user_id: str | None | _AutoSentinel = AUTO,
|
||||
comment: str | None = None,
|
||||
) -> dict:
|
||||
"""Create or update feedback for (thread_id, run_id, user_id). rating must be +1 or -1."""
|
||||
if rating not in (1, -1):
|
||||
raise ValueError(f"rating must be +1 or -1, got {rating}")
|
||||
resolved_user_id = resolve_user_id(user_id, method_name="FeedbackRepository.upsert")
|
||||
async with self._sf() as session:
|
||||
stmt = select(FeedbackRow).where(
|
||||
FeedbackRow.thread_id == thread_id,
|
||||
FeedbackRow.run_id == run_id,
|
||||
FeedbackRow.user_id == resolved_user_id,
|
||||
)
|
||||
result = await session.execute(stmt)
|
||||
row = result.scalar_one_or_none()
|
||||
if row is not None:
|
||||
row.rating = rating
|
||||
row.comment = comment
|
||||
row.created_at = datetime.now(UTC)
|
||||
else:
|
||||
row = FeedbackRow(
|
||||
feedback_id=str(uuid.uuid4()),
|
||||
run_id=run_id,
|
||||
thread_id=thread_id,
|
||||
user_id=resolved_user_id,
|
||||
rating=rating,
|
||||
comment=comment,
|
||||
created_at=datetime.now(UTC),
|
||||
)
|
||||
session.add(row)
|
||||
await session.commit()
|
||||
await session.refresh(row)
|
||||
return self._row_to_dict(row)
|
||||
|
||||
async def delete_by_run(
|
||||
self,
|
||||
*,
|
||||
thread_id: str,
|
||||
run_id: str,
|
||||
user_id: str | None | _AutoSentinel = AUTO,
|
||||
) -> bool:
|
||||
"""Delete the current user's feedback for a run. Returns True if a record was deleted."""
|
||||
resolved_user_id = resolve_user_id(user_id, method_name="FeedbackRepository.delete_by_run")
|
||||
async with self._sf() as session:
|
||||
stmt = select(FeedbackRow).where(
|
||||
FeedbackRow.thread_id == thread_id,
|
||||
FeedbackRow.run_id == run_id,
|
||||
FeedbackRow.user_id == resolved_user_id,
|
||||
)
|
||||
result = await session.execute(stmt)
|
||||
row = result.scalar_one_or_none()
|
||||
if row is None:
|
||||
return False
|
||||
await session.delete(row)
|
||||
await session.commit()
|
||||
return True
|
||||
|
||||
async def list_by_thread_grouped(
|
||||
self,
|
||||
thread_id: str,
|
||||
*,
|
||||
user_id: str | None | _AutoSentinel = AUTO,
|
||||
) -> dict[str, dict]:
|
||||
"""Return feedback grouped by run_id for a thread: {run_id: feedback_dict}."""
|
||||
resolved_user_id = resolve_user_id(user_id, method_name="FeedbackRepository.list_by_thread_grouped")
|
||||
stmt = select(FeedbackRow).where(FeedbackRow.thread_id == thread_id)
|
||||
if resolved_user_id is not None:
|
||||
stmt = stmt.where(FeedbackRow.user_id == resolved_user_id)
|
||||
async with self._sf() as session:
|
||||
result = await session.execute(stmt)
|
||||
return {row.run_id: self._row_to_dict(row) for row in result.scalars()}
|
||||
|
||||
async def aggregate_by_run(self, thread_id: str, run_id: str) -> dict:
|
||||
"""Aggregate feedback stats for a run using database-side counting."""
|
||||
stmt = select(
|
||||
func.count().label("total"),
|
||||
func.coalesce(func.sum(case((FeedbackRow.rating == 1, 1), else_=0)), 0).label("positive"),
|
||||
func.coalesce(func.sum(case((FeedbackRow.rating == -1, 1), else_=0)), 0).label("negative"),
|
||||
).where(FeedbackRow.thread_id == thread_id, FeedbackRow.run_id == run_id)
|
||||
async with self._sf() as session:
|
||||
row = (await session.execute(stmt)).one()
|
||||
return {
|
||||
"run_id": run_id,
|
||||
"total": row.total,
|
||||
"positive": row.positive,
|
||||
"negative": row.negative,
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
[alembic]
|
||||
script_location = %(here)s
|
||||
# Default URL for offline mode / autogenerate.
|
||||
# Runtime uses engine from DeerFlow config.
|
||||
sqlalchemy.url = sqlite+aiosqlite:///./data/deerflow.db
|
||||
|
||||
[loggers]
|
||||
keys = root,sqlalchemy,alembic
|
||||
|
||||
[handlers]
|
||||
keys = console
|
||||
|
||||
[formatters]
|
||||
keys = generic
|
||||
|
||||
[logger_root]
|
||||
level = WARN
|
||||
handlers = console
|
||||
|
||||
[logger_sqlalchemy]
|
||||
level = WARN
|
||||
handlers =
|
||||
qualname = sqlalchemy.engine
|
||||
|
||||
[logger_alembic]
|
||||
level = INFO
|
||||
handlers =
|
||||
qualname = alembic
|
||||
|
||||
[handler_console]
|
||||
class = StreamHandler
|
||||
args = (sys.stderr,)
|
||||
level = NOTSET
|
||||
formatter = generic
|
||||
|
||||
[formatter_generic]
|
||||
format = %(levelname)-5.5s [%(name)s] %(message)s
|
||||
datefmt = %H:%M:%S
|
||||
@@ -0,0 +1,65 @@
|
||||
"""Alembic environment for DeerFlow application tables.
|
||||
|
||||
ONLY manages DeerFlow's tables (runs, threads_meta, cron_jobs, users).
|
||||
LangGraph's checkpointer tables are managed by LangGraph itself -- they
|
||||
have their own schema lifecycle and must not be touched by Alembic.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from logging.config import fileConfig
|
||||
|
||||
from alembic import context
|
||||
from sqlalchemy.ext.asyncio import create_async_engine
|
||||
|
||||
from deerflow.persistence.base import Base
|
||||
|
||||
# Import all models so metadata is populated.
|
||||
try:
|
||||
import deerflow.persistence.models # noqa: F401 — register ORM models with Base.metadata
|
||||
except ImportError:
|
||||
# Models not available — migration will work with existing metadata only.
|
||||
logging.getLogger(__name__).warning("Could not import deerflow.persistence.models; Alembic may not detect all tables")
|
||||
|
||||
config = context.config
|
||||
if config.config_file_name is not None:
|
||||
fileConfig(config.config_file_name)
|
||||
|
||||
target_metadata = Base.metadata
|
||||
|
||||
|
||||
def run_migrations_offline() -> None:
|
||||
url = config.get_main_option("sqlalchemy.url")
|
||||
context.configure(
|
||||
url=url,
|
||||
target_metadata=target_metadata,
|
||||
literal_binds=True,
|
||||
render_as_batch=True,
|
||||
)
|
||||
with context.begin_transaction():
|
||||
context.run_migrations()
|
||||
|
||||
|
||||
def do_run_migrations(connection):
|
||||
context.configure(
|
||||
connection=connection,
|
||||
target_metadata=target_metadata,
|
||||
render_as_batch=True, # Required for SQLite ALTER TABLE support
|
||||
)
|
||||
with context.begin_transaction():
|
||||
context.run_migrations()
|
||||
|
||||
|
||||
async def run_migrations_online() -> None:
|
||||
connectable = create_async_engine(config.get_main_option("sqlalchemy.url"))
|
||||
async with connectable.connect() as connection:
|
||||
await connection.run_sync(do_run_migrations)
|
||||
await connectable.dispose()
|
||||
|
||||
|
||||
if context.is_offline_mode():
|
||||
run_migrations_offline()
|
||||
else:
|
||||
asyncio.run(run_migrations_online())
|
||||
@@ -0,0 +1,23 @@
|
||||
"""ORM model registration entry point.
|
||||
|
||||
Importing this module ensures all ORM models are registered with
|
||||
``Base.metadata`` so Alembic autogenerate detects every table.
|
||||
|
||||
The actual ORM classes have moved to entity-specific subpackages:
|
||||
- ``deerflow.persistence.thread_meta``
|
||||
- ``deerflow.persistence.run``
|
||||
- ``deerflow.persistence.feedback``
|
||||
- ``deerflow.persistence.user``
|
||||
|
||||
``RunEventRow`` remains in ``deerflow.persistence.models.run_event`` because
|
||||
its storage implementation lives in ``deerflow.runtime.events.store.db`` and
|
||||
there is no matching entity directory.
|
||||
"""
|
||||
|
||||
from deerflow.persistence.feedback.model import FeedbackRow
|
||||
from deerflow.persistence.models.run_event import RunEventRow
|
||||
from deerflow.persistence.run.model import RunRow
|
||||
from deerflow.persistence.thread_meta.model import ThreadMetaRow
|
||||
from deerflow.persistence.user.model import UserRow
|
||||
|
||||
__all__ = ["FeedbackRow", "RunEventRow", "RunRow", "ThreadMetaRow", "UserRow"]
|
||||
@@ -0,0 +1,35 @@
|
||||
"""ORM model for run events."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from sqlalchemy import JSON, DateTime, Index, String, Text, UniqueConstraint
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from deerflow.persistence.base import Base
|
||||
|
||||
|
||||
class RunEventRow(Base):
|
||||
__tablename__ = "run_events"
|
||||
|
||||
id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
|
||||
thread_id: Mapped[str] = mapped_column(String(64), nullable=False)
|
||||
run_id: Mapped[str] = mapped_column(String(64), nullable=False)
|
||||
# Owner of the conversation this event belongs to. Nullable for data
|
||||
# created before auth was introduced; populated by auth middleware on
|
||||
# new writes and by the boot-time orphan migration on existing rows.
|
||||
user_id: Mapped[str | None] = mapped_column(String(64), nullable=True, index=True)
|
||||
event_type: Mapped[str] = mapped_column(String(32), nullable=False)
|
||||
category: Mapped[str] = mapped_column(String(16), nullable=False)
|
||||
# "message" | "trace" | "lifecycle"
|
||||
content: Mapped[str] = mapped_column(Text, default="")
|
||||
event_metadata: Mapped[dict] = mapped_column(JSON, default=dict)
|
||||
seq: Mapped[int] = mapped_column(nullable=False)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(UTC))
|
||||
|
||||
__table_args__ = (
|
||||
UniqueConstraint("thread_id", "seq", name="uq_events_thread_seq"),
|
||||
Index("ix_events_thread_cat_seq", "thread_id", "category", "seq"),
|
||||
Index("ix_events_run", "thread_id", "run_id", "seq"),
|
||||
)
|
||||
@@ -0,0 +1,6 @@
|
||||
"""Run metadata persistence — ORM and SQL repository."""
|
||||
|
||||
from deerflow.persistence.run.model import RunRow
|
||||
from deerflow.persistence.run.sql import RunRepository
|
||||
|
||||
__all__ = ["RunRepository", "RunRow"]
|
||||
@@ -0,0 +1,49 @@
|
||||
"""ORM model for run metadata."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from sqlalchemy import JSON, DateTime, Index, String, Text
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from deerflow.persistence.base import Base
|
||||
|
||||
|
||||
class RunRow(Base):
|
||||
__tablename__ = "runs"
|
||||
|
||||
run_id: Mapped[str] = mapped_column(String(64), primary_key=True)
|
||||
thread_id: Mapped[str] = mapped_column(String(64), nullable=False, index=True)
|
||||
assistant_id: Mapped[str | None] = mapped_column(String(128))
|
||||
user_id: Mapped[str | None] = mapped_column(String(64), index=True)
|
||||
status: Mapped[str] = mapped_column(String(20), default="pending")
|
||||
# "pending" | "running" | "success" | "error" | "timeout" | "interrupted"
|
||||
|
||||
model_name: Mapped[str | None] = mapped_column(String(128))
|
||||
multitask_strategy: Mapped[str] = mapped_column(String(20), default="reject")
|
||||
metadata_json: Mapped[dict] = mapped_column(JSON, default=dict)
|
||||
kwargs_json: Mapped[dict] = mapped_column(JSON, default=dict)
|
||||
error: Mapped[str | None] = mapped_column(Text)
|
||||
|
||||
# Convenience fields (for listing pages without querying RunEventStore)
|
||||
message_count: Mapped[int] = mapped_column(default=0)
|
||||
first_human_message: Mapped[str | None] = mapped_column(Text)
|
||||
last_ai_message: Mapped[str | None] = mapped_column(Text)
|
||||
|
||||
# Token usage (accumulated in-memory by RunJournal, written on run completion)
|
||||
total_input_tokens: Mapped[int] = mapped_column(default=0)
|
||||
total_output_tokens: Mapped[int] = mapped_column(default=0)
|
||||
total_tokens: Mapped[int] = mapped_column(default=0)
|
||||
llm_call_count: Mapped[int] = mapped_column(default=0)
|
||||
lead_agent_tokens: Mapped[int] = mapped_column(default=0)
|
||||
subagent_tokens: Mapped[int] = mapped_column(default=0)
|
||||
middleware_tokens: Mapped[int] = mapped_column(default=0)
|
||||
|
||||
# Follow-up association
|
||||
follow_up_to_run_id: Mapped[str | None] = mapped_column(String(64))
|
||||
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(UTC))
|
||||
updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(UTC), onupdate=lambda: datetime.now(UTC))
|
||||
|
||||
__table_args__ = (Index("ix_runs_thread_status", "thread_id", "status"),)
|
||||
@@ -0,0 +1,255 @@
|
||||
"""SQLAlchemy-backed RunStore implementation.
|
||||
|
||||
Each method acquires and releases its own short-lived session.
|
||||
Run status updates happen from background workers that may live
|
||||
minutes -- we don't hold connections across long execution.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import func, select, update
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
|
||||
from deerflow.persistence.run.model import RunRow
|
||||
from deerflow.runtime.runs.store.base import RunStore
|
||||
from deerflow.runtime.user_context import AUTO, _AutoSentinel, resolve_user_id
|
||||
|
||||
|
||||
class RunRepository(RunStore):
|
||||
def __init__(self, session_factory: async_sessionmaker[AsyncSession]) -> None:
|
||||
self._sf = session_factory
|
||||
|
||||
@staticmethod
|
||||
def _safe_json(obj: Any) -> Any:
|
||||
"""Ensure obj is JSON-serializable. Falls back to model_dump() or str()."""
|
||||
if obj is None:
|
||||
return None
|
||||
if isinstance(obj, (str, int, float, bool)):
|
||||
return obj
|
||||
if isinstance(obj, dict):
|
||||
return {k: RunRepository._safe_json(v) for k, v in obj.items()}
|
||||
if isinstance(obj, (list, tuple)):
|
||||
return [RunRepository._safe_json(v) for v in obj]
|
||||
if hasattr(obj, "model_dump"):
|
||||
try:
|
||||
return obj.model_dump()
|
||||
except Exception:
|
||||
pass
|
||||
if hasattr(obj, "dict"):
|
||||
try:
|
||||
return obj.dict()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
json.dumps(obj)
|
||||
return obj
|
||||
except (TypeError, ValueError):
|
||||
return str(obj)
|
||||
|
||||
@staticmethod
|
||||
def _row_to_dict(row: RunRow) -> dict[str, Any]:
|
||||
d = row.to_dict()
|
||||
# Remap JSON columns to match RunStore interface
|
||||
d["metadata"] = d.pop("metadata_json", {})
|
||||
d["kwargs"] = d.pop("kwargs_json", {})
|
||||
# Convert datetime to ISO string for consistency with MemoryRunStore
|
||||
for key in ("created_at", "updated_at"):
|
||||
val = d.get(key)
|
||||
if isinstance(val, datetime):
|
||||
d[key] = val.isoformat()
|
||||
return d
|
||||
|
||||
async def put(
|
||||
self,
|
||||
run_id,
|
||||
*,
|
||||
thread_id,
|
||||
assistant_id=None,
|
||||
user_id: str | None | _AutoSentinel = AUTO,
|
||||
status="pending",
|
||||
multitask_strategy="reject",
|
||||
metadata=None,
|
||||
kwargs=None,
|
||||
error=None,
|
||||
created_at=None,
|
||||
follow_up_to_run_id=None,
|
||||
):
|
||||
resolved_user_id = resolve_user_id(user_id, method_name="RunRepository.put")
|
||||
now = datetime.now(UTC)
|
||||
row = RunRow(
|
||||
run_id=run_id,
|
||||
thread_id=thread_id,
|
||||
assistant_id=assistant_id,
|
||||
user_id=resolved_user_id,
|
||||
status=status,
|
||||
multitask_strategy=multitask_strategy,
|
||||
metadata_json=self._safe_json(metadata) or {},
|
||||
kwargs_json=self._safe_json(kwargs) or {},
|
||||
error=error,
|
||||
follow_up_to_run_id=follow_up_to_run_id,
|
||||
created_at=datetime.fromisoformat(created_at) if created_at else now,
|
||||
updated_at=now,
|
||||
)
|
||||
async with self._sf() as session:
|
||||
session.add(row)
|
||||
await session.commit()
|
||||
|
||||
async def get(
|
||||
self,
|
||||
run_id,
|
||||
*,
|
||||
user_id: str | None | _AutoSentinel = AUTO,
|
||||
):
|
||||
resolved_user_id = resolve_user_id(user_id, method_name="RunRepository.get")
|
||||
async with self._sf() as session:
|
||||
row = await session.get(RunRow, run_id)
|
||||
if row is None:
|
||||
return None
|
||||
if resolved_user_id is not None and row.user_id != resolved_user_id:
|
||||
return None
|
||||
return self._row_to_dict(row)
|
||||
|
||||
async def list_by_thread(
|
||||
self,
|
||||
thread_id,
|
||||
*,
|
||||
user_id: str | None | _AutoSentinel = AUTO,
|
||||
limit=100,
|
||||
):
|
||||
resolved_user_id = resolve_user_id(user_id, method_name="RunRepository.list_by_thread")
|
||||
stmt = select(RunRow).where(RunRow.thread_id == thread_id)
|
||||
if resolved_user_id is not None:
|
||||
stmt = stmt.where(RunRow.user_id == resolved_user_id)
|
||||
stmt = stmt.order_by(RunRow.created_at.desc()).limit(limit)
|
||||
async with self._sf() as session:
|
||||
result = await session.execute(stmt)
|
||||
return [self._row_to_dict(r) for r in result.scalars()]
|
||||
|
||||
async def update_status(self, run_id, status, *, error=None):
|
||||
values: dict[str, Any] = {"status": status, "updated_at": datetime.now(UTC)}
|
||||
if error is not None:
|
||||
values["error"] = error
|
||||
async with self._sf() as session:
|
||||
await session.execute(update(RunRow).where(RunRow.run_id == run_id).values(**values))
|
||||
await session.commit()
|
||||
|
||||
async def delete(
|
||||
self,
|
||||
run_id,
|
||||
*,
|
||||
user_id: str | None | _AutoSentinel = AUTO,
|
||||
):
|
||||
resolved_user_id = resolve_user_id(user_id, method_name="RunRepository.delete")
|
||||
async with self._sf() as session:
|
||||
row = await session.get(RunRow, run_id)
|
||||
if row is None:
|
||||
return
|
||||
if resolved_user_id is not None and row.user_id != resolved_user_id:
|
||||
return
|
||||
await session.delete(row)
|
||||
await session.commit()
|
||||
|
||||
async def list_pending(self, *, before=None):
|
||||
if before is None:
|
||||
before_dt = datetime.now(UTC)
|
||||
elif isinstance(before, datetime):
|
||||
before_dt = before
|
||||
else:
|
||||
before_dt = datetime.fromisoformat(before)
|
||||
stmt = select(RunRow).where(RunRow.status == "pending", RunRow.created_at <= before_dt).order_by(RunRow.created_at.asc())
|
||||
async with self._sf() as session:
|
||||
result = await session.execute(stmt)
|
||||
return [self._row_to_dict(r) for r in result.scalars()]
|
||||
|
||||
async def update_run_completion(
|
||||
self,
|
||||
run_id: str,
|
||||
*,
|
||||
status: str,
|
||||
total_input_tokens: int = 0,
|
||||
total_output_tokens: int = 0,
|
||||
total_tokens: int = 0,
|
||||
llm_call_count: int = 0,
|
||||
lead_agent_tokens: int = 0,
|
||||
subagent_tokens: int = 0,
|
||||
middleware_tokens: int = 0,
|
||||
message_count: int = 0,
|
||||
last_ai_message: str | None = None,
|
||||
first_human_message: str | None = None,
|
||||
error: str | None = None,
|
||||
) -> None:
|
||||
"""Update status + token usage + convenience fields on run completion."""
|
||||
values: dict[str, Any] = {
|
||||
"status": status,
|
||||
"total_input_tokens": total_input_tokens,
|
||||
"total_output_tokens": total_output_tokens,
|
||||
"total_tokens": total_tokens,
|
||||
"llm_call_count": llm_call_count,
|
||||
"lead_agent_tokens": lead_agent_tokens,
|
||||
"subagent_tokens": subagent_tokens,
|
||||
"middleware_tokens": middleware_tokens,
|
||||
"message_count": message_count,
|
||||
"updated_at": datetime.now(UTC),
|
||||
}
|
||||
if last_ai_message is not None:
|
||||
values["last_ai_message"] = last_ai_message[:2000]
|
||||
if first_human_message is not None:
|
||||
values["first_human_message"] = first_human_message[:2000]
|
||||
if error is not None:
|
||||
values["error"] = error
|
||||
async with self._sf() as session:
|
||||
await session.execute(update(RunRow).where(RunRow.run_id == run_id).values(**values))
|
||||
await session.commit()
|
||||
|
||||
async def aggregate_tokens_by_thread(self, thread_id: str) -> dict[str, Any]:
|
||||
"""Aggregate token usage via a single SQL GROUP BY query."""
|
||||
_completed = RunRow.status.in_(("success", "error"))
|
||||
_thread = RunRow.thread_id == thread_id
|
||||
|
||||
stmt = (
|
||||
select(
|
||||
func.coalesce(RunRow.model_name, "unknown").label("model"),
|
||||
func.count().label("runs"),
|
||||
func.coalesce(func.sum(RunRow.total_tokens), 0).label("total_tokens"),
|
||||
func.coalesce(func.sum(RunRow.total_input_tokens), 0).label("total_input_tokens"),
|
||||
func.coalesce(func.sum(RunRow.total_output_tokens), 0).label("total_output_tokens"),
|
||||
func.coalesce(func.sum(RunRow.lead_agent_tokens), 0).label("lead_agent"),
|
||||
func.coalesce(func.sum(RunRow.subagent_tokens), 0).label("subagent"),
|
||||
func.coalesce(func.sum(RunRow.middleware_tokens), 0).label("middleware"),
|
||||
)
|
||||
.where(_thread, _completed)
|
||||
.group_by(func.coalesce(RunRow.model_name, "unknown"))
|
||||
)
|
||||
|
||||
async with self._sf() as session:
|
||||
rows = (await session.execute(stmt)).all()
|
||||
|
||||
total_tokens = total_input = total_output = total_runs = 0
|
||||
lead_agent = subagent = middleware = 0
|
||||
by_model: dict[str, dict] = {}
|
||||
for r in rows:
|
||||
by_model[r.model] = {"tokens": r.total_tokens, "runs": r.runs}
|
||||
total_tokens += r.total_tokens
|
||||
total_input += r.total_input_tokens
|
||||
total_output += r.total_output_tokens
|
||||
total_runs += r.runs
|
||||
lead_agent += r.lead_agent
|
||||
subagent += r.subagent
|
||||
middleware += r.middleware
|
||||
|
||||
return {
|
||||
"total_tokens": total_tokens,
|
||||
"total_input_tokens": total_input,
|
||||
"total_output_tokens": total_output,
|
||||
"total_runs": total_runs,
|
||||
"by_model": by_model,
|
||||
"by_caller": {
|
||||
"lead_agent": lead_agent,
|
||||
"subagent": subagent,
|
||||
"middleware": middleware,
|
||||
},
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
"""Thread metadata persistence — ORM, abstract store, and concrete implementations."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from deerflow.persistence.thread_meta.base import ThreadMetaStore
|
||||
from deerflow.persistence.thread_meta.memory import MemoryThreadMetaStore
|
||||
from deerflow.persistence.thread_meta.model import ThreadMetaRow
|
||||
from deerflow.persistence.thread_meta.sql import ThreadMetaRepository
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langgraph.store.base import BaseStore
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
|
||||
__all__ = [
|
||||
"MemoryThreadMetaStore",
|
||||
"ThreadMetaRepository",
|
||||
"ThreadMetaRow",
|
||||
"ThreadMetaStore",
|
||||
"make_thread_store",
|
||||
]
|
||||
|
||||
|
||||
def make_thread_store(
|
||||
session_factory: async_sessionmaker[AsyncSession] | None,
|
||||
store: BaseStore | None = None,
|
||||
) -> ThreadMetaStore:
|
||||
"""Create the appropriate ThreadMetaStore based on available backends.
|
||||
|
||||
Returns a SQL-backed repository when a session factory is available,
|
||||
otherwise falls back to the in-memory LangGraph Store implementation.
|
||||
"""
|
||||
if session_factory is not None:
|
||||
return ThreadMetaRepository(session_factory)
|
||||
if store is None:
|
||||
raise ValueError("make_thread_store requires either a session_factory (SQL) or a store (memory)")
|
||||
return MemoryThreadMetaStore(store)
|
||||
@@ -0,0 +1,76 @@
|
||||
"""Abstract interface for thread metadata storage.
|
||||
|
||||
Implementations:
|
||||
- ThreadMetaRepository: SQL-backed (sqlite / postgres via SQLAlchemy)
|
||||
- MemoryThreadMetaStore: wraps LangGraph BaseStore (memory mode)
|
||||
|
||||
All mutating and querying methods accept a ``user_id`` parameter with
|
||||
three-state semantics (see :mod:`deerflow.runtime.user_context`):
|
||||
|
||||
- ``AUTO`` (default): resolve from the request-scoped contextvar.
|
||||
- Explicit ``str``: use the provided value verbatim.
|
||||
- Explicit ``None``: bypass owner filtering (migration/CLI only).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import abc
|
||||
|
||||
from deerflow.runtime.user_context import AUTO, _AutoSentinel
|
||||
|
||||
|
||||
class ThreadMetaStore(abc.ABC):
|
||||
@abc.abstractmethod
|
||||
async def create(
|
||||
self,
|
||||
thread_id: str,
|
||||
*,
|
||||
assistant_id: str | None = None,
|
||||
user_id: str | None | _AutoSentinel = AUTO,
|
||||
display_name: str | None = None,
|
||||
metadata: dict | None = None,
|
||||
) -> dict:
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
async def get(self, thread_id: str, *, user_id: str | None | _AutoSentinel = AUTO) -> dict | None:
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
async def search(
|
||||
self,
|
||||
*,
|
||||
metadata: dict | None = None,
|
||||
status: str | None = None,
|
||||
limit: int = 100,
|
||||
offset: int = 0,
|
||||
user_id: str | None | _AutoSentinel = AUTO,
|
||||
) -> list[dict]:
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
async def update_display_name(self, thread_id: str, display_name: str, *, user_id: str | None | _AutoSentinel = AUTO) -> None:
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
async def update_status(self, thread_id: str, status: str, *, user_id: str | None | _AutoSentinel = AUTO) -> None:
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
async def update_metadata(self, thread_id: str, metadata: dict, *, user_id: str | None | _AutoSentinel = AUTO) -> None:
|
||||
"""Merge ``metadata`` into the thread's metadata field.
|
||||
|
||||
Existing keys are overwritten by the new values; keys absent from
|
||||
``metadata`` are preserved. No-op if the thread does not exist
|
||||
or the owner check fails.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
async def check_access(self, thread_id: str, user_id: str, *, require_existing: bool = False) -> bool:
|
||||
"""Check if ``user_id`` has access to ``thread_id``."""
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
async def delete(self, thread_id: str, *, user_id: str | None | _AutoSentinel = AUTO) -> None:
|
||||
pass
|
||||
@@ -0,0 +1,149 @@
|
||||
"""In-memory ThreadMetaStore backed by LangGraph BaseStore.
|
||||
|
||||
Used when database.backend=memory. Delegates to the LangGraph Store's
|
||||
``("threads",)`` namespace — the same namespace used by the Gateway
|
||||
router for thread records.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
from langgraph.store.base import BaseStore
|
||||
|
||||
from deerflow.persistence.thread_meta.base import ThreadMetaStore
|
||||
from deerflow.runtime.user_context import AUTO, _AutoSentinel, resolve_user_id
|
||||
|
||||
THREADS_NS: tuple[str, ...] = ("threads",)
|
||||
|
||||
|
||||
class MemoryThreadMetaStore(ThreadMetaStore):
|
||||
def __init__(self, store: BaseStore) -> None:
|
||||
self._store = store
|
||||
|
||||
async def _get_owned_record(
|
||||
self,
|
||||
thread_id: str,
|
||||
user_id: str | None | _AutoSentinel,
|
||||
method_name: str,
|
||||
) -> dict | None:
|
||||
"""Fetch a record and verify ownership. Returns a mutable copy, or None."""
|
||||
resolved = resolve_user_id(user_id, method_name=method_name)
|
||||
item = await self._store.aget(THREADS_NS, thread_id)
|
||||
if item is None:
|
||||
return None
|
||||
record = dict(item.value)
|
||||
if resolved is not None and record.get("user_id") != resolved:
|
||||
return None
|
||||
return record
|
||||
|
||||
async def create(
|
||||
self,
|
||||
thread_id: str,
|
||||
*,
|
||||
assistant_id: str | None = None,
|
||||
user_id: str | None | _AutoSentinel = AUTO,
|
||||
display_name: str | None = None,
|
||||
metadata: dict | None = None,
|
||||
) -> dict:
|
||||
resolved_user_id = resolve_user_id(user_id, method_name="MemoryThreadMetaStore.create")
|
||||
now = time.time()
|
||||
record: dict[str, Any] = {
|
||||
"thread_id": thread_id,
|
||||
"assistant_id": assistant_id,
|
||||
"user_id": resolved_user_id,
|
||||
"display_name": display_name,
|
||||
"status": "idle",
|
||||
"metadata": metadata or {},
|
||||
"values": {},
|
||||
"created_at": now,
|
||||
"updated_at": now,
|
||||
}
|
||||
await self._store.aput(THREADS_NS, thread_id, record)
|
||||
return record
|
||||
|
||||
async def get(self, thread_id: str, *, user_id: str | None | _AutoSentinel = AUTO) -> dict | None:
|
||||
return await self._get_owned_record(thread_id, user_id, "MemoryThreadMetaStore.get")
|
||||
|
||||
async def search(
|
||||
self,
|
||||
*,
|
||||
metadata: dict | None = None,
|
||||
status: str | None = None,
|
||||
limit: int = 100,
|
||||
offset: int = 0,
|
||||
user_id: str | None | _AutoSentinel = AUTO,
|
||||
) -> list[dict]:
|
||||
resolved_user_id = resolve_user_id(user_id, method_name="MemoryThreadMetaStore.search")
|
||||
filter_dict: dict[str, Any] = {}
|
||||
if metadata:
|
||||
filter_dict.update(metadata)
|
||||
if status:
|
||||
filter_dict["status"] = status
|
||||
if resolved_user_id is not None:
|
||||
filter_dict["user_id"] = resolved_user_id
|
||||
|
||||
items = await self._store.asearch(
|
||||
THREADS_NS,
|
||||
filter=filter_dict or None,
|
||||
limit=limit,
|
||||
offset=offset,
|
||||
)
|
||||
return [self._item_to_dict(item) for item in items]
|
||||
|
||||
async def check_access(self, thread_id: str, user_id: str, *, require_existing: bool = False) -> bool:
|
||||
item = await self._store.aget(THREADS_NS, thread_id)
|
||||
if item is None:
|
||||
return not require_existing
|
||||
record_user_id = item.value.get("user_id")
|
||||
if record_user_id is None:
|
||||
return True
|
||||
return record_user_id == user_id
|
||||
|
||||
async def update_display_name(self, thread_id: str, display_name: str, *, user_id: str | None | _AutoSentinel = AUTO) -> None:
|
||||
record = await self._get_owned_record(thread_id, user_id, "MemoryThreadMetaStore.update_display_name")
|
||||
if record is None:
|
||||
return
|
||||
record["display_name"] = display_name
|
||||
record["updated_at"] = time.time()
|
||||
await self._store.aput(THREADS_NS, thread_id, record)
|
||||
|
||||
async def update_status(self, thread_id: str, status: str, *, user_id: str | None | _AutoSentinel = AUTO) -> None:
|
||||
record = await self._get_owned_record(thread_id, user_id, "MemoryThreadMetaStore.update_status")
|
||||
if record is None:
|
||||
return
|
||||
record["status"] = status
|
||||
record["updated_at"] = time.time()
|
||||
await self._store.aput(THREADS_NS, thread_id, record)
|
||||
|
||||
async def update_metadata(self, thread_id: str, metadata: dict, *, user_id: str | None | _AutoSentinel = AUTO) -> None:
|
||||
record = await self._get_owned_record(thread_id, user_id, "MemoryThreadMetaStore.update_metadata")
|
||||
if record is None:
|
||||
return
|
||||
merged = dict(record.get("metadata") or {})
|
||||
merged.update(metadata)
|
||||
record["metadata"] = merged
|
||||
record["updated_at"] = time.time()
|
||||
await self._store.aput(THREADS_NS, thread_id, record)
|
||||
|
||||
async def delete(self, thread_id: str, *, user_id: str | None | _AutoSentinel = AUTO) -> None:
|
||||
record = await self._get_owned_record(thread_id, user_id, "MemoryThreadMetaStore.delete")
|
||||
if record is None:
|
||||
return
|
||||
await self._store.adelete(THREADS_NS, thread_id)
|
||||
|
||||
@staticmethod
|
||||
def _item_to_dict(item) -> dict[str, Any]:
|
||||
"""Convert a Store SearchItem to the dict format expected by callers."""
|
||||
val = item.value
|
||||
return {
|
||||
"thread_id": item.key,
|
||||
"assistant_id": val.get("assistant_id"),
|
||||
"user_id": val.get("user_id"),
|
||||
"display_name": val.get("display_name"),
|
||||
"status": val.get("status", "idle"),
|
||||
"metadata": val.get("metadata", {}),
|
||||
"created_at": str(val.get("created_at", "")),
|
||||
"updated_at": str(val.get("updated_at", "")),
|
||||
}
|
||||
@@ -0,0 +1,23 @@
|
||||
"""ORM model for thread metadata."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from sqlalchemy import JSON, DateTime, String
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from deerflow.persistence.base import Base
|
||||
|
||||
|
||||
class ThreadMetaRow(Base):
|
||||
__tablename__ = "threads_meta"
|
||||
|
||||
thread_id: Mapped[str] = mapped_column(String(64), primary_key=True)
|
||||
assistant_id: Mapped[str | None] = mapped_column(String(128), index=True)
|
||||
user_id: Mapped[str | None] = mapped_column(String(64), index=True)
|
||||
display_name: Mapped[str | None] = mapped_column(String(256))
|
||||
status: Mapped[str] = mapped_column(String(20), default="idle")
|
||||
metadata_json: Mapped[dict] = mapped_column(JSON, default=dict)
|
||||
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(UTC))
|
||||
updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(UTC), onupdate=lambda: datetime.now(UTC))
|
||||
@@ -0,0 +1,217 @@
|
||||
"""SQLAlchemy-backed thread metadata repository."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy import select, update
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||
|
||||
from deerflow.persistence.thread_meta.base import ThreadMetaStore
|
||||
from deerflow.persistence.thread_meta.model import ThreadMetaRow
|
||||
from deerflow.runtime.user_context import AUTO, _AutoSentinel, resolve_user_id
|
||||
|
||||
|
||||
class ThreadMetaRepository(ThreadMetaStore):
|
||||
def __init__(self, session_factory: async_sessionmaker[AsyncSession]) -> None:
|
||||
self._sf = session_factory
|
||||
|
||||
@staticmethod
|
||||
def _row_to_dict(row: ThreadMetaRow) -> dict[str, Any]:
|
||||
d = row.to_dict()
|
||||
d["metadata"] = d.pop("metadata_json", {})
|
||||
for key in ("created_at", "updated_at"):
|
||||
val = d.get(key)
|
||||
if isinstance(val, datetime):
|
||||
d[key] = val.isoformat()
|
||||
return d
|
||||
|
||||
async def create(
|
||||
self,
|
||||
thread_id: str,
|
||||
*,
|
||||
assistant_id: str | None = None,
|
||||
user_id: str | None | _AutoSentinel = AUTO,
|
||||
display_name: str | None = None,
|
||||
metadata: dict | None = None,
|
||||
) -> dict:
|
||||
# Auto-resolve user_id from contextvar when AUTO; explicit None
|
||||
# creates an orphan row (used by migration scripts).
|
||||
resolved_user_id = resolve_user_id(user_id, method_name="ThreadMetaRepository.create")
|
||||
now = datetime.now(UTC)
|
||||
row = ThreadMetaRow(
|
||||
thread_id=thread_id,
|
||||
assistant_id=assistant_id,
|
||||
user_id=resolved_user_id,
|
||||
display_name=display_name,
|
||||
metadata_json=metadata or {},
|
||||
created_at=now,
|
||||
updated_at=now,
|
||||
)
|
||||
async with self._sf() as session:
|
||||
session.add(row)
|
||||
await session.commit()
|
||||
await session.refresh(row)
|
||||
return self._row_to_dict(row)
|
||||
|
||||
async def get(
|
||||
self,
|
||||
thread_id: str,
|
||||
*,
|
||||
user_id: str | None | _AutoSentinel = AUTO,
|
||||
) -> dict | None:
|
||||
resolved_user_id = resolve_user_id(user_id, method_name="ThreadMetaRepository.get")
|
||||
async with self._sf() as session:
|
||||
row = await session.get(ThreadMetaRow, thread_id)
|
||||
if row is None:
|
||||
return None
|
||||
# Enforce owner filter unless explicitly bypassed (user_id=None).
|
||||
if resolved_user_id is not None and row.user_id != resolved_user_id:
|
||||
return None
|
||||
return self._row_to_dict(row)
|
||||
|
||||
async def check_access(self, thread_id: str, user_id: str, *, require_existing: bool = False) -> bool:
|
||||
"""Check if ``user_id`` has access to ``thread_id``.
|
||||
|
||||
Two modes — one row, two distinct semantics depending on what
|
||||
the caller is about to do:
|
||||
|
||||
- ``require_existing=False`` (default, permissive):
|
||||
Returns True for: row missing (untracked legacy thread),
|
||||
``row.user_id`` is None (shared / pre-auth data),
|
||||
or ``row.user_id == user_id``. Use for **read-style**
|
||||
decorators where treating an untracked thread as accessible
|
||||
preserves backward-compat.
|
||||
|
||||
- ``require_existing=True`` (strict):
|
||||
Returns True **only** when the row exists AND
|
||||
(``row.user_id == user_id`` OR ``row.user_id is None``).
|
||||
Use for **destructive / mutating** decorators (DELETE, PATCH,
|
||||
state-update) so a thread that has *already been deleted*
|
||||
cannot be re-targeted by any caller — closing the
|
||||
delete-idempotence cross-user gap where the row vanishing
|
||||
made every other user appear to "own" it.
|
||||
"""
|
||||
async with self._sf() as session:
|
||||
row = await session.get(ThreadMetaRow, thread_id)
|
||||
if row is None:
|
||||
return not require_existing
|
||||
if row.user_id is None:
|
||||
return True
|
||||
return row.user_id == user_id
|
||||
|
||||
async def search(
|
||||
self,
|
||||
*,
|
||||
metadata: dict | None = None,
|
||||
status: str | None = None,
|
||||
limit: int = 100,
|
||||
offset: int = 0,
|
||||
user_id: str | None | _AutoSentinel = AUTO,
|
||||
) -> list[dict]:
|
||||
"""Search threads with optional metadata and status filters.
|
||||
|
||||
Owner filter is enforced by default: caller must be in a user
|
||||
context. Pass ``user_id=None`` to bypass (migration/CLI).
|
||||
"""
|
||||
resolved_user_id = resolve_user_id(user_id, method_name="ThreadMetaRepository.search")
|
||||
stmt = select(ThreadMetaRow).order_by(ThreadMetaRow.updated_at.desc())
|
||||
if resolved_user_id is not None:
|
||||
stmt = stmt.where(ThreadMetaRow.user_id == resolved_user_id)
|
||||
if status:
|
||||
stmt = stmt.where(ThreadMetaRow.status == status)
|
||||
|
||||
if metadata:
|
||||
# When metadata filter is active, fetch a larger window and filter
|
||||
# in Python. TODO(Phase 2): use JSON DB operators (Postgres @>,
|
||||
# SQLite json_extract) for server-side filtering.
|
||||
stmt = stmt.limit(limit * 5 + offset)
|
||||
async with self._sf() as session:
|
||||
result = await session.execute(stmt)
|
||||
rows = [self._row_to_dict(r) for r in result.scalars()]
|
||||
rows = [r for r in rows if all(r.get("metadata", {}).get(k) == v for k, v in metadata.items())]
|
||||
return rows[offset : offset + limit]
|
||||
else:
|
||||
stmt = stmt.limit(limit).offset(offset)
|
||||
async with self._sf() as session:
|
||||
result = await session.execute(stmt)
|
||||
return [self._row_to_dict(r) for r in result.scalars()]
|
||||
|
||||
async def _check_ownership(self, session: AsyncSession, thread_id: str, resolved_user_id: str | None) -> bool:
|
||||
"""Return True if the row exists and is owned (or filter bypassed)."""
|
||||
if resolved_user_id is None:
|
||||
return True # explicit bypass
|
||||
row = await session.get(ThreadMetaRow, thread_id)
|
||||
return row is not None and row.user_id == resolved_user_id
|
||||
|
||||
async def update_display_name(
|
||||
self,
|
||||
thread_id: str,
|
||||
display_name: str,
|
||||
*,
|
||||
user_id: str | None | _AutoSentinel = AUTO,
|
||||
) -> None:
|
||||
"""Update the display_name (title) for a thread."""
|
||||
resolved_user_id = resolve_user_id(user_id, method_name="ThreadMetaRepository.update_display_name")
|
||||
async with self._sf() as session:
|
||||
if not await self._check_ownership(session, thread_id, resolved_user_id):
|
||||
return
|
||||
await session.execute(update(ThreadMetaRow).where(ThreadMetaRow.thread_id == thread_id).values(display_name=display_name, updated_at=datetime.now(UTC)))
|
||||
await session.commit()
|
||||
|
||||
async def update_status(
|
||||
self,
|
||||
thread_id: str,
|
||||
status: str,
|
||||
*,
|
||||
user_id: str | None | _AutoSentinel = AUTO,
|
||||
) -> None:
|
||||
resolved_user_id = resolve_user_id(user_id, method_name="ThreadMetaRepository.update_status")
|
||||
async with self._sf() as session:
|
||||
if not await self._check_ownership(session, thread_id, resolved_user_id):
|
||||
return
|
||||
await session.execute(update(ThreadMetaRow).where(ThreadMetaRow.thread_id == thread_id).values(status=status, updated_at=datetime.now(UTC)))
|
||||
await session.commit()
|
||||
|
||||
async def update_metadata(
|
||||
self,
|
||||
thread_id: str,
|
||||
metadata: dict,
|
||||
*,
|
||||
user_id: str | None | _AutoSentinel = AUTO,
|
||||
) -> None:
|
||||
"""Merge ``metadata`` into ``metadata_json``.
|
||||
|
||||
Read-modify-write inside a single session/transaction so concurrent
|
||||
callers see consistent state. No-op if the row does not exist or
|
||||
the user_id check fails.
|
||||
"""
|
||||
resolved_user_id = resolve_user_id(user_id, method_name="ThreadMetaRepository.update_metadata")
|
||||
async with self._sf() as session:
|
||||
row = await session.get(ThreadMetaRow, thread_id)
|
||||
if row is None:
|
||||
return
|
||||
if resolved_user_id is not None and row.user_id != resolved_user_id:
|
||||
return
|
||||
merged = dict(row.metadata_json or {})
|
||||
merged.update(metadata)
|
||||
row.metadata_json = merged
|
||||
row.updated_at = datetime.now(UTC)
|
||||
await session.commit()
|
||||
|
||||
async def delete(
|
||||
self,
|
||||
thread_id: str,
|
||||
*,
|
||||
user_id: str | None | _AutoSentinel = AUTO,
|
||||
) -> None:
|
||||
resolved_user_id = resolve_user_id(user_id, method_name="ThreadMetaRepository.delete")
|
||||
async with self._sf() as session:
|
||||
row = await session.get(ThreadMetaRow, thread_id)
|
||||
if row is None:
|
||||
return
|
||||
if resolved_user_id is not None and row.user_id != resolved_user_id:
|
||||
return
|
||||
await session.delete(row)
|
||||
await session.commit()
|
||||
@@ -0,0 +1,12 @@
|
||||
"""User storage subpackage.
|
||||
|
||||
Holds the ORM model for the ``users`` table. The concrete repository
|
||||
implementation (``SQLiteUserRepository``) lives in the app layer
|
||||
(``app.gateway.auth.repositories.sqlite``) because it converts
|
||||
between the ORM row and the auth module's pydantic ``User`` class.
|
||||
This keeps the harness package free of any dependency on app code.
|
||||
"""
|
||||
|
||||
from deerflow.persistence.user.model import UserRow
|
||||
|
||||
__all__ = ["UserRow"]
|
||||
@@ -0,0 +1,59 @@
|
||||
"""ORM model for the users table.
|
||||
|
||||
Lives in the harness persistence package so it is picked up by
|
||||
``Base.metadata.create_all()`` alongside ``threads_meta``, ``runs``,
|
||||
``run_events``, and ``feedback``. Using the shared engine means:
|
||||
|
||||
- One SQLite/Postgres database, one connection pool
|
||||
- One schema initialisation codepath
|
||||
- Consistent async sessions across auth and persistence reads
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from sqlalchemy import Boolean, DateTime, Index, String, text
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from deerflow.persistence.base import Base
|
||||
|
||||
|
||||
class UserRow(Base):
|
||||
__tablename__ = "users"
|
||||
|
||||
# UUIDs are stored as 36-char strings for cross-backend portability.
|
||||
id: Mapped[str] = mapped_column(String(36), primary_key=True)
|
||||
|
||||
email: Mapped[str] = mapped_column(String(320), unique=True, nullable=False, index=True)
|
||||
password_hash: Mapped[str | None] = mapped_column(String(128), nullable=True)
|
||||
|
||||
# "admin" | "user" — kept as plain string to avoid ALTER TABLE pain
|
||||
# when new roles are introduced.
|
||||
system_role: Mapped[str] = mapped_column(String(16), nullable=False, default="user")
|
||||
|
||||
created_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True),
|
||||
nullable=False,
|
||||
default=lambda: datetime.now(UTC),
|
||||
)
|
||||
|
||||
# OAuth linkage (optional). A partial unique index enforces one
|
||||
# account per (provider, oauth_id) pair, leaving NULL/NULL rows
|
||||
# unconstrained so plain password accounts can coexist.
|
||||
oauth_provider: Mapped[str | None] = mapped_column(String(32), nullable=True)
|
||||
oauth_id: Mapped[str | None] = mapped_column(String(128), nullable=True)
|
||||
|
||||
# Auth lifecycle flags
|
||||
needs_setup: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
|
||||
token_version: Mapped[int] = mapped_column(nullable=False, default=0)
|
||||
|
||||
__table_args__ = (
|
||||
Index(
|
||||
"idx_users_oauth_identity",
|
||||
"oauth_provider",
|
||||
"oauth_id",
|
||||
unique=True,
|
||||
sqlite_where=text("oauth_provider IS NOT NULL AND oauth_id IS NOT NULL"),
|
||||
),
|
||||
)
|
||||
Reference in New Issue
Block a user