feat(storage): add storage package base

This commit is contained in:
rayhpeng
2026-05-12 19:08:37 +08:00
parent 20d2d2b373
commit 485f8a2bf2
45 changed files with 3199 additions and 2 deletions
@@ -0,0 +1,51 @@
from store.repositories.contracts import (
Feedback,
FeedbackAggregate,
FeedbackCreate,
FeedbackRepositoryProtocol,
Run,
RunCreate,
RunEvent,
RunEventCreate,
RunEventRepositoryProtocol,
RunRepositoryProtocol,
ThreadMeta,
ThreadMetaCreate,
ThreadMetaRepositoryProtocol,
User,
UserCreate,
UserNotFoundError,
UserRepositoryProtocol,
)
from store.repositories.factory import (
build_feedback_repository,
build_run_event_repository,
build_run_repository,
build_thread_meta_repository,
build_user_repository,
)
# Names re-exported by this package: the contract models and repository
# protocols, followed by the repository builder functions imported above.
__all__ = [
    "Feedback",
    "FeedbackAggregate",
    "FeedbackCreate",
    "FeedbackRepositoryProtocol",
    "Run",
    "RunCreate",
    "RunEvent",
    "RunEventCreate",
    "RunEventRepositoryProtocol",
    "RunRepositoryProtocol",
    "ThreadMeta",
    "ThreadMetaCreate",
    "ThreadMetaRepositoryProtocol",
    "User",
    "UserCreate",
    "UserNotFoundError",
    "UserRepositoryProtocol",
    "build_run_repository",
    "build_run_event_repository",
    "build_thread_meta_repository",
    "build_feedback_repository",
    "build_user_repository",
]
@@ -0,0 +1,47 @@
from store.repositories.contracts.feedback import (
Feedback,
FeedbackAggregate,
FeedbackCreate,
FeedbackRepositoryProtocol,
)
from store.repositories.contracts.run import (
Run,
RunCreate,
RunRepositoryProtocol,
)
from store.repositories.contracts.run_event import (
RunEvent,
RunEventCreate,
RunEventRepositoryProtocol,
)
from store.repositories.contracts.thread_meta import (
ThreadMeta,
ThreadMetaCreate,
ThreadMetaRepositoryProtocol,
)
from store.repositories.contracts.user import (
User,
UserCreate,
UserNotFoundError,
UserRepositoryProtocol,
)
# Names re-exported by the contracts package: one create-model, one read-model
# and one repository protocol per entity, plus the user lookup error.
__all__ = [
    "Feedback",
    "FeedbackAggregate",
    "FeedbackCreate",
    "FeedbackRepositoryProtocol",
    "Run",
    "RunCreate",
    "RunEvent",
    "RunEventCreate",
    "RunEventRepositoryProtocol",
    "RunRepositoryProtocol",
    "ThreadMeta",
    "ThreadMetaCreate",
    "ThreadMetaRepositoryProtocol",
    "User",
    "UserCreate",
    "UserNotFoundError",
    "UserRepositoryProtocol",
]
@@ -0,0 +1,62 @@
from __future__ import annotations
from datetime import datetime
from typing import Protocol, TypedDict
from pydantic import BaseModel, ConfigDict
class FeedbackCreate(BaseModel):
    """Input payload describing a feedback record to be stored.

    Unknown fields are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    feedback_id: str
    run_id: str
    thread_id: str
    # Typed as a plain int; this model itself places no range constraint on it.
    rating: int
    # The remaining fields are optional and default to None.
    user_id: str | None = None
    message_id: str | None = None
    comment: str | None = None
class Feedback(BaseModel):
    """Read model of a stored feedback record.

    Instances are frozen, so fields cannot be reassigned after construction.
    Unlike ``FeedbackCreate``, every field must be supplied explicitly (the
    optional ones may be None) and ``created_time`` is included.
    """

    model_config = ConfigDict(frozen=True)

    feedback_id: str
    run_id: str
    thread_id: str
    rating: int
    user_id: str | None
    message_id: str | None
    comment: str | None
    created_time: datetime
class FeedbackAggregate(TypedDict):
    """Dictionary shape returned by ``aggregate_feedback_by_run``."""

    # Run the counts belong to.
    run_id: str
    # Counters for that run; presumably total / +1 / -1 ratings — confirm against the implementation.
    total: int
    positive: int
    negative: int
class FeedbackRepositoryProtocol(Protocol):
    """Structural interface for feedback persistence.

    Every operation is a coroutine. Only signatures are fixed here; storage
    details are left to implementations.
    """

    # Store a new feedback record and return it as a read model.
    async def create_feedback(self, data: FeedbackCreate) -> Feedback: ...

    # Create the record or update an existing one; how "existing" is matched is implementation-defined.
    async def upsert_feedback(self, data: FeedbackCreate) -> Feedback: ...

    # Look up a single record by its id; the return type allows None for "not found".
    async def get_feedback(self, feedback_id: str) -> Feedback | None: ...

    # List feedback for a run. thread_id, user_id and limit are keyword-only and optional.
    async def list_feedback_by_run(
        self,
        run_id: str,
        *,
        thread_id: str | None = None,
        user_id: str | None = None,
        limit: int | None = None,
    ) -> list[Feedback]: ...

    # List feedback for a thread, optionally narrowed by user and capped by limit.
    async def list_feedback_by_thread(
        self,
        thread_id: str,
        *,
        user_id: str | None = None,
        limit: int | None = None,
    ) -> list[Feedback]: ...

    # Delete by id; the bool presumably reports whether anything was removed — confirm per implementation.
    async def delete_feedback(self, feedback_id: str) -> bool: ...

    # Delete feedback attached to a run within a thread, optionally restricted to one user.
    # NOTE: thread_id comes first here, whereas list_feedback_by_run takes run_id first.
    async def delete_feedback_by_run(self, thread_id: str, run_id: str, *, user_id: str | None = None) -> bool: ...

    # Return rating counters for one run as a FeedbackAggregate dictionary.
    async def aggregate_feedback_by_run(self, thread_id: str, run_id: str) -> FeedbackAggregate: ...
@@ -0,0 +1,85 @@
from __future__ import annotations
from datetime import datetime
from typing import Any, Protocol
from pydantic import BaseModel, ConfigDict, Field
class RunCreate(BaseModel):
    """Input payload describing a run record to be stored.

    Unknown fields are rejected (``extra="forbid"``).

    ``protected_namespaces`` is cleared because the ``model_name`` field starts
    with Pydantic's reserved ``model_`` prefix; Pydantic releases that protect
    that prefix by default emit a ``UserWarning`` when such a class is defined.
    The field does not shadow any real ``BaseModel`` attribute.
    """

    model_config = ConfigDict(extra="forbid", protected_namespaces=())

    run_id: str
    thread_id: str
    assistant_id: str | None = None
    user_id: str | None = None
    status: str = "pending"
    model_name: str | None = None
    multitask_strategy: str = "reject"
    error: str | None = None
    follow_up_to_run_id: str | None = None
    # default_factory gives every instance its own dict instead of a shared one.
    metadata: dict[str, Any] = Field(default_factory=dict)
    kwargs: dict[str, Any] = Field(default_factory=dict)
    # Optional explicit creation timestamp; None means the caller did not supply one.
    created_time: datetime | None = None
class Run(BaseModel):
    """Read model of a stored run, including its token and message statistics.

    Instances are frozen, so fields cannot be reassigned after construction.

    ``protected_namespaces`` is cleared because the ``model_name`` field starts
    with Pydantic's reserved ``model_`` prefix; Pydantic releases that protect
    that prefix by default emit a ``UserWarning`` when such a class is defined.
    The field does not shadow any real ``BaseModel`` attribute.
    """

    model_config = ConfigDict(frozen=True, protected_namespaces=())

    # Identity and lifecycle.
    run_id: str
    thread_id: str
    assistant_id: str | None
    user_id: str | None
    status: str
    model_name: str | None
    multitask_strategy: str
    error: str | None
    follow_up_to_run_id: str | None
    metadata: dict[str, Any]
    kwargs: dict[str, Any]

    # Usage counters.
    total_input_tokens: int
    total_output_tokens: int
    total_tokens: int
    llm_call_count: int
    lead_agent_tokens: int
    subagent_tokens: int
    middleware_tokens: int
    message_count: int

    # Message excerpts and timestamps.
    first_human_message: str | None
    last_ai_message: str | None
    created_time: datetime
    updated_time: datetime | None
class RunRepositoryProtocol(Protocol):
    """Structural interface for run persistence.

    Every operation is a coroutine. Only signatures are fixed here; storage
    details are left to implementations.
    """

    # Store a new run and return it as a read model.
    async def create_run(self, data: RunCreate) -> Run: ...

    # Look up one run by id; the return type allows None for "not found".
    async def get_run(self, run_id: str) -> Run | None: ...

    # Page through the runs of a thread (limit/offset), optionally narrowed to one user.
    async def list_runs_by_thread(
        self,
        thread_id: str,
        *,
        user_id: str | None = None,
        limit: int = 50,
        offset: int = 0,
    ) -> list[Run]: ...

    # Set a run's status and, optionally, its error text.
    async def update_run_status(self, run_id: str, status: str, *, error: str | None = None) -> None: ...

    # Remove a run by id.
    async def delete_run(self, run_id: str) -> None: ...

    # List pending runs; `before` is an optional cutoff given as a datetime or a string.
    async def list_pending(self, *, before: datetime | str | None = None) -> list[Run]: ...

    # Record the final status together with usage counters and message excerpts.
    # All counters default to 0 and all text fields default to None.
    async def update_run_completion(
        self,
        run_id: str,
        *,
        status: str,
        total_input_tokens: int = 0,
        total_output_tokens: int = 0,
        total_tokens: int = 0,
        llm_call_count: int = 0,
        lead_agent_tokens: int = 0,
        subagent_tokens: int = 0,
        middleware_tokens: int = 0,
        message_count: int = 0,
        first_human_message: str | None = None,
        last_ai_message: str | None = None,
        error: str | None = None,
    ) -> None: ...

    # Summarise token usage for a thread; the dictionary layout is implementation-defined.
    async def aggregate_tokens_by_thread(self, thread_id: str) -> dict[str, Any]: ...
@@ -0,0 +1,74 @@
from __future__ import annotations
from datetime import datetime
from typing import Any, Protocol
from pydantic import BaseModel, ConfigDict, Field
class RunEventCreate(BaseModel):
    """Input payload describing one run event to be appended.

    Unknown fields are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    thread_id: str
    run_id: str
    user_id: str | None = None
    event_type: str
    category: str
    # Arbitrary payload; defaults to the empty string.
    content: Any = ""
    # default_factory gives every instance its own dict instead of a shared one.
    metadata: dict[str, Any] = Field(default_factory=dict)
    # Optional explicit timestamp; None means the caller did not supply one.
    created_at: datetime | None = None
class RunEvent(BaseModel):
    """Read model of a stored run event.

    Instances are frozen, so fields cannot be reassigned after construction.
    Compared with ``RunEventCreate`` it additionally carries ``seq`` and a
    non-optional ``created_at``.
    """

    model_config = ConfigDict(frozen=True)

    thread_id: str
    run_id: str
    user_id: str | None
    event_type: str
    category: str
    content: Any
    metadata: dict[str, Any]
    # Sequence number of the event; presumably scoped to the thread — confirm against the implementation.
    seq: int
    created_at: datetime
class RunEventRepositoryProtocol(Protocol):
    """Structural interface for run-event persistence.

    Every operation is a coroutine. Only signatures are fixed here; storage
    details are left to implementations.
    """

    # Append several events at once and return them as read models (which include `seq`).
    async def append_batch(self, events: list[RunEventCreate]) -> list[RunEvent]: ...

    # List message events of a thread. before_seq / after_seq are optional sequence cursors;
    # how the two combine when both are given is implementation-defined.
    async def list_messages(
        self,
        thread_id: str,
        *,
        limit: int = 50,
        before_seq: int | None = None,
        after_seq: int | None = None,
        user_id: str | None = None,
    ) -> list[RunEvent]: ...

    # List events of one run, optionally restricted to the given event types and user.
    async def list_events(
        self,
        thread_id: str,
        run_id: str,
        *,
        event_types: list[str] | None = None,
        limit: int = 500,
        user_id: str | None = None,
    ) -> list[RunEvent]: ...

    # Same cursor parameters as list_messages, but scoped to a single run.
    async def list_messages_by_run(
        self,
        thread_id: str,
        run_id: str,
        *,
        limit: int = 50,
        before_seq: int | None = None,
        after_seq: int | None = None,
        user_id: str | None = None,
    ) -> list[RunEvent]: ...

    # Count message events in a thread, optionally for one user.
    async def count_messages(self, thread_id: str, *, user_id: str | None = None) -> int: ...

    # Delete events of a thread; the int is presumably the number of removed events — confirm per implementation.
    async def delete_by_thread(self, thread_id: str, *, user_id: str | None = None) -> int: ...

    # Delete events of one run inside a thread; returns an int like delete_by_thread.
    async def delete_by_run(self, thread_id: str, run_id: str, *, user_id: str | None = None) -> int: ...
@@ -0,0 +1,58 @@
from __future__ import annotations
from datetime import datetime
from typing import Any, Protocol
from pydantic import BaseModel, ConfigDict, Field
class ThreadMetaCreate(BaseModel):
    """Input payload describing thread metadata to be stored.

    Unknown fields are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    thread_id: str
    assistant_id: str | None = None
    user_id: str | None = None
    display_name: str | None = None
    status: str = "idle"
    # default_factory gives every instance its own dict instead of a shared one.
    metadata: dict[str, Any] = Field(default_factory=dict)
class ThreadMeta(BaseModel):
    """Read model of stored thread metadata.

    Instances are frozen, so fields cannot be reassigned after construction.
    """

    model_config = ConfigDict(frozen=True)

    thread_id: str
    assistant_id: str | None
    user_id: str | None
    display_name: str | None
    status: str
    metadata: dict[str, Any]
    created_time: datetime
    # May be None; presumably until the first update — confirm against the model.
    updated_time: datetime | None
class ThreadMetaRepositoryProtocol(Protocol):
    """Structural interface for thread-metadata persistence.

    Every operation is a coroutine. Only signatures are fixed here; storage
    details are left to implementations.
    """

    # Store metadata for a new thread and return it as a read model.
    async def create_thread_meta(self, data: ThreadMetaCreate) -> ThreadMeta: ...

    # Look up one thread's metadata; the return type allows None for "not found".
    async def get_thread_meta(self, thread_id: str) -> ThreadMeta | None: ...

    # Update selected fields. Every field defaults to None, so None cannot be used
    # through this signature to distinguish "clear the value" from "leave unchanged".
    async def update_thread_meta(
        self,
        thread_id: str,
        *,
        display_name: str | None = None,
        status: str | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> None: ...

    # Remove a thread's metadata by thread id.
    async def delete_thread(self, thread_id: str) -> None: ...

    # Search threads with optional filters, paged with limit/offset.
    async def search_threads(
        self,
        *,
        metadata: dict[str, Any] | None = None,
        status: str | None = None,
        user_id: str | None = None,
        assistant_id: str | None = None,
        limit: int = 100,
        offset: int = 0,
    ) -> list[ThreadMeta]: ...
@@ -0,0 +1,56 @@
from __future__ import annotations
from datetime import datetime
from typing import Literal, Protocol
from pydantic import BaseModel, ConfigDict
class UserNotFoundError(LookupError):
    """Raised when an update targets a user row that no longer exists.

    Derives from ``LookupError``, so handlers written for that built-in also
    catch this error.
    """
class UserCreate(BaseModel):
    """Input payload describing a user account to be stored.

    Unknown fields are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    id: str
    email: str
    # Optional; None is allowed (presumably for OAuth-only accounts — confirm with callers).
    password_hash: str | None = None
    # Restricted to the two literal roles; defaults to "user".
    system_role: Literal["admin", "user"] = "user"
    # Optional explicit timestamp; None means the caller did not supply one.
    created_at: datetime | None = None
    oauth_provider: str | None = None
    oauth_id: str | None = None
    needs_setup: bool = False
    token_version: int = 0
class User(BaseModel):
    """Read model of a stored user account.

    Instances are frozen, so fields cannot be reassigned after construction.
    """

    model_config = ConfigDict(frozen=True)

    id: str
    email: str
    password_hash: str | None
    system_role: Literal["admin", "user"]
    created_at: datetime
    oauth_provider: str | None
    oauth_id: str | None
    needs_setup: bool
    token_version: int
class UserRepositoryProtocol(Protocol):
    """Structural interface for user persistence.

    Every operation is a coroutine. Only signatures are fixed here; storage
    details are left to implementations.
    """

    # Store a new user and return it as a read model.
    async def create_user(self, data: UserCreate) -> User: ...

    # Lookups by id, e-mail, or OAuth identity; each return type allows None for "not found".
    async def get_user_by_id(self, user_id: str) -> User | None: ...
    async def get_user_by_email(self, email: str) -> User | None: ...
    async def get_user_by_oauth(self, provider: str, oauth_id: str) -> User | None: ...

    # Return an admin user if one exists; what "first" means is implementation-defined.
    async def get_first_admin(self) -> User | None: ...

    # Persist the state of an existing user, taken from a full User value.
    async def update_user(self, data: User) -> User: ...

    # Counters over all users and over admin users.
    async def count_users(self) -> int: ...
    async def count_admin_users(self) -> int: ...
@@ -0,0 +1,13 @@
from store.repositories.db.feedback import DbFeedbackRepository
from store.repositories.db.run import DbRunRepository
from store.repositories.db.run_event import DbRunEventRepository
from store.repositories.db.thread_meta import DbThreadMetaRepository
from store.repositories.db.user import DbUserRepository
# Names re-exported by the db package: the database-backed repository classes imported above.
__all__ = [
    "DbFeedbackRepository",
    "DbRunRepository",
    "DbRunEventRepository",
    "DbThreadMetaRepository",
    "DbUserRepository",
]
@@ -0,0 +1,146 @@
from __future__ import annotations
from datetime import UTC, datetime
from sqlalchemy import case, delete, func, select
from sqlalchemy.ext.asyncio import AsyncSession
from store.repositories.contracts.feedback import Feedback, FeedbackAggregate, FeedbackCreate, FeedbackRepositoryProtocol
from store.repositories.models.feedback import Feedback as FeedbackModel
def _to_feedback(m: FeedbackModel) -> Feedback:
    """Convert an ORM feedback row into the frozen ``Feedback`` contract model."""
    # Contract fields and ORM attributes share their names, so copy them one-to-one.
    field_names = (
        "feedback_id",
        "run_id",
        "thread_id",
        "rating",
        "user_id",
        "message_id",
        "comment",
        "created_time",
    )
    values = {name: getattr(m, name) for name in field_names}
    return Feedback(**values)
class DbFeedbackRepository(FeedbackRepositoryProtocol):
    """Feedback repository backed by SQLAlchemy ``FeedbackModel`` rows.

    Every method works on the ``AsyncSession`` given to the constructor.
    Nothing here commits; new and modified rows are only flushed.
    """

    def __init__(self, session: AsyncSession) -> None:
        self._session = session

    @staticmethod
    def _require_valid_rating(rating: int) -> None:
        """Raise ``ValueError`` unless *rating* is +1 or -1."""
        if rating not in (1, -1):
            raise ValueError(f"rating must be +1 or -1, got {rating}")

    async def create_feedback(self, data: FeedbackCreate) -> Feedback:
        """Insert a feedback row built from *data* and return it.

        Raises:
            ValueError: if ``data.rating`` is neither +1 nor -1.
        """
        self._require_valid_rating(data.rating)
        model = FeedbackModel(
            feedback_id=data.feedback_id,
            run_id=data.run_id,
            thread_id=data.thread_id,
            rating=data.rating,
            user_id=data.user_id,
            message_id=data.message_id,
            comment=data.comment,
        )
        self._session.add(model)
        await self._session.flush()
        # Reload the row so values not set above (e.g. created_time) are available.
        await self._session.refresh(model)
        return _to_feedback(model)

    async def upsert_feedback(self, data: FeedbackCreate) -> Feedback:
        """Update the feedback matching (thread_id, run_id, user_id) or create it.

        When a row exists, its rating, message_id and comment are overwritten
        and ``created_time`` is reset to the current UTC time; ``feedback_id``
        of the existing row is kept.

        Raises:
            ValueError: if ``data.rating`` is neither +1 nor -1.
        """
        self._require_valid_rating(data.rating)
        result = await self._session.execute(
            select(FeedbackModel).where(
                FeedbackModel.thread_id == data.thread_id,
                FeedbackModel.run_id == data.run_id,
                FeedbackModel.user_id == data.user_id,
            )
        )
        model = result.scalar_one_or_none()
        if model is None:
            return await self.create_feedback(data)
        model.rating = data.rating
        model.message_id = data.message_id
        model.comment = data.comment
        model.created_time = datetime.now(UTC)
        await self._session.flush()
        await self._session.refresh(model)
        return _to_feedback(model)

    async def get_feedback(self, feedback_id: str) -> Feedback | None:
        """Return the feedback with *feedback_id*, or ``None`` when no row matches."""
        result = await self._session.execute(
            select(FeedbackModel).where(FeedbackModel.feedback_id == feedback_id)
        )
        model = result.scalar_one_or_none()
        return _to_feedback(model) if model else None

    async def list_feedback_by_run(
        self,
        run_id: str,
        *,
        thread_id: str | None = None,
        user_id: str | None = None,
        limit: int | None = None,
    ) -> list[Feedback]:
        """List feedback for *run_id*, newest ``created_time`` first.

        ``thread_id`` and ``user_id`` narrow the result when given; ``limit``
        caps the number of rows when given.
        """
        stmt = select(FeedbackModel).where(FeedbackModel.run_id == run_id)
        if thread_id is not None:
            stmt = stmt.where(FeedbackModel.thread_id == thread_id)
        if user_id is not None:
            stmt = stmt.where(FeedbackModel.user_id == user_id)
        stmt = stmt.order_by(FeedbackModel.created_time.desc())
        if limit is not None:
            stmt = stmt.limit(limit)
        result = await self._session.execute(stmt)
        return [_to_feedback(m) for m in result.scalars().all()]

    async def list_feedback_by_thread(
        self,
        thread_id: str,
        *,
        user_id: str | None = None,
        limit: int | None = None,
    ) -> list[Feedback]:
        """List feedback for *thread_id*, newest ``created_time`` first.

        ``user_id`` narrows the result when given; ``limit`` caps the number of
        rows when given.
        """
        stmt = select(FeedbackModel).where(FeedbackModel.thread_id == thread_id)
        if user_id is not None:
            stmt = stmt.where(FeedbackModel.user_id == user_id)
        stmt = stmt.order_by(FeedbackModel.created_time.desc())
        if limit is not None:
            stmt = stmt.limit(limit)
        result = await self._session.execute(stmt)
        return [_to_feedback(m) for m in result.scalars().all()]

    async def delete_feedback(self, feedback_id: str) -> bool:
        """Delete the feedback with *feedback_id*.

        Returns:
            ``False`` when no such row exists, ``True`` after issuing the delete.
        """
        existing = await self.get_feedback(feedback_id)
        if existing is None:
            return False
        await self._session.execute(
            delete(FeedbackModel).where(FeedbackModel.feedback_id == feedback_id)
        )
        return True

    async def delete_feedback_by_run(self, thread_id: str, run_id: str, *, user_id: str | None = None) -> bool:
        """Delete all feedback for a run within a thread, optionally for one user.

        Every matching row is deleted. Fetching the matches with
        ``scalar_one_or_none()`` would raise as soon as more than one row
        matched, which can happen when ``user_id`` is omitted and several
        users rated the same run.

        Returns:
            ``False`` when nothing matched, ``True`` when at least one row was
            marked for deletion.
        """
        stmt = select(FeedbackModel).where(
            FeedbackModel.thread_id == thread_id,
            FeedbackModel.run_id == run_id,
        )
        if user_id is not None:
            stmt = stmt.where(FeedbackModel.user_id == user_id)
        result = await self._session.execute(stmt)
        matches = result.scalars().all()
        if not matches:
            return False
        for model in matches:
            await self._session.delete(model)
        return True

    async def aggregate_feedback_by_run(self, thread_id: str, run_id: str) -> FeedbackAggregate:
        """Count total, positive (+1) and negative (-1) ratings for one run."""
        stmt = select(
            func.count().label("total"),
            # SUM over zero rows is NULL, hence the COALESCE to 0.
            func.coalesce(func.sum(case((FeedbackModel.rating == 1, 1), else_=0)), 0).label("positive"),
            func.coalesce(func.sum(case((FeedbackModel.rating == -1, 1), else_=0)), 0).label("negative"),
        ).where(FeedbackModel.thread_id == thread_id, FeedbackModel.run_id == run_id)
        row = (await self._session.execute(stmt)).one()
        return {
            "run_id": run_id,
            "total": int(row.total),
            "positive": int(row.positive),
            "negative": int(row.negative),
        }
@@ -0,0 +1,196 @@
from __future__ import annotations
from datetime import datetime
from typing import Any
from sqlalchemy import delete, func, select, update
from sqlalchemy.ext.asyncio import AsyncSession
from store.repositories.contracts.run import Run, RunCreate, RunRepositoryProtocol
from store.repositories.models.run import Run as RunModel
def _to_run(m: RunModel) -> Run:
    """Convert an ORM run row into the frozen ``Run`` contract model."""
    # Attributes whose name is identical on the ORM row and on the contract model.
    same_named = (
        "run_id",
        "thread_id",
        "assistant_id",
        "user_id",
        "status",
        "model_name",
        "multitask_strategy",
        "error",
        "follow_up_to_run_id",
        "total_input_tokens",
        "total_output_tokens",
        "total_tokens",
        "llm_call_count",
        "lead_agent_tokens",
        "subagent_tokens",
        "middleware_tokens",
        "message_count",
        "first_human_message",
        "last_ai_message",
        "created_time",
        "updated_time",
    )
    values = {name: getattr(m, name) for name in same_named}
    # The row stores metadata under `meta`; falsy values become empty dicts and
    # both mappings are copied so the contract object does not alias row state.
    values["metadata"] = dict(m.meta or {})
    values["kwargs"] = dict(m.kwargs or {})
    return Run(**values)
class DbRunRepository(RunRepositoryProtocol):
    """Run repository backed by SQLAlchemy ``RunModel`` rows.

    Every method works on the ``AsyncSession`` given to the constructor.
    Nothing here commits; statements are executed or flushed only.
    """

    def __init__(self, session: AsyncSession) -> None:
        self._session = session

    async def create_run(self, data: RunCreate) -> Run:
        """Insert a run row built from *data* and return it."""
        model = RunModel(
            run_id=data.run_id,
            thread_id=data.thread_id,
            assistant_id=data.assistant_id,
            user_id=data.user_id,
            status=data.status,
            model_name=data.model_name,
            multitask_strategy=data.multitask_strategy,
            error=data.error,
            follow_up_to_run_id=data.follow_up_to_run_id,
            # Copies keep the row independent from the caller's dictionaries.
            meta=dict(data.metadata),
            kwargs=dict(data.kwargs),
        )
        # Only set the timestamp when the caller supplied one.
        if data.created_time is not None:
            model.created_time = data.created_time
        self._session.add(model)
        await self._session.flush()
        await self._session.refresh(model)
        return _to_run(model)

    async def get_run(self, run_id: str) -> Run | None:
        """Return the run with *run_id*, or ``None`` when no row matches."""
        result = await self._session.execute(
            select(RunModel).where(RunModel.run_id == run_id)
        )
        model = result.scalar_one_or_none()
        return _to_run(model) if model else None

    async def list_runs_by_thread(
        self,
        thread_id: str,
        *,
        user_id: str | None = None,
        limit: int = 50,
        offset: int = 0,
    ) -> list[Run]:
        """List runs of *thread_id*, newest ``created_time`` first, paged by limit/offset."""
        stmt = select(RunModel).where(RunModel.thread_id == thread_id)
        if user_id is not None:
            stmt = stmt.where(RunModel.user_id == user_id)
        stmt = stmt.order_by(RunModel.created_time.desc()).limit(limit).offset(offset)
        result = await self._session.execute(stmt)
        return [_to_run(m) for m in result.scalars().all()]

    async def update_run_status(
        self, run_id: str, status: str, *, error: str | None = None
    ) -> None:
        """Set the status of a run; ``error`` is written only when it is not ``None``."""
        values: dict = {"status": status}
        if error is not None:
            values["error"] = error
        await self._session.execute(
            update(RunModel).where(RunModel.run_id == run_id).values(**values)
        )

    async def delete_run(self, run_id: str) -> None:
        """Delete the run with *run_id*; a missing row is not reported."""
        await self._session.execute(delete(RunModel).where(RunModel.run_id == run_id))

    async def list_pending(self, *, before: datetime | str | None = None) -> list[Run]:
        """List runs with status ``"pending"`` created at or before a cutoff, oldest first.

        Args:
            before: cutoff as a ``datetime`` or an ISO-8601 string parsed with
                ``datetime.fromisoformat``. ``None`` means the current local
                time, made timezone-aware.
        """
        if before is None:
            before_dt = datetime.now().astimezone()
        elif isinstance(before, datetime):
            before_dt = before
        else:
            # NOTE(review): a string without an offset parses to a naive datetime — confirm callers pass offsets.
            before_dt = datetime.fromisoformat(before)
        result = await self._session.execute(
            select(RunModel)
            .where(RunModel.status == "pending", RunModel.created_time <= before_dt)
            .order_by(RunModel.created_time.asc())
        )
        return [_to_run(m) for m in result.scalars().all()]

    async def update_run_completion(
        self,
        run_id: str,
        *,
        status: str,
        total_input_tokens: int = 0,
        total_output_tokens: int = 0,
        total_tokens: int = 0,
        llm_call_count: int = 0,
        lead_agent_tokens: int = 0,
        subagent_tokens: int = 0,
        middleware_tokens: int = 0,
        message_count: int = 0,
        first_human_message: str | None = None,
        last_ai_message: str | None = None,
        error: str | None = None,
    ) -> None:
        """Write the final status and usage counters of a run.

        Counters are always written, including their ``0`` defaults. The two
        message excerpts are truncated to 2000 characters and, like ``error``,
        are written only when they are not ``None``.
        """
        values: dict[str, Any] = {
            "status": status,
            "total_input_tokens": total_input_tokens,
            "total_output_tokens": total_output_tokens,
            "total_tokens": total_tokens,
            "llm_call_count": llm_call_count,
            "lead_agent_tokens": lead_agent_tokens,
            "subagent_tokens": subagent_tokens,
            "middleware_tokens": middleware_tokens,
            "message_count": message_count,
        }
        if first_human_message is not None:
            values["first_human_message"] = first_human_message[:2000]
        if last_ai_message is not None:
            values["last_ai_message"] = last_ai_message[:2000]
        if error is not None:
            values["error"] = error
        await self._session.execute(
            update(RunModel).where(RunModel.run_id == run_id).values(**values)
        )

    async def aggregate_tokens_by_thread(self, thread_id: str) -> dict[str, Any]:
        """Sum token usage of a thread's finished runs, grouped by model name.

        Only runs whose status is ``"success"`` or ``"error"`` are counted.
        Runs without a model name are grouped under ``"unknown"``.

        Returns:
            A dictionary with the keys ``total_tokens``, ``total_input_tokens``,
            ``total_output_tokens``, ``total_runs``, ``by_model`` (model name ->
            ``{"tokens", "runs"}``) and ``by_caller`` (``lead_agent``,
            ``subagent``, ``middleware``).
        """
        completed = RunModel.status.in_(("success", "error"))
        # Built once and reused so SELECT and GROUP BY share a single expression and
        # therefore a single bound parameter. With two separately built expressions
        # the clauses carry different parameters, and PostgreSQL may then refuse the
        # query because it cannot tell that the grouped expression is the selected one.
        model_or_unknown = func.coalesce(RunModel.model_name, "unknown")
        stmt = (
            select(
                model_or_unknown.label("model"),
                func.count().label("runs"),
                func.coalesce(func.sum(RunModel.total_tokens), 0).label("total_tokens"),
                func.coalesce(func.sum(RunModel.total_input_tokens), 0).label("total_input_tokens"),
                func.coalesce(func.sum(RunModel.total_output_tokens), 0).label("total_output_tokens"),
                func.coalesce(func.sum(RunModel.lead_agent_tokens), 0).label("lead_agent"),
                func.coalesce(func.sum(RunModel.subagent_tokens), 0).label("subagent"),
                func.coalesce(func.sum(RunModel.middleware_tokens), 0).label("middleware"),
            )
            .where(RunModel.thread_id == thread_id, completed)
            .group_by(model_or_unknown)
        )
        rows = (await self._session.execute(stmt)).all()

        total_tokens = total_input = total_output = total_runs = 0
        lead_agent = subagent = middleware = 0
        by_model: dict[str, dict] = {}
        for row in rows:
            by_model[row.model] = {"tokens": row.total_tokens, "runs": row.runs}
            total_tokens += row.total_tokens
            total_input += row.total_input_tokens
            total_output += row.total_output_tokens
            total_runs += row.runs
            lead_agent += row.lead_agent
            subagent += row.subagent
            middleware += row.middleware

        return {
            "total_tokens": total_tokens,
            "total_input_tokens": total_input,
            "total_output_tokens": total_output,
            "total_runs": total_runs,
            "by_model": by_model,
            "by_caller": {
                "lead_agent": lead_agent,
                "subagent": subagent,
                "middleware": middleware,
            },
        }
@@ -0,0 +1,195 @@
from __future__ import annotations
import json
from typing import Any
from sqlalchemy import delete, func, select
from sqlalchemy.ext.asyncio import AsyncSession
from store.repositories.contracts.run_event import RunEvent, RunEventCreate, RunEventRepositoryProtocol
from store.repositories.models.run_event import RunEvent as RunEventModel
def _serialize_content(content: Any, metadata: dict[str, Any]) -> tuple[str, dict[str, Any]]:
if not isinstance(content, str):
next_metadata = {**metadata, "content_is_json": True}
if isinstance(content, dict):
next_metadata["content_is_dict"] = True
return json.dumps(content, default=str, ensure_ascii=False), next_metadata
return content, metadata
def _deserialize_content(content: str, metadata: dict[str, Any]) -> Any:
if not (metadata.get("content_is_json") or metadata.get("content_is_dict")):
return content
try:
return json.loads(content)
except json.JSONDecodeError:
return content
def _to_run_event(model: RunEventModel) -> RunEvent:
    """Convert an ORM run-event row into the frozen ``RunEvent`` contract model."""
    stored_meta = dict(model.meta or {})
    # The decoder sees the full stored metadata, while the `content_is_dict`
    # marker is left out of the metadata exposed on the contract object.
    exposed_meta = dict(stored_meta)
    exposed_meta.pop("content_is_dict", None)
    decoded_content = _deserialize_content(model.content, stored_meta)
    return RunEvent(
        thread_id=model.thread_id,
        run_id=model.run_id,
        user_id=model.user_id,
        event_type=model.event_type,
        category=model.category,
        content=decoded_content,
        metadata=exposed_meta,
        seq=model.seq,
        created_at=model.created_at,
    )
class DbRunEventRepository(RunEventRepositoryProtocol):
    """Run-event repository backed by SQLAlchemy ``RunEventModel`` rows.

    Every method works on the ``AsyncSession`` given to the constructor.
    Nothing here commits; statements are executed or flushed only.
    """

    def __init__(self, session: AsyncSession) -> None:
        self._session = session

    async def _fetch(self, stmt: Any) -> list[RunEvent]:
        """Execute *stmt* and convert every returned row to a ``RunEvent``."""
        result = await self._session.execute(stmt)
        return [_to_run_event(row) for row in result.scalars().all()]

    async def _page_messages(
        self,
        stmt: Any,
        *,
        limit: int,
        before_seq: int | None,
        after_seq: int | None,
    ) -> list[RunEvent]:
        """Apply cursor pagination to *stmt* and return events in ascending ``seq`` order.

        ``before_seq`` takes precedence over ``after_seq``. With ``before_seq``
        (or with no cursor) the newest *limit* rows are selected and then
        reversed; with ``after_seq`` only, the oldest *limit* rows after the
        cursor are selected. Both cursors are exclusive.
        """
        seq = RunEventModel.seq
        if before_seq is None and after_seq is not None:
            return await self._fetch(stmt.where(seq > after_seq).order_by(seq.asc()).limit(limit))
        if before_seq is not None:
            stmt = stmt.where(seq < before_seq)
        newest_first = await self._fetch(stmt.order_by(seq.desc()).limit(limit))
        newest_first.reverse()
        return newest_first

    async def _delete_matching(self, conditions: list[Any]) -> int:
        """Count the rows matching *conditions*, delete them, and return that count."""
        matched = await self._session.scalar(
            select(func.count()).select_from(RunEventModel).where(*conditions)
        )
        await self._session.execute(delete(RunEventModel).where(*conditions))
        return int(matched or 0)

    async def append_batch(self, events: list[RunEventCreate]) -> list[RunEvent]:
        """Insert *events*, numbering them consecutively after each thread's current maximum ``seq``.

        An empty input returns an empty list without touching the session.
        """
        if not events:
            return []

        last_seq: dict[str, int] = {}
        for thread_id in {event.thread_id for event in events}:
            # NOTE(review): this combines an aggregate with FOR UPDATE; some databases
            # (PostgreSQL) reject that combination — confirm against the target dialects.
            current_max = await self._session.scalar(
                select(func.max(RunEventModel.seq))
                .where(RunEventModel.thread_id == thread_id)
                .with_for_update()
            )
            last_seq[thread_id] = current_max or 0

        pending: list[RunEventModel] = []
        for event in events:
            assigned = last_seq[event.thread_id] + 1
            last_seq[event.thread_id] = assigned
            stored_content, stored_meta = _serialize_content(event.content, dict(event.metadata))
            row = RunEventModel(
                thread_id=event.thread_id,
                run_id=event.run_id,
                user_id=event.user_id,
                seq=assigned,
                event_type=event.event_type,
                category=event.category,
                content=stored_content,
                meta=stored_meta,
            )
            # Only set the timestamp when the caller supplied one.
            if event.created_at is not None:
                row.created_at = event.created_at
            self._session.add(row)
            pending.append(row)

        await self._session.flush()
        return [_to_run_event(row) for row in pending]

    async def list_messages(
        self,
        thread_id: str,
        *,
        limit: int = 50,
        before_seq: int | None = None,
        after_seq: int | None = None,
        user_id: str | None = None,
    ) -> list[RunEvent]:
        """List events of category ``"message"`` for a thread, in ascending ``seq`` order.

        See ``_page_messages`` for how the cursors select the page.
        """
        stmt = select(RunEventModel).where(
            RunEventModel.thread_id == thread_id,
            RunEventModel.category == "message",
        )
        if user_id is not None:
            stmt = stmt.where(RunEventModel.user_id == user_id)
        return await self._page_messages(stmt, limit=limit, before_seq=before_seq, after_seq=after_seq)

    async def list_events(
        self,
        thread_id: str,
        run_id: str,
        *,
        event_types: list[str] | None = None,
        limit: int = 500,
        user_id: str | None = None,
    ) -> list[RunEvent]:
        """List events of one run in ascending ``seq`` order, capped at *limit*.

        ``event_types`` restricts the result when it is not ``None``; an empty
        list is still applied as a filter.
        """
        stmt = select(RunEventModel).where(
            RunEventModel.thread_id == thread_id,
            RunEventModel.run_id == run_id,
        )
        if user_id is not None:
            stmt = stmt.where(RunEventModel.user_id == user_id)
        if event_types is not None:
            stmt = stmt.where(RunEventModel.event_type.in_(event_types))
        return await self._fetch(stmt.order_by(RunEventModel.seq.asc()).limit(limit))

    async def list_messages_by_run(
        self,
        thread_id: str,
        run_id: str,
        *,
        limit: int = 50,
        before_seq: int | None = None,
        after_seq: int | None = None,
        user_id: str | None = None,
    ) -> list[RunEvent]:
        """List events of category ``"message"`` for one run, in ascending ``seq`` order.

        See ``_page_messages`` for how the cursors select the page.
        """
        stmt = select(RunEventModel).where(
            RunEventModel.thread_id == thread_id,
            RunEventModel.run_id == run_id,
            RunEventModel.category == "message",
        )
        if user_id is not None:
            stmt = stmt.where(RunEventModel.user_id == user_id)
        return await self._page_messages(stmt, limit=limit, before_seq=before_seq, after_seq=after_seq)

    async def count_messages(self, thread_id: str, *, user_id: str | None = None) -> int:
        """Count events of category ``"message"`` in a thread, optionally for one user."""
        stmt = (
            select(func.count())
            .select_from(RunEventModel)
            .where(RunEventModel.thread_id == thread_id, RunEventModel.category == "message")
        )
        if user_id is not None:
            stmt = stmt.where(RunEventModel.user_id == user_id)
        total = await self._session.scalar(stmt)
        return int(total or 0)

    async def delete_by_thread(self, thread_id: str, *, user_id: str | None = None) -> int:
        """Delete all events of a thread (optionally one user's) and return how many matched."""
        conditions = [RunEventModel.thread_id == thread_id]
        if user_id is not None:
            conditions.append(RunEventModel.user_id == user_id)
        return await self._delete_matching(conditions)

    async def delete_by_run(self, thread_id: str, run_id: str, *, user_id: str | None = None) -> int:
        """Delete all events of one run (optionally one user's) and return how many matched."""
        conditions = [RunEventModel.thread_id == thread_id, RunEventModel.run_id == run_id]
        if user_id is not None:
            conditions.append(RunEventModel.user_id == user_id)
        return await self._delete_matching(conditions)
@@ -0,0 +1,97 @@
from __future__ import annotations
from typing import Any
from sqlalchemy import delete, select, update
from sqlalchemy.ext.asyncio import AsyncSession
from store.repositories.contracts.thread_meta import ThreadMeta, ThreadMetaCreate, ThreadMetaRepositoryProtocol
from store.repositories.models.thread_meta import ThreadMeta as ThreadMetaModel
def _to_thread_meta(m: ThreadMetaModel) -> ThreadMeta:
    """Convert an ORM thread-metadata row into the frozen ``ThreadMeta`` contract model."""
    # Attributes whose name is identical on the ORM row and on the contract model.
    same_named = (
        "thread_id",
        "assistant_id",
        "user_id",
        "display_name",
        "status",
        "created_time",
        "updated_time",
    )
    values = {name: getattr(m, name) for name in same_named}
    # The row stores metadata under `meta`; a falsy value becomes an empty dict.
    values["metadata"] = dict(m.meta or {})
    return ThreadMeta(**values)
class DbThreadMetaRepository(ThreadMetaRepositoryProtocol):
    """Thread-metadata repository backed by SQLAlchemy ``ThreadMetaModel`` rows.

    Every method works on the ``AsyncSession`` given to the constructor.
    Nothing here commits; statements are executed or flushed only.
    """

    def __init__(self, session: AsyncSession) -> None:
        self._session = session

    async def create_thread_meta(self, data: ThreadMetaCreate) -> ThreadMeta:
        """Insert a thread-metadata row built from *data* and return it."""
        row = ThreadMetaModel(
            thread_id=data.thread_id,
            assistant_id=data.assistant_id,
            user_id=data.user_id,
            display_name=data.display_name,
            status=data.status,
            # A copy keeps the row independent from the caller's dictionary.
            meta=dict(data.metadata),
        )
        self._session.add(row)
        await self._session.flush()
        await self._session.refresh(row)
        return _to_thread_meta(row)

    async def get_thread_meta(self, thread_id: str) -> ThreadMeta | None:
        """Return the metadata of *thread_id*, or ``None`` when no row matches."""
        query = select(ThreadMetaModel).where(ThreadMetaModel.thread_id == thread_id)
        row = (await self._session.execute(query)).scalar_one_or_none()
        if not row:
            return None
        return _to_thread_meta(row)

    async def update_thread_meta(
        self,
        thread_id: str,
        *,
        display_name: str | None = None,
        status: str | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> None:
        """Update the given fields of a thread; ``None`` arguments are left untouched.

        No statement is issued when every argument is ``None``. A supplied
        ``metadata`` dictionary replaces the stored one as a whole.
        """
        candidates: dict = {
            "display_name": display_name,
            "status": status,
            "meta": None if metadata is None else dict(metadata),
        }
        changes: dict = {column: value for column, value in candidates.items() if value is not None}
        if not changes:
            return
        statement = update(ThreadMetaModel).where(ThreadMetaModel.thread_id == thread_id).values(**changes)
        await self._session.execute(statement)

    async def delete_thread(self, thread_id: str) -> None:
        """Delete the metadata row of *thread_id*; a missing row is not reported."""
        statement = delete(ThreadMetaModel).where(ThreadMetaModel.thread_id == thread_id)
        await self._session.execute(statement)

    async def search_threads(
        self,
        *,
        metadata: dict[str, Any] | None = None,
        status: str | None = None,
        user_id: str | None = None,
        assistant_id: str | None = None,
        limit: int = 100,
        offset: int = 0,
    ) -> list[ThreadMeta]:
        """Search threads by optional filters, newest ``created_time`` first, paged by limit/offset.

        Each ``metadata`` item must match the stored JSON value at that key
        when both are compared as strings (the filter value via ``str()``).
        """
        filters = []
        if status is not None:
            filters.append(ThreadMetaModel.status == status)
        if user_id is not None:
            filters.append(ThreadMetaModel.user_id == user_id)
        if assistant_id is not None:
            filters.append(ThreadMetaModel.assistant_id == assistant_id)
        # NOTE(review): str() of non-string values (e.g. True -> "True") may not equal
        # the database's string form of the JSON value — confirm for the dialects in use.
        for key, value in (metadata or {}).items():
            filters.append(ThreadMetaModel.meta[key].as_string() == str(value))

        query = select(ThreadMetaModel)
        for condition in filters:
            query = query.where(condition)
        query = query.order_by(ThreadMetaModel.created_time.desc())
        query = query.limit(limit).offset(offset)
        rows = (await self._session.execute(query)).scalars().all()
        return [_to_thread_meta(row) for row in rows]
@@ -0,0 +1,98 @@
from __future__ import annotations
from sqlalchemy import func, select
from sqlalchemy.exc import IntegrityError
from sqlalchemy.ext.asyncio import AsyncSession
from store.repositories.contracts.user import User, UserCreate, UserNotFoundError, UserRepositoryProtocol
from store.repositories.models.user import User as UserModel
def _to_user(model: UserModel) -> User:
    """Convert an ORM user row into the frozen ``User`` contract model."""
    # Contract fields and ORM attributes share their names, so copy them one-to-one.
    # `system_role` is checked against the allowed literals when `User` is built.
    field_names = (
        "id",
        "email",
        "password_hash",
        "system_role",
        "created_at",
        "oauth_provider",
        "oauth_id",
        "needs_setup",
        "token_version",
    )
    values = {name: getattr(model, name) for name in field_names}
    return User(**values)
class DbUserRepository(UserRepositoryProtocol):
    """User repository backed by SQLAlchemy ``UserModel`` rows.

    Every method works on the ``AsyncSession`` given to the constructor.
    Nothing here commits; the only transaction control is the rollback issued
    by ``create_user`` when the insert violates a constraint.
    """

    def __init__(self, session: AsyncSession) -> None:
        self._session = session

    async def create_user(self, data: UserCreate) -> User:
        """Insert a user row built from *data* and return it.

        Raises:
            ValueError: when flushing the insert raises ``IntegrityError``.
                The session is rolled back first.
        """
        model = UserModel(
            id=data.id,
            email=data.email,
            system_role=data.system_role,
            password_hash=data.password_hash,
            oauth_provider=data.oauth_provider,
            oauth_id=data.oauth_id,
            needs_setup=data.needs_setup,
            token_version=data.token_version,
        )
        # Only set the timestamp when the caller supplied one.
        if data.created_at is not None:
            model.created_at = data.created_at
        self._session.add(model)
        try:
            await self._session.flush()
        except IntegrityError as exc:
            await self._session.rollback()
            # NOTE(review): every integrity violation is reported as a duplicate e-mail;
            # confirm no other unique constraint (id, OAuth identity) can trigger this.
            raise ValueError(f"Email already registered: {data.email}") from exc
        await self._session.refresh(model)
        return _to_user(model)

    async def get_user_by_id(self, user_id: str) -> User | None:
        """Return the user with primary key *user_id*, or ``None`` when absent."""
        model = await self._session.get(UserModel, user_id)
        return _to_user(model) if model is not None else None

    async def get_user_by_email(self, email: str) -> User | None:
        """Return the user whose e-mail equals *email*, or ``None`` when absent."""
        result = await self._session.execute(select(UserModel).where(UserModel.email == email))
        model = result.scalar_one_or_none()
        return _to_user(model) if model is not None else None

    async def get_user_by_oauth(self, provider: str, oauth_id: str) -> User | None:
        """Return the user linked to the given OAuth provider and id, or ``None`` when absent."""
        result = await self._session.execute(
            select(UserModel).where(
                UserModel.oauth_provider == provider,
                UserModel.oauth_id == oauth_id,
            )
        )
        model = result.scalar_one_or_none()
        return _to_user(model) if model is not None else None

    async def get_first_admin(self) -> User | None:
        """Return the earliest-created admin user, or ``None`` when there is no admin.

        The query is ordered by ``created_at`` and then ``id``: a LIMIT without
        an ORDER BY leaves the database free to return any matching row, so the
        result could otherwise differ between calls.
        """
        result = await self._session.execute(
            select(UserModel)
            .where(UserModel.system_role == "admin")
            .order_by(UserModel.created_at.asc(), UserModel.id.asc())
            .limit(1)
        )
        model = result.scalar_one_or_none()
        return _to_user(model) if model is not None else None

    async def update_user(self, data: User) -> User:
        """Overwrite the stored user with the values in *data* and return the result.

        ``id`` selects the row; ``created_at`` is not written.

        Raises:
            UserNotFoundError: when no row with ``data.id`` exists.
        """
        model = await self._session.get(UserModel, data.id)
        if model is None:
            raise UserNotFoundError(f"User {data.id} no longer exists")
        model.email = data.email
        model.password_hash = data.password_hash
        model.system_role = data.system_role
        model.oauth_provider = data.oauth_provider
        model.oauth_id = data.oauth_id
        model.needs_setup = data.needs_setup
        model.token_version = data.token_version
        await self._session.flush()
        await self._session.refresh(model)
        return _to_user(model)

    async def count_users(self) -> int:
        """Return the number of user rows."""
        count = await self._session.scalar(select(func.count()).select_from(UserModel))
        return int(count or 0)

    async def count_admin_users(self) -> int:
        """Return the number of user rows whose role is ``"admin"``."""
        count = await self._session.scalar(
            select(func.count()).select_from(UserModel).where(UserModel.system_role == "admin")
        )
        return int(count or 0)
@@ -0,0 +1,36 @@
from sqlalchemy.ext.asyncio import AsyncSession
from store.repositories import (
FeedbackRepositoryProtocol,
RunEventRepositoryProtocol,
RunRepositoryProtocol,
ThreadMetaRepositoryProtocol,
UserRepositoryProtocol,
)
from store.repositories.db import (
DbFeedbackRepository,
DbRunEventRepository,
DbRunRepository,
DbThreadMetaRepository,
DbUserRepository,
)
def build_thread_meta_repository(session: AsyncSession) -> ThreadMetaRepositoryProtocol:
    """Create the database-backed thread-metadata repository bound to ``session``."""
    repository = DbThreadMetaRepository(session)
    return repository
def build_run_repository(session: AsyncSession) -> RunRepositoryProtocol:
    """Create the database-backed run repository bound to ``session``."""
    repository = DbRunRepository(session)
    return repository
def build_feedback_repository(session: AsyncSession) -> FeedbackRepositoryProtocol:
    """Create the database-backed feedback repository bound to ``session``."""
    repository = DbFeedbackRepository(session)
    return repository
def build_run_event_repository(session: AsyncSession) -> RunEventRepositoryProtocol:
    """Create the database-backed run-event repository bound to ``session``."""
    repository = DbRunEventRepository(session)
    return repository
def build_user_repository(session: AsyncSession) -> UserRepositoryProtocol:
    """Create the database-backed user repository bound to ``session``."""
    repository = DbUserRepository(session)
    return repository
@@ -0,0 +1,7 @@
from store.repositories.models.feedback import Feedback
from store.repositories.models.run import Run
from store.repositories.models.run_event import RunEvent
from store.repositories.models.thread_meta import ThreadMeta
from store.repositories.models.user import User
__all__ = ["Feedback", "Run", "RunEvent", "ThreadMeta", "User"]
@@ -0,0 +1,39 @@
from __future__ import annotations
from datetime import datetime
from sqlalchemy import Integer, String, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column
from store.persistence.base_model import DataClassBase, TimeZone, UniversalText
from store.utils import get_timezone
_tz = get_timezone()
class Feedback(DataClassBase):
    """Feedback table (create-only, no updated_time).

    Each row stores one integer ``rating`` tied to a run and thread, with an
    optional user, message reference and free-text comment. Only a creation
    timestamp is kept.
    """

    __tablename__ = "feedback"
    __table_args__ = (
        # Uniqueness is declared over (thread_id, run_id, user_id).
        # NOTE(review): user_id is nullable; databases commonly treat NULLs as
        # distinct in unique constraints, so rows without a user may not be
        # deduplicated by this constraint — confirm that is intended.
        UniqueConstraint("thread_id", "run_id", "user_id", name="uq_feedback_thread_run_user"),
        {"comment": "Feedback table."},
    )

    # Primary key, plus the run/thread the feedback belongs to (both indexed).
    feedback_id: Mapped[str] = mapped_column(String(64), primary_key=True)
    run_id: Mapped[str] = mapped_column(String(64), index=True)
    thread_id: Mapped[str] = mapped_column(String(64), index=True)
    # Integer rating; the meaning of specific values is not defined here.
    rating: Mapped[int] = mapped_column(Integer)

    # Optional fields, all defaulting to None.
    user_id: Mapped[str | None] = mapped_column(String(64), default=None, index=True)
    message_id: Mapped[str | None] = mapped_column(String(64), default=None)
    comment: Mapped[str | None] = mapped_column(UniversalText, default=None)

    # Exposed as ``created_time`` in Python but stored in the ``created_at``
    # column. init=False keeps it out of the constructor arguments (assuming
    # DataClassBase is a dataclass-style declarative base); the value comes
    # from _tz.now when the object is created.
    created_time: Mapped[datetime] = mapped_column(
        "created_at",
        TimeZone,
        init=False,
        default_factory=_tz.now,
        sort_order=999,
        comment="Created at",
    )
@@ -0,0 +1,66 @@
from __future__ import annotations
from datetime import datetime
from typing import Any
from sqlalchemy import JSON, Index, Integer, String
from sqlalchemy.orm import Mapped, mapped_column
from store.persistence.base_model import DataClassBase, TimeZone, UniversalText
from store.utils import get_timezone
_tz = get_timezone()
class Run(DataClassBase):
    """Run metadata table.

    One row per run, keyed by ``run_id``. It holds the run's status and
    configuration, token and call counters, message summary fields, and
    creation/update timestamps.
    """

    __tablename__ = "runs"
    __table_args__ = (
        # Composite index on (thread_id, status), in addition to the
        # single-column indexes declared on each of those columns below.
        Index("ix_runs_thread_status", "thread_id", "status"),
        {"comment": "Run metadata table."},
    )

    # Identity and ownership.
    run_id: Mapped[str] = mapped_column(String(64), primary_key=True)
    thread_id: Mapped[str] = mapped_column(String(64), index=True)
    assistant_id: Mapped[str | None] = mapped_column(String(128), default=None)
    user_id: Mapped[str | None] = mapped_column(String(64), default=None, index=True)

    # Lifecycle and configuration; status defaults to "pending" and
    # multitask_strategy to "reject".
    status: Mapped[str] = mapped_column(String(20), default="pending", index=True)
    model_name: Mapped[str | None] = mapped_column(String(128), default=None)
    multitask_strategy: Mapped[str] = mapped_column(String(20), default="reject")
    error: Mapped[str | None] = mapped_column(UniversalText, default=None)
    follow_up_to_run_id: Mapped[str | None] = mapped_column(String(64), default=None)

    # JSON payloads. The Python attributes ``meta`` and ``kwargs`` are stored
    # in the ``metadata_json`` and ``kwargs_json`` columns; each instance gets
    # its own empty dict by default.
    meta: Mapped[dict[str, Any]] = mapped_column("metadata_json", JSON, default_factory=dict)
    kwargs: Mapped[dict[str, Any]] = mapped_column("kwargs_json", JSON, default_factory=dict)

    # Token and call counters, all starting at 0.
    total_input_tokens: Mapped[int] = mapped_column(Integer, default=0)
    total_output_tokens: Mapped[int] = mapped_column(Integer, default=0)
    total_tokens: Mapped[int] = mapped_column(Integer, default=0)
    llm_call_count: Mapped[int] = mapped_column(Integer, default=0)
    lead_agent_tokens: Mapped[int] = mapped_column(Integer, default=0)
    subagent_tokens: Mapped[int] = mapped_column(Integer, default=0)
    middleware_tokens: Mapped[int] = mapped_column(Integer, default=0)

    # Message summary fields.
    message_count: Mapped[int] = mapped_column(Integer, default=0)
    first_human_message: Mapped[str | None] = mapped_column(UniversalText, default=None)
    last_ai_message: Mapped[str | None] = mapped_column(UniversalText, default=None)

    # Exposed as ``created_time`` / ``updated_time`` in Python but stored in
    # the ``created_at`` / ``updated_at`` columns. Both are init=False, so
    # they are not constructor arguments (assuming DataClassBase is a
    # dataclass-style declarative base).
    created_time: Mapped[datetime] = mapped_column(
        "created_at",
        TimeZone,
        init=False,
        default_factory=_tz.now,
        sort_order=999,
        comment="Created at",
    )
    # Starts as None and is filled from _tz.now via ``onupdate``.
    updated_time: Mapped[datetime | None] = mapped_column(
        "updated_at",
        TimeZone,
        init=False,
        default=None,
        onupdate=_tz.now,
        sort_order=999,
        comment="Updated at",
    )
@@ -0,0 +1,43 @@
from __future__ import annotations
from datetime import datetime
from typing import Any
from sqlalchemy import JSON, Index, Integer, String, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column
from store.persistence.base_model import DataClassBase, TimeZone, UniversalText, id_key
from store.utils import get_timezone
_tz = get_timezone()
class RunEvent(DataClassBase):
    """Run event table.

    Each row is one event belonging to a thread and run, carrying a type,
    category, sequence number, text content and a JSON metadata payload.
    """

    __tablename__ = "run_events"
    __table_args__ = (
        # A given seq value may appear only once per thread.
        UniqueConstraint("thread_id", "seq", name="uq_events_thread_seq"),
        # Composite indexes on (thread_id, category, seq) and
        # (thread_id, run_id, seq).
        Index("ix_events_thread_cat_seq", "thread_id", "category", "seq"),
        Index("ix_events_run", "thread_id", "run_id", "seq"),
        {"comment": "Run event table."},
    )

    # Column definition comes from the ``id_key`` annotated type in
    # store.persistence.base_model (presumably an auto-generated primary key
    # — confirm there). It is not a constructor argument.
    id: Mapped[id_key] = mapped_column(init=False)

    # Required identifying columns, each individually indexed.
    thread_id: Mapped[str] = mapped_column(String(64), index=True)
    run_id: Mapped[str] = mapped_column(String(64), index=True)
    event_type: Mapped[str] = mapped_column(String(32), index=True)
    category: Mapped[str] = mapped_column(String(16), index=True)

    # Optional / defaulted columns.
    user_id: Mapped[str | None] = mapped_column(String(64), default=None, index=True)
    seq: Mapped[int] = mapped_column(Integer, default=0, index=True)
    content: Mapped[str] = mapped_column(UniversalText, default="")
    # The Python attribute ``meta`` is stored in the ``event_metadata``
    # column; each instance gets its own empty dict by default.
    meta: Mapped[dict[str, Any]] = mapped_column("event_metadata", JSON, default_factory=dict)

    # Filled from _tz.now when the object is created; not a constructor
    # argument.
    created_at: Mapped[datetime] = mapped_column(
        TimeZone,
        init=False,
        default_factory=_tz.now,
        sort_order=999,
        comment="Event timestamp",
    )
@@ -0,0 +1,46 @@
from __future__ import annotations
from datetime import datetime
from typing import Any
from sqlalchemy import JSON, String
from sqlalchemy.orm import Mapped, mapped_column
from store.persistence.base_model import DataClassBase, TimeZone
from store.utils import get_timezone
_tz = get_timezone()
class ThreadMeta(DataClassBase):
    """Thread metadata table.

    One row per thread, keyed by ``thread_id``. It holds optional assistant
    and user references, a display name, a status string, a JSON metadata
    payload, and creation/update timestamps.
    """

    __tablename__ = "threads_meta"
    __table_args__ = {"comment": "Thread metadata table."}

    thread_id: Mapped[str] = mapped_column(String(64), primary_key=True)
    # Optional, indexed references.
    assistant_id: Mapped[str | None] = mapped_column(String(128), default=None, index=True)
    user_id: Mapped[str | None] = mapped_column(String(64), default=None, index=True)
    display_name: Mapped[str | None] = mapped_column(String(256), default=None)
    # Status defaults to "idle"; the set of other values is not defined here.
    status: Mapped[str] = mapped_column(String(20), default="idle", index=True)
    # The Python attribute ``meta`` is stored in the ``metadata_json`` column;
    # each instance gets its own empty dict by default.
    meta: Mapped[dict[str, Any]] = mapped_column("metadata_json", JSON, default_factory=dict)

    # Exposed as ``created_time`` / ``updated_time`` in Python but stored in
    # the ``created_at`` / ``updated_at`` columns. Both are init=False, so
    # they are not constructor arguments (assuming DataClassBase is a
    # dataclass-style declarative base).
    created_time: Mapped[datetime] = mapped_column(
        "created_at",
        TimeZone,
        init=False,
        default_factory=_tz.now,
        sort_order=999,
        comment="Created at",
    )
    # Starts as None and is filled from _tz.now via ``onupdate``.
    updated_time: Mapped[datetime | None] = mapped_column(
        "updated_at",
        TimeZone,
        init=False,
        default=None,
        onupdate=_tz.now,
        sort_order=999,
        comment="Updated at",
    )
@@ -0,0 +1,45 @@
from __future__ import annotations
from datetime import datetime
from sqlalchemy import Boolean, Index, String, text
from sqlalchemy.orm import Mapped, mapped_column
from store.persistence.base_model import DataClassBase, TimeZone
from store.utils import get_timezone
_tz = get_timezone()
class User(DataClassBase):
    """User account table.

    One row per account, keyed by ``id``. ``email`` is unique and required;
    the password hash and OAuth identity are both optional.
    """

    __tablename__ = "users"
    __table_args__ = (
        # Unique index over the OAuth identity pair. The WHERE clause is
        # passed as ``sqlite_where``, so the restriction to rows where both
        # columns are non-NULL is only applied by the SQLite dialect.
        Index(
            "idx_users_oauth_identity",
            "oauth_provider",
            "oauth_id",
            unique=True,
            sqlite_where=text("oauth_provider IS NOT NULL AND oauth_id IS NOT NULL"),
        ),
        {"comment": "User account table."},
    )

    id: Mapped[str] = mapped_column(String(36), primary_key=True)
    email: Mapped[str] = mapped_column(String(320), unique=True, nullable=False, index=True)
    # Role string, defaulting to "user".
    system_role: Mapped[str] = mapped_column(String(16), default="user")
    # Credentials: a password hash and/or an OAuth identity, all optional.
    password_hash: Mapped[str | None] = mapped_column(String(128), default=None)
    oauth_provider: Mapped[str | None] = mapped_column(String(32), default=None)
    oauth_id: Mapped[str | None] = mapped_column(String(128), default=None)
    needs_setup: Mapped[bool] = mapped_column(Boolean, default=False)
    # Integer counter starting at 0; the column type is derived from the
    # Mapped[int] annotation because no explicit type is given.
    token_version: Mapped[int] = mapped_column(default=0)

    # Filled from _tz.now when the object is created; init=False means it is
    # not a constructor argument and has to be assigned afterwards if needed.
    created_at: Mapped[datetime] = mapped_column(
        TimeZone,
        init=False,
        default_factory=_tz.now,
        sort_order=999,
        comment="Created at",
    )