deer-flow/backend/packages/harness/deerflow/persistence/repositories/run_repo.py

"""SQLAlchemy-backed RunStore implementation.

Each method acquires and releases its own short-lived session.
Run status updates happen from background workers that may live
minutes -- we don't hold connections across long execution.
"""

from __future__ import annotations

import json
import logging
from datetime import UTC, datetime
from typing import Any

from sqlalchemy import select, update
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker

from deerflow.persistence.models.run import RunRow
from deerflow.runtime.runs.store.base import RunStore

logger = logging.getLogger(__name__)


class RunRepository(RunStore):
    def __init__(self, session_factory: async_sessionmaker[AsyncSession]) -> None:
        self._sf = session_factory

    @staticmethod
    def _safe_json(obj: Any) -> Any:
        """Ensure obj is JSON-serializable. Falls back to model_dump() or str()."""
        if obj is None:
            return None
        if isinstance(obj, (str, int, float, bool)):
            return obj
        if isinstance(obj, dict):
            return {k: RunRepository._safe_json(v) for k, v in obj.items()}
        if isinstance(obj, (list, tuple)):
            return [RunRepository._safe_json(v) for v in obj]
        if hasattr(obj, "model_dump"):
            try:
                return obj.model_dump()
            except Exception:
                pass
        if hasattr(obj, "dict"):
            try:
                return obj.dict()
            except Exception:
                pass
        try:
            json.dumps(obj)
            return obj
        except (TypeError, ValueError):
            return str(obj)

    @staticmethod
    def _row_to_dict(row: RunRow) -> dict[str, Any]:
        d = row.to_dict()
        # Remap JSON columns to match RunStore interface
        d["metadata"] = d.pop("metadata_json", {})
        d["kwargs"] = d.pop("kwargs_json", {})
        # Convert datetime to ISO string for consistency with MemoryRunStore
        for key in ("created_at", "updated_at"):
            val = d.get(key)
            if isinstance(val, datetime):
                d[key] = val.isoformat()
        return d

    async def put(
        self,
        run_id,
        *,
        thread_id,
        assistant_id=None,
        owner_id=None,
        status="pending",
        multitask_strategy="reject",
        metadata=None,
        kwargs=None,
        error=None,
        created_at=None,
    ):
        now = datetime.now(UTC)
        row = RunRow(
            run_id=run_id,
            thread_id=thread_id,
            assistant_id=assistant_id,
            owner_id=owner_id,
            status=status,
            multitask_strategy=multitask_strategy,
            metadata_json=self._safe_json(metadata) or {},
            kwargs_json=self._safe_json(kwargs) or {},
            error=error,
            created_at=datetime.fromisoformat(created_at) if created_at else now,
            updated_at=now,
        )
        async with self._sf() as session:
            session.add(row)
            await session.commit()

    async def get(self, run_id):
        async with self._sf() as session:
            row = await session.get(RunRow, run_id)
            return self._row_to_dict(row) if row else None

    async def list_by_thread(self, thread_id, *, owner_id=None, limit=100):
        stmt = select(RunRow).where(RunRow.thread_id == thread_id)
        if owner_id is not None:
            stmt = stmt.where(RunRow.owner_id == owner_id)
        stmt = stmt.order_by(RunRow.created_at.desc()).limit(limit)
        async with self._sf() as session:
            result = await session.execute(stmt)
            return [self._row_to_dict(r) for r in result.scalars()]

    async def update_status(self, run_id, status, *, error=None):
        values: dict[str, Any] = {"status": status, "updated_at": datetime.now(UTC)}
        if error is not None:
            values["error"] = error
        async with self._sf() as session:
            await session.execute(update(RunRow).where(RunRow.run_id == run_id).values(**values))
            await session.commit()

    async def delete(self, run_id):
        async with self._sf() as session:
            row = await session.get(RunRow, run_id)
            if row is not None:
                await session.delete(row)
                await session.commit()

    async def list_pending(self, *, before=None):
        now = before or datetime.now(UTC).isoformat()
        stmt = select(RunRow).where(RunRow.status == "pending", RunRow.created_at <= now).order_by(RunRow.created_at.asc())
        async with self._sf() as session:
            result = await session.execute(stmt)
            return [self._row_to_dict(r) for r in result.scalars()]

    async def update_run_completion(
        self,
        run_id: str,
        *,
        status: str,
        total_input_tokens: int = 0,
        total_output_tokens: int = 0,
        total_tokens: int = 0,
        llm_call_count: int = 0,
        lead_agent_tokens: int = 0,
        subagent_tokens: int = 0,
        middleware_tokens: int = 0,
        message_count: int = 0,
        last_ai_message: str | None = None,
        first_human_message: str | None = None,
        error: str | None = None,
    ) -> None:
        """Update status + token usage + convenience fields on run completion."""
        values: dict[str, Any] = {
            "status": status,
            "total_input_tokens": total_input_tokens,
            "total_output_tokens": total_output_tokens,
            "total_tokens": total_tokens,
            "llm_call_count": llm_call_count,
            "lead_agent_tokens": lead_agent_tokens,
            "subagent_tokens": subagent_tokens,
            "middleware_tokens": middleware_tokens,
            "message_count": message_count,
            "updated_at": datetime.now(UTC),
        }
        if last_ai_message is not None:
            values["last_ai_message"] = last_ai_message[:2000]
        if first_human_message is not None:
            values["first_human_message"] = first_human_message[:2000]
        if error is not None:
            values["error"] = error
        async with self._sf() as session:
            await session.execute(update(RunRow).where(RunRow.run_id == run_id).values(**values))
            await session.commit()