perf: use SQL aggregation for feedback stats and thread token usage

Replace Python-side counting in FeedbackRepository.aggregate_by_run with a single SELECT COUNT/SUM query. Add a RunStore.aggregate_tokens_by_thread abstract method, with a SQL GROUP BY implementation in RunRepository and a Python fallback in MemoryRunStore. Simplify the thread_token_usage endpoint to delegate to the new method, eliminating the limit=10000 truncation risk.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-25 01:15:58 +00:00 · 2026-04-06 11:20:34 +08:00
parent 332fb18b34
commit 0af0ae7fbb
5 changed files with 98 additions and 41 deletions
@@ -8,7 +8,7 @@ from __future__ import annotations
 import uuid
 from datetime import UTC, datetime

-from sqlalchemy import select
+from sqlalchemy import case, func, select
 from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker

 from deerflow.persistence.models.feedback import FeedbackRow
@@ -82,13 +82,17 @@ class FeedbackRepository:
            return True

    async def aggregate_by_run(self, thread_id: str, run_id: str) -> dict:
-        """Aggregate feedback stats for a run."""
-        items = await self.list_by_run(thread_id, run_id, limit=10000)
-        positive = sum(1 for i in items if i["rating"] == 1)
-        negative = sum(1 for i in items if i["rating"] == -1)
-        return {
-            "run_id": run_id,
-            "total": len(items),
-            "positive": positive,
-            "negative": negative,
-        }
+        """Return total, positive (rating == 1) and negative (rating == -1) feedback counts for one run of a thread, computed in a single SQL query."""
+        stmt = select(
+            func.count().label("total"),
+            func.coalesce(func.sum(case((FeedbackRow.rating == 1, 1), else_=0)), 0).label("positive"),
+            func.coalesce(func.sum(case((FeedbackRow.rating == -1, 1), else_=0)), 0).label("negative"),
+        ).where(FeedbackRow.thread_id == thread_id, FeedbackRow.run_id == run_id)
+        async with self._sf() as session:
+            row = (await session.execute(stmt)).one()
+            return {
+                "run_id": run_id,
+                "total": row.total,
+                "positive": row.positive,
+                "negative": row.negative,
+            }