perf: use SQL aggregation for feedback stats and thread token usage

Replace the Python-side counting in FeedbackRepository.aggregate_by_run with a single SELECT COUNT/SUM query. Add an abstract RunStore.aggregate_tokens_by_thread method, with a SQL GROUP BY implementation in RunRepository and a Python fallback in MemoryRunStore. Simplify the thread_token_usage endpoint to delegate to the new method, eliminating the truncation risk of the previous limit=10000 fetch.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-21 15:36:48 +00:00 · 2026-04-06 11:20:34 +08:00
parent 332fb18b34
commit 0af0ae7fbb
5 changed files with 98 additions and 41 deletions
@@ -310,32 +310,5 @@ async def list_run_events(
 async def thread_token_usage(thread_id: str, request: Request) -> dict:
    """Thread-level token usage aggregation."""
    run_store = get_run_store(request)
-    runs = await run_store.list_by_thread(thread_id, limit=10000)
-    completed = [r for r in runs if r.get("status") in ("success", "error")]
-
-    total_tokens = sum(r.get("total_tokens", 0) for r in completed)
-    total_input = sum(r.get("total_input_tokens", 0) for r in completed)
-    total_output = sum(r.get("total_output_tokens", 0) for r in completed)
-
-    by_model: dict[str, dict] = {}
-    for r in completed:
-        model = r.get("model_name") or "unknown"
-        entry = by_model.setdefault(model, {"tokens": 0, "runs": 0})
-        entry["tokens"] += r.get("total_tokens", 0)
-        entry["runs"] += 1
-
-    by_caller = {
-        "lead_agent": sum(r.get("lead_agent_tokens", 0) for r in completed),
-        "subagent": sum(r.get("subagent_tokens", 0) for r in completed),
-        "middleware": sum(r.get("middleware_tokens", 0) for r in completed),
-    }
-
-    return {
-        "thread_id": thread_id,
-        "total_tokens": total_tokens,
-        "total_input_tokens": total_input,
-        "total_output_tokens": total_output,
-        "total_runs": len(completed),
-        "by_model": by_model,
-        "by_caller": by_caller,
-    }
+    agg = await run_store.aggregate_tokens_by_thread(thread_id)
+    return {"thread_id": thread_id, **agg}