fix: use backend thread token usage for header total (#2800)

* fix: use backend thread token usage for header total
* Refactor thread token usage fetch
2026-05-24 17:06:00 +00:00 · 2026-05-09 19:40:32 +08:00
parent 881ff71252
commit 417416087b
16 changed files with 540 additions and 35 deletions
@@ -166,6 +166,61 @@ class TestRunRepository:
        assert row["total_tokens"] == 100
        await _cleanup()

+    @pytest.mark.anyio
+    async def test_aggregate_tokens_by_thread_counts_completed_runs_only(self, tmp_path):  # thread aggregate should sum finished runs (success + error) of one thread only
+        repo = await _make_repo(tmp_path)
+        await repo.put("success-run", thread_id="t1", status="running")  # run 1 on t1: ends as "success", expected to be counted
+        await repo.update_run_completion(
+            "success-run",
+            status="success",
+            total_input_tokens=70,
+            total_output_tokens=30,
+            total_tokens=100,
+            lead_agent_tokens=80,
+            subagent_tokens=15,
+            middleware_tokens=5,  # the only run in this test that sets middleware tokens
+        )
+        await repo.put("error-run", thread_id="t1", status="running")  # run 2 on t1: ends as "error", still expected to be counted
+        await repo.update_run_completion(
+            "error-run",
+            status="error",
+            total_input_tokens=20,
+            total_output_tokens=30,
+            total_tokens=50,
+            lead_agent_tokens=40,
+            subagent_tokens=10,
+        )
+        await repo.put("running-run", thread_id="t1", status="running")  # run 3 on t1: stays "running", expected to be excluded
+        await repo.update_run_completion(
+            "running-run",
+            status="running",  # token counts are written but the status is not a finished one
+            total_input_tokens=900,
+            total_output_tokens=99,
+            total_tokens=999,
+            lead_agent_tokens=999,
+        )
+        await repo.put("other-thread-run", thread_id="t2", status="running")  # run on a different thread (t2): must not leak into t1's totals
+        await repo.update_run_completion(
+            "other-thread-run",
+            status="success",
+            total_tokens=888,
+            lead_agent_tokens=888,
+        )
+
+        agg = await repo.aggregate_tokens_by_thread("t1")
+
+        assert agg["total_tokens"] == 150  # 100 (success-run) + 50 (error-run); 999 and 888 excluded
+        assert agg["total_input_tokens"] == 90  # 70 + 20
+        assert agg["total_output_tokens"] == 60  # 30 + 30
+        assert agg["total_runs"] == 2  # success-run and error-run
+        assert agg["by_model"] == {"unknown": {"tokens": 150, "runs": 2}}  # no model name is passed to put() above; expected bucket key is "unknown"
+        assert agg["by_caller"] == {  # per-caller sums over the two counted runs
+            "lead_agent": 120,  # 80 + 40
+            "subagent": 25,  # 15 + 10
+            "middleware": 5,  # only success-run sets middleware_tokens
+        }
+        await _cleanup()
+
+
    @pytest.mark.anyio
    async def test_list_by_thread_ordered_desc(self, tmp_path):
        """list_by_thread returns newest first."""
@@ -0,0 +1,55 @@
+"""Tests for thread-level token usage aggregation API."""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock
+
+from _router_auth_helpers import make_authed_test_app
+from fastapi.testclient import TestClient
+
+from app.gateway.routers import thread_runs
+
+
+def _make_app(run_store: MagicMock):  # build a test app that serves the thread_runs router backed by the given run store
+    app = make_authed_test_app()  # project test helper; presumably returns an app with auth already satisfied (confirm in _router_auth_helpers)
+    app.include_router(thread_runs.router)
+    app.state.run_store = run_store  # attach the stub to app.state; presumably where the route looks the store up
+    return app
+
+
+def test_thread_token_usage_returns_stable_shape():  # GET token-usage should return the store's aggregate plus the thread id
+    run_store = MagicMock()
+    run_store.aggregate_tokens_by_thread = AsyncMock(  # stub the async aggregate call with a fixed payload
+        return_value={
+            "total_tokens": 150,
+            "total_input_tokens": 90,
+            "total_output_tokens": 60,
+            "total_runs": 2,
+            "by_model": {"unknown": {"tokens": 150, "runs": 2}},
+            "by_caller": {
+                "lead_agent": 120,
+                "subagent": 25,
+                "middleware": 5,
+            },
+        },
+    )
+    app = _make_app(run_store)
+
+    with TestClient(app) as client:
+        response = client.get("/api/threads/thread-1/token-usage")
+
+    assert response.status_code == 200
+    assert response.json() == {  # exact-equality check: every stubbed field, plus thread_id, and nothing else
+        "thread_id": "thread-1",  # not part of the stubbed return value; matches the path segment of the request URL
+        "total_tokens": 150,
+        "total_input_tokens": 90,
+        "total_output_tokens": 60,
+        "total_runs": 2,
+        "by_model": {"unknown": {"tokens": 150, "runs": 2}},
+        "by_caller": {
+            "lead_agent": 120,
+            "subagent": 25,
+            "middleware": 5,
+        },
+    }
+    run_store.aggregate_tokens_by_thread.assert_awaited_once_with("thread-1")  # store awaited exactly once, with the thread id from the URL