fix: use backend thread token usage for header total (#2800)

* fix: use backend thread token usage for header total

* Refactor thread token usage fetch
This commit is contained in:
YuJitang
2026-05-09 19:40:32 +08:00
committed by GitHub
parent 881ff71252
commit 417416087b
16 changed files with 540 additions and 35 deletions
+55
View File
@@ -166,6 +166,61 @@ class TestRunRepository:
assert row["total_tokens"] == 100
await _cleanup()
@pytest.mark.anyio
async def test_aggregate_tokens_by_thread_counts_completed_runs_only(self, tmp_path):
    """aggregate_tokens_by_thread sums only finished runs of the given thread.

    Seeds four runs: a ``success`` and an ``error`` run on thread ``t1``
    (both expected in the totals), a run on ``t1`` that is still ``running``
    and a ``success`` run on thread ``t2`` (both expected to be left out).
    """
    repo = await _make_repo(tmp_path)

    # (run_id, thread_id, final status, token fields), in insertion order.
    seeded_runs = [
        (
            "success-run",
            "t1",
            "success",
            {
                "total_input_tokens": 70,
                "total_output_tokens": 30,
                "total_tokens": 100,
                "lead_agent_tokens": 80,
                "subagent_tokens": 15,
                "middleware_tokens": 5,
            },
        ),
        (
            "error-run",
            "t1",
            "error",
            {
                "total_input_tokens": 20,
                "total_output_tokens": 30,
                "total_tokens": 50,
                "lead_agent_tokens": 40,
                "subagent_tokens": 10,
            },
        ),
        (
            # Still running: its (large) counts must not reach the totals.
            "running-run",
            "t1",
            "running",
            {
                "total_input_tokens": 900,
                "total_output_tokens": 99,
                "total_tokens": 999,
                "lead_agent_tokens": 999,
            },
        ),
        (
            # Belongs to another thread: must not reach the totals either.
            "other-thread-run",
            "t2",
            "success",
            {
                "total_tokens": 888,
                "lead_agent_tokens": 888,
            },
        ),
    ]
    for run_id, thread_id, final_status, token_fields in seeded_runs:
        # Every run is first stored as "running", then moved to its final state.
        await repo.put(run_id, thread_id=thread_id, status="running")
        await repo.update_run_completion(run_id, status=final_status, **token_fields)

    usage = await repo.aggregate_tokens_by_thread("t1")

    # Totals are success-run + error-run only (100 + 50, 70 + 20, 30 + 30).
    assert usage["total_tokens"] == 150
    assert usage["total_input_tokens"] == 90
    assert usage["total_output_tokens"] == 60
    assert usage["total_runs"] == 2
    # No model name was recorded for any run, and the expected bucket is "unknown".
    assert usage["by_model"] == {"unknown": {"tokens": 150, "runs": 2}}
    assert usage["by_caller"] == {
        "lead_agent": 120,
        "subagent": 25,
        "middleware": 5,
    }
    await _cleanup()
@pytest.mark.anyio
async def test_list_by_thread_ordered_desc(self, tmp_path):
"""list_by_thread returns newest first."""
+55
View File
@@ -0,0 +1,55 @@
"""Tests for thread-level token usage aggregation API."""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock
from _router_auth_helpers import make_authed_test_app
from fastapi.testclient import TestClient
from app.gateway.routers import thread_runs
def _make_app(run_store: MagicMock):
    """Build an authenticated test app serving the thread_runs router.

    The given ``run_store`` is attached as ``app.state.run_store`` so the
    test controls the store the application exposes.
    """
    gateway_app = make_authed_test_app()
    gateway_app.include_router(thread_runs.router)
    gateway_app.state.run_store = run_store
    return gateway_app
def test_thread_token_usage_returns_stable_shape():
    """GET /api/threads/{id}/token-usage returns the aggregate plus thread_id.

    The run store is mocked; the response body is expected to be the store's
    aggregate with a ``thread_id`` field added, and the store must have been
    awaited exactly once with the thread id taken from the URL.
    """

    def _aggregate_payload():
        # Fresh dict on every call so the mocked return value and the
        # expected body never share (possibly mutated) objects.
        return {
            "total_tokens": 150,
            "total_input_tokens": 90,
            "total_output_tokens": 60,
            "total_runs": 2,
            "by_model": {"unknown": {"tokens": 150, "runs": 2}},
            "by_caller": {
                "lead_agent": 120,
                "subagent": 25,
                "middleware": 5,
            },
        }

    store = MagicMock()
    store.aggregate_tokens_by_thread = AsyncMock(return_value=_aggregate_payload())
    expected_body = {"thread_id": "thread-1", **_aggregate_payload()}

    with TestClient(_make_app(store)) as http:
        reply = http.get("/api/threads/thread-1/token-usage")

    assert reply.status_code == 200
    assert reply.json() == expected_body
    store.aggregate_tokens_by_thread.assert_awaited_once_with("thread-1")