mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-23 00:16:48 +00:00
fix(runs): expose active progress counters (#3148)
* fix(runs): expose active progress counters
* fix(runs): avoid delayed progress flush on completion
* fix(runs): tighten progress snapshot semantics
* fix(runs): preserve omitted progress fields
* chore(runs): remove duplicate journal initialization
This commit is contained in:
@@ -714,6 +714,110 @@ class TestExternalUsageRecords:
|
||||
assert j._subagent_tokens == 0
|
||||
|
||||
|
||||
class TestProgressSnapshots:
    """Checks on the snapshots a ``RunJournal`` hands to its ``progress_reporter``."""

    @staticmethod
    def _new_journal(reporter, interval):
        """Build a journal for run ``r1`` / thread ``t1`` backed by a fresh in-memory store."""
        return RunJournal(
            "r1",
            "t1",
            MemoryRunEventStore(),
            flush_threshold=100,
            progress_reporter=reporter,
            progress_flush_interval=interval,
        )

    @staticmethod
    def _end_llm_call(journal, text, usage):
        """Feed one lead-agent ``on_llm_end`` callback carrying ``text`` and ``usage``."""
        journal.on_llm_end(
            _make_llm_response(text, usage=usage),
            run_id=uuid4(),
            parent_run_id=None,
            tags=["lead_agent"],
        )

    @pytest.mark.anyio
    async def test_on_llm_end_reports_progress_snapshot(self):
        """With a zero flush interval, one LLM call yields a snapshot carrying its counters."""
        received: list[dict] = []

        async def collect(snapshot: dict) -> None:
            received.append(snapshot)

        journal = self._new_journal(collect, 0)
        self._end_llm_call(
            journal,
            "Answer",
            {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
        )
        await journal.flush()

        assert received
        latest = received[-1]
        assert latest["total_tokens"] == 15
        assert latest["llm_call_count"] == 1
        assert latest["message_count"] == 1
        assert latest["last_ai_message"] == "Answer"

    @pytest.mark.anyio
    async def test_throttled_progress_flush_emits_trailing_snapshot(self):
        """Two back-to-back calls under a 10 ms interval end with a snapshot covering both."""
        received: list[dict] = []
        both_calls_reported = asyncio.Event()

        async def collect(snapshot: dict) -> None:
            received.append(snapshot)
            # 45 is the total the final assertions expect after the 15- and 30-token calls.
            if snapshot["total_tokens"] == 45:
                both_calls_reported.set()

        journal = self._new_journal(collect, 0.01)
        self._end_llm_call(
            journal,
            "First",
            {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
        )
        self._end_llm_call(
            journal,
            "Second",
            {"input_tokens": 20, "output_tokens": 10, "total_tokens": 30},
        )
        await asyncio.wait_for(both_calls_reported.wait(), timeout=1.0)
        await journal.flush()

        assert len(received) >= 2
        latest = received[-1]
        assert latest["total_tokens"] == 45
        assert latest["llm_call_count"] == 2
        assert latest["last_ai_message"] == "Second"

    @pytest.mark.anyio
    async def test_flush_cancels_delayed_progress_without_final_progress_write(self):
        """``flush()`` returns within 0.2 s despite a 10 s interval and reports nothing new."""
        received: list[dict] = []

        async def collect(snapshot: dict) -> None:
            received.append(snapshot)

        journal = self._new_journal(collect, 10.0)
        self._end_llm_call(
            journal,
            "First",
            {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15},
        )
        # Yield once to the event loop so the first snapshot can be delivered.
        await asyncio.sleep(0)
        assert received[-1]["total_tokens"] == 15

        self._end_llm_call(
            journal,
            "Second",
            {"input_tokens": 20, "output_tokens": 10, "total_tokens": 30},
        )

        await asyncio.wait_for(journal.flush(), timeout=0.2)

        # The most recent snapshot is still the one from the first call.
        latest = received[-1]
        assert latest["total_tokens"] == 15
        assert latest["llm_call_count"] == 1
        assert latest["last_ai_message"] == "First"
|
||||
|
||||
|
||||
class TestChatModelStartHumanMessage:
|
||||
"""Tests for on_chat_model_start extracting the first human message."""
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ from sqlalchemy.dialects import postgresql
|
||||
|
||||
from deerflow.persistence.run import RunRepository
|
||||
from deerflow.runtime import RunManager, RunStatus
|
||||
from deerflow.runtime.runs.store.base import RunStore
|
||||
|
||||
|
||||
async def _make_repo(tmp_path):
|
||||
@@ -26,6 +27,42 @@ async def _cleanup():
|
||||
await close_engine()
|
||||
|
||||
|
||||
class _CustomRunStoreWithoutProgress(RunStore):
    """Stub ``RunStore`` subclass that does not define ``update_run_progress``.

    Every method below ignores its arguments and returns an empty value, so
    any call to ``update_run_progress`` on an instance resolves to whatever
    ``RunStore`` itself provides.

    NOTE(review): the nine methods here are presumably the ones ``RunStore``
    requires subclasses to implement -- confirm against the base class.
    """

    # --- stubs returning ``None`` ---

    async def put(self, *_args, **_kwargs):
        return None

    async def get(self, *_args, **_kwargs):
        return None

    async def update_status(self, *_args, **_kwargs):
        return None

    async def delete(self, *_args, **_kwargs):
        return None

    async def update_model_name(self, *_args, **_kwargs):
        return None

    async def update_run_completion(self, *_args, **_kwargs):
        return None

    # --- stubs returning an empty list ---

    async def list_by_thread(self, *_args, **_kwargs):
        return []

    async def list_pending(self, *_args, **_kwargs):
        return []

    # --- stub returning an empty mapping ---

    async def aggregate_tokens_by_thread(self, *_args, **_kwargs):
        return {}
|
||||
|
||||
|
||||
@pytest.mark.anyio
async def test_update_run_progress_defaults_to_noop_for_custom_store():
    """A store that never defines ``update_run_progress`` can still be awaited on it.

    The test passes as long as the inherited call completes without raising.
    """
    custom_store = _CustomRunStoreWithoutProgress()

    await custom_store.update_run_progress("r1", total_tokens=1)
|
||||
|
||||
|
||||
class TestRunRepository:
|
||||
@pytest.mark.anyio
|
||||
async def test_put_and_get(self, tmp_path):
|
||||
@@ -170,6 +207,69 @@ class TestRunRepository:
|
||||
assert row["total_tokens"] == 100
|
||||
await _cleanup()
|
||||
|
||||
@pytest.mark.anyio
async def test_update_run_progress_keeps_status_running(self, tmp_path):
    """A progress update stores the counters and leaves the run's status at ``running``."""
    repo = await _make_repo(tmp_path)
    try:
        await repo.put("r1", thread_id="t1", status="running")
        await repo.update_run_progress(
            "r1",
            total_input_tokens=40,
            total_output_tokens=10,
            total_tokens=50,
            llm_call_count=1,
            message_count=2,
            last_ai_message="partial answer",
        )
        row = await repo.get("r1")
        assert row["status"] == "running"
        assert row["total_tokens"] == 50
        assert row["llm_call_count"] == 1
        assert row["message_count"] == 2
        assert row["last_ai_message"] == "partial answer"
    finally:
        # Run the teardown even when an assertion or repository call raises,
        # so a failure here cannot leave state behind for later tests.
        await _cleanup()
|
||||
|
||||
@pytest.mark.anyio
async def test_update_run_progress_preserves_omitted_fields(self, tmp_path):
    """Fields left out of a later progress update keep the values written earlier."""
    repo = await _make_repo(tmp_path)
    try:
        await repo.put("r1", thread_id="t1", status="running")
        await repo.update_run_progress(
            "r1",
            total_input_tokens=40,
            total_output_tokens=10,
            total_tokens=50,
            llm_call_count=1,
            lead_agent_tokens=30,
            subagent_tokens=20,
            message_count=2,
        )

        # Second update touches only two fields; everything else must survive.
        await repo.update_run_progress("r1", total_tokens=60, last_ai_message="updated")

        row = await repo.get("r1")
        assert row["total_input_tokens"] == 40
        assert row["total_output_tokens"] == 10
        assert row["total_tokens"] == 60
        assert row["llm_call_count"] == 1
        assert row["lead_agent_tokens"] == 30
        assert row["subagent_tokens"] == 20
        assert row["message_count"] == 2
        assert row["last_ai_message"] == "updated"
    finally:
        # Run the teardown even when an assertion or repository call raises,
        # so a failure here cannot leave state behind for later tests.
        await _cleanup()
|
||||
|
||||
@pytest.mark.anyio
async def test_update_run_progress_skips_terminal_runs(self, tmp_path):
    """A progress update arriving after completion does not overwrite the final counters."""
    repo = await _make_repo(tmp_path)
    try:
        await repo.put("r1", thread_id="t1", status="running")
        await repo.update_run_completion("r1", status="success", total_tokens=100, llm_call_count=1)

        # Late progress write against an already-successful run.
        await repo.update_run_progress("r1", total_tokens=200, llm_call_count=2)

        row = await repo.get("r1")
        assert row["status"] == "success"
        assert row["total_tokens"] == 100
        assert row["llm_call_count"] == 1
    finally:
        # Run the teardown even when an assertion or repository call raises,
        # so a failure here cannot leave state behind for later tests.
        await _cleanup()
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_aggregate_tokens_by_thread_counts_completed_runs_only(self, tmp_path):
|
||||
repo = await _make_repo(tmp_path)
|
||||
@@ -225,6 +325,28 @@ class TestRunRepository:
|
||||
}
|
||||
await _cleanup()
|
||||
|
||||
@pytest.mark.anyio
async def test_aggregate_tokens_by_thread_can_include_active_runs(self, tmp_path):
    """``include_active=True`` adds a still-running run's progress counters to the thread totals."""
    repo = await _make_repo(tmp_path)
    try:
        # One completed run (100 tokens) and one run still in progress (25 tokens).
        await repo.put("success-run", thread_id="t1", status="running")
        await repo.update_run_completion("success-run", status="success", total_tokens=100, lead_agent_tokens=100)
        await repo.put("running-run", thread_id="t1", status="running")
        await repo.update_run_progress("running-run", total_tokens=25, lead_agent_tokens=20, subagent_tokens=5)

        without_active = await repo.aggregate_tokens_by_thread("t1")
        with_active = await repo.aggregate_tokens_by_thread("t1", include_active=True)

        assert without_active["total_tokens"] == 100
        assert without_active["total_runs"] == 1
        assert with_active["total_tokens"] == 125
        assert with_active["total_runs"] == 2
        assert with_active["by_caller"] == {
            "lead_agent": 120,
            "subagent": 5,
            "middleware": 0,
        }
    finally:
        # Run the teardown even when an assertion or repository call raises,
        # so a failure here cannot leave state behind for later tests.
        await _cleanup()
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_list_by_thread_ordered_desc(self, tmp_path):
|
||||
"""list_by_thread returns newest first."""
|
||||
|
||||
@@ -53,3 +53,30 @@ def test_thread_token_usage_returns_stable_shape():
|
||||
},
|
||||
}
|
||||
run_store.aggregate_tokens_by_thread.assert_awaited_once_with("thread-1")
|
||||
|
||||
|
||||
def test_thread_token_usage_can_include_active_runs():
    """``?include_active=true`` on the token-usage route reaches the store as ``include_active=True``."""
    aggregate = {
        "total_tokens": 175,
        "total_input_tokens": 120,
        "total_output_tokens": 55,
        "total_runs": 3,
        "by_model": {"unknown": {"tokens": 175, "runs": 3}},
        "by_caller": {
            "lead_agent": 145,
            "subagent": 25,
            "middleware": 5,
        },
    }
    run_store = MagicMock()
    run_store.aggregate_tokens_by_thread = AsyncMock(return_value=aggregate)
    app = _make_app(run_store)

    with TestClient(app) as client:
        response = client.get("/api/threads/thread-1/token-usage?include_active=true")

    assert response.status_code == 200
    payload = response.json()
    assert payload["total_tokens"] == 175
    assert payload["total_runs"] == 3
    # The query flag must be passed through to the store as a keyword argument.
    run_store.aggregate_tokens_by_thread.assert_awaited_once_with("thread-1", include_active=True)
|
||||
|
||||
Reference in New Issue
Block a user