refactor(journal): fix flush, token tracking, and consolidate tests

RunJournal fixes:
- _flush_sync: retain events in buffer when no event loop instead of
  dropping them; worker's finally block flushes via async flush().
- on_llm_end: add tool_calls filter and caller=="lead_agent" guard for
  ai_message events; mark message IDs for dedup with record_llm_usage.
- worker.py: persist completion data (tokens, message count) to RunStore
  in finally block.

Model factory:
- Auto-inject stream_usage=True for BaseChatOpenAI subclasses with
  custom api_base, so usage_metadata is populated in streaming responses.

Test consolidation:
- Delete test_phase2b_integration.py (redundant with existing tests).
- Move DB-backed lifecycle test into test_run_journal.py.
- Add tests for stream_usage injection in test_model_factory.py.
- Clean up executor/task_tool dead journal references.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
rayhpeng
2026-04-03 17:26:11 +08:00
parent e5b01d7e74
commit b92ddafd4b
7 changed files with 360 additions and 451 deletions
+78
View File
@@ -593,6 +593,84 @@ def test_codex_provider_strips_unsupported_max_tokens(monkeypatch):
assert "max_tokens" not in FakeChatModel.captured_kwargs
# ---------------------------------------------------------------------------
# stream_usage injection
# ---------------------------------------------------------------------------
class _FakeWithStreamUsage(FakeChatModel):
    """FakeChatModel variant that declares a ``stream_usage`` field.

    Stands in for model classes (such as BaseChatOpenAI) that expose
    ``stream_usage`` among their model fields.
    """

    # Left unset by default so tests can observe what the factory passes in.
    stream_usage: bool | None = None
def test_stream_usage_injected_for_openai_compatible_model(monkeypatch):
    """create_chat_model passes stream_usage=True to a class declaring that field."""
    app_config = _make_app_config(
        [_make_model("deepseek", use="langchain_deepseek:ChatDeepSeek")]
    )
    _patch_factory(monkeypatch, app_config, model_class=_FakeWithStreamUsage)

    # Constructor kwargs recorded by the capturing subclass below.
    seen_kwargs: dict = {}

    class CapturingModel(_FakeWithStreamUsage):
        def __init__(self, **kwargs):
            seen_kwargs.update(kwargs)
            BaseChatModel.__init__(self, **kwargs)

    def _resolve(path, base):
        # Always hand the factory the capturing class, whatever path is requested.
        return CapturingModel

    monkeypatch.setattr(factory_module, "resolve_class", _resolve)

    factory_module.create_chat_model(name="deepseek")

    assert seen_kwargs.get("stream_usage") is True
def test_stream_usage_not_injected_for_non_openai_model(monkeypatch):
    """create_chat_model must not pass stream_usage to a class lacking that field."""
    app_config = _make_app_config(
        [_make_model("claude", use="langchain_anthropic:ChatAnthropic")]
    )
    _patch_factory(monkeypatch, app_config)

    # Constructor kwargs recorded by the capturing subclass below.
    seen_kwargs: dict = {}

    class CapturingModel(FakeChatModel):
        def __init__(self, **kwargs):
            seen_kwargs.update(kwargs)
            BaseChatModel.__init__(self, **kwargs)

    def _resolve(path, base):
        # Always hand the factory the capturing class, whatever path is requested.
        return CapturingModel

    monkeypatch.setattr(factory_module, "resolve_class", _resolve)

    factory_module.create_chat_model(name="claude")

    assert "stream_usage" not in seen_kwargs
def test_stream_usage_not_overridden_when_explicitly_set_in_config(monkeypatch):
    """An explicit stream_usage=False on the model config must reach the model as-is."""
    app_config = _make_app_config(
        [_make_model("deepseek", use="langchain_deepseek:ChatDeepSeek")]
    )
    _patch_factory(monkeypatch, app_config, model_class=_FakeWithStreamUsage)

    # Constructor kwargs recorded by the capturing subclass below.
    seen_kwargs: dict = {}

    class CapturingModel(_FakeWithStreamUsage):
        def __init__(self, **kwargs):
            seen_kwargs.update(kwargs)
            BaseChatModel.__init__(self, **kwargs)

    def _resolve(path, base):
        # Always hand the factory the capturing class, whatever path is requested.
        return CapturingModel

    monkeypatch.setattr(factory_module, "resolve_class", _resolve)

    # Simulate a config with stream_usage explicitly set: wrap get_model_config
    # so the model config it returns carries stream_usage=False.
    unpatched_lookup = app_config.get_model_config

    def patched_get_model_config(name):
        model_config = unpatched_lookup(name)
        model_config.stream_usage = False  # type: ignore[attr-defined]
        return model_config

    monkeypatch.setattr(app_config, "get_model_config", patched_get_model_config)

    factory_module.create_chat_model(name="deepseek")

    assert seen_kwargs.get("stream_usage") is False
def test_openai_responses_api_settings_are_passed_to_chatopenai(monkeypatch):
model = ModelConfig(
name="gpt-5-responses",