test+config: comprehensive Phase 2 test coverage + deprecate checkpointer config

- config.example.yaml: deprecate standalone checkpointer section, activate
  unified database:sqlite as default (drives both checkpointer + app data)
- New: test_thread_meta_repo.py (14 tests) — full ThreadMetaRepository coverage
  including check_access owner logic, list_by_owner pagination
- Extended test_run_repository.py (+4 tests) — completion preserves fields,
  list ordering desc, limit, owner_none returns all
- Extended test_run_journal.py (+8 tests) — on_chain_error, track_tokens=false,
  middleware no ai_message, unknown caller tokens, convenience fields,
  tool_error, non-summarization custom event
- Extended test_run_event_store.py (+7 tests) — DB batch seq continuity,
  make_run_event_store factory (memory/db/jsonl/fallback/unknown)
- Extended test_phase2b_integration.py (+4 tests) — create_or_reject persists,
  follow-up metadata, summarization in history, full DB-backed lifecycle
- Fixed DB integration test to use proper fake objects (not MagicMock)
  for JSON-serializable metadata
- 157 total Phase 2 tests pass, zero regressions

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
rayhpeng
2026-04-02 19:36:15 +08:00
parent 5cb0471af5
commit 2e4cb5c6a9
6 changed files with 510 additions and 39 deletions
+138 -13
View File
@@ -5,7 +5,6 @@ is correctly written to both RunStore and RunEventStore.
"""
import asyncio
from unittest.mock import MagicMock
from uuid import uuid4
import pytest
@@ -15,19 +14,30 @@ from deerflow.runtime.journal import RunJournal
from deerflow.runtime.runs.store.memory import MemoryRunStore
class _FakeMessage:
def __init__(self, content, usage):
self.content = content
self.tool_calls = []
self.response_metadata = {"model_name": "test-model"}
self.usage_metadata = usage
self.id = "test-msg-id"
def model_dump(self):
return {"type": "ai", "content": self.content, "id": self.id, "tool_calls": [], "usage_metadata": self.usage_metadata, "response_metadata": self.response_metadata}
class _FakeGeneration:
def __init__(self, message):
self.message = message
class _FakeLLMResult:
def __init__(self, content, usage):
self.generations = [[_FakeGeneration(_FakeMessage(content, usage))]]
def _make_llm_response(content="Hello", usage=None):
msg = MagicMock()
msg.content = content
msg.tool_calls = []
msg.response_metadata = {"model_name": "test-model"}
msg.usage_metadata = usage
gen = MagicMock()
gen.message = msg
response = MagicMock()
response.generations = [[gen]]
return response
return _FakeLLMResult(content, usage)
class TestRunLifecycle:
@@ -152,3 +162,118 @@ class TestRunLifecycle:
await mgr.set_status(record.run_id, RunStatus.running)
row = await run_store.get(record.run_id)
assert row["status"] == "running"
@pytest.mark.anyio
async def test_runmanager_create_or_reject_persists(self):
"""create_or_reject also persists to store."""
from deerflow.runtime.runs.manager import RunManager
run_store = MemoryRunStore()
mgr = RunManager(store=run_store)
record = await mgr.create_or_reject("t1", "lead_agent", metadata={"key": "val"})
row = await run_store.get(record.run_id)
assert row is not None
assert row["status"] == "pending"
assert row["metadata"] == {"key": "val"}
@pytest.mark.anyio
async def test_follow_up_metadata_in_messages(self):
"""human_message metadata carries follow_up_to_run_id."""
event_store = MemoryRunEventStore()
# Run 1
await event_store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content="Q1")
await event_store.put(thread_id="t1", run_id="r1", event_type="ai_message", category="message", content="A1")
# Run 2 (follow-up)
await event_store.put(
thread_id="t1",
run_id="r2",
event_type="human_message",
category="message",
content="Tell me more",
metadata={"follow_up_to_run_id": "r1"},
)
messages = await event_store.list_messages("t1")
assert len(messages) == 3
assert messages[2]["metadata"]["follow_up_to_run_id"] == "r1"
@pytest.mark.anyio
async def test_summarization_in_history(self):
"""summary message appears correctly in message history."""
event_store = MemoryRunEventStore()
await event_store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content="Q1")
await event_store.put(thread_id="t1", run_id="r1", event_type="ai_message", category="message", content="A1")
await event_store.put(thread_id="t1", run_id="r2", event_type="summary", category="message", content="Previous conversation summarized.", metadata={"replaced_count": 2})
await event_store.put(thread_id="t1", run_id="r2", event_type="human_message", category="message", content="Q2")
await event_store.put(thread_id="t1", run_id="r2", event_type="ai_message", category="message", content="A2")
messages = await event_store.list_messages("t1")
assert len(messages) == 5
assert messages[2]["event_type"] == "summary"
assert messages[2]["metadata"]["replaced_count"] == 2
@pytest.mark.anyio
async def test_db_backed_run_lifecycle(self, tmp_path):
"""Full lifecycle with SQLite-backed RunRepository + DbRunEventStore."""
from deerflow.persistence.engine import close_engine, get_session_factory, init_engine
from deerflow.persistence.repositories.run_repo import RunRepository
from deerflow.runtime.events.store.db import DbRunEventStore
from deerflow.runtime.runs.manager import RunManager
url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}"
await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
sf = get_session_factory()
run_store = RunRepository(sf)
event_store = DbRunEventStore(sf)
mgr = RunManager(store=run_store)
# Create run
record = await mgr.create("t1", "lead_agent")
run_id = record.run_id
# Write human_message
await event_store.put(thread_id="t1", run_id=run_id, event_type="human_message", category="message", content="Hello DB")
# Simulate journal
on_complete_data = {}
journal = RunJournal(run_id, "t1", event_store, on_complete=lambda **d: on_complete_data.update(d), flush_threshold=100)
journal.set_first_human_message("Hello DB")
journal.on_chain_start({}, {}, run_id=uuid4(), parent_run_id=None)
llm_rid = uuid4()
journal.on_llm_start({"name": "test"}, [], run_id=llm_rid, tags=["lead_agent"])
journal.on_llm_end(_make_llm_response("DB response", usage={"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}), run_id=llm_rid, tags=["lead_agent"])
journal.on_chain_end({}, run_id=uuid4(), parent_run_id=None)
await journal.flush()
await asyncio.sleep(0.05)
# Verify run persisted
row = await run_store.get(run_id)
assert row is not None
assert row["status"] == "pending" # RunManager set it, journal doesn't update status
# Update completion
await run_store.update_run_completion(run_id, status="success", **on_complete_data)
row = await run_store.get(run_id)
assert row["status"] == "success"
assert row["total_tokens"] == 15
# Verify messages from DB
messages = await event_store.list_messages("t1")
assert len(messages) == 2
assert messages[0]["event_type"] == "human_message"
assert messages[1]["event_type"] == "ai_message"
# Verify events from DB
events = await event_store.list_events("t1", run_id)
event_types = {e["event_type"] for e in events}
assert "run_start" in event_types
assert "llm_end" in event_types
assert "run_end" in event_types
await close_engine()