mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-13 10:55:59 +00:00
perf(runtime): index messages in MemoryRunEventStore to avoid O(n) scans (#3531)
list_messages re-scanned every event in the thread on each call (category filter + seq filter) — O(total events) per paginated request on the default run-events backend. Maintain a messages-only, seq-sorted projection of _events (shared dict refs, no copies) and locate the seq window with bisect: list_messages drops to O(log m + page) and count_messages to O(1). The index is kept in lockstep at every mutation site (put / put_batch via _put_one, delete_by_run, delete_by_thread). Externally observable behavior is unchanged — the full RunEventStore contract suite passes across memory/db/jsonl. Add a test covering pagination over non-contiguous message seqs (messages interleaved with trace events), including in-gap and exact-boundary cursors. Co-authored-by: ly-wang19 <ly-wang19@users.noreply.github.com> Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -122,6 +122,26 @@ class TestListMessages:
|
||||
messages = await store.list_messages("t1", limit=3)
|
||||
assert [m["seq"] for m in messages] == [8, 9, 10]
|
||||
|
||||
@pytest.mark.anyio
async def test_pagination_with_interleaved_trace_events(self, store):
    """Seq-window pagination stays correct over non-contiguous message seqs.

    Ten events are written, alternating between the "message" and "trace"
    categories; the assertions expect the messages at seqs 1, 3, 5, 7, 9.
    Cursors are probed both inside a gap between message seqs and exactly
    on a message seq, where the bound is exclusive.
    """

    async def message_seqs(**window):
        # Fetch one page for thread "t1" and reduce it to its seq numbers.
        page = await store.list_messages("t1", **window)
        return [message["seq"] for message in page]

    for index in range(10):
        # Even indices become messages, odd indices become trace events.
        kind = "trace" if index % 2 else "message"
        await store.put(thread_id="t1", run_id="r1", event_type="e", category=kind, content=str(index))

    # No cursor: every message, trace events filtered out.
    assert await message_seqs() == [1, 3, 5, 7, 9]

    # before_seq in a gap: seq < 6 -> [1, 3, 5], last 2
    assert await message_seqs(before_seq=6, limit=2) == [3, 5]

    # before_seq on a message seq is exclusive: seq < 5 -> [1, 3]
    assert await message_seqs(before_seq=5, limit=5) == [1, 3]

    # after_seq in a gap: seq > 4 -> [5, 7, 9], first 2
    assert await message_seqs(after_seq=4, limit=2) == [5, 7]

    # after_seq on a message seq is exclusive: seq > 5 -> [7, 9]
    assert await message_seqs(after_seq=5, limit=5) == [7, 9]
|
||||
|
||||
|
||||
# -- list_events --
|
||||
|
||||
|
||||
Reference in New Issue
Block a user