mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-21 15:36:48 +00:00
fix(backend): stream DeerFlowClient AI text as token deltas (#1969)
DeerFlowClient.stream() subscribed to LangGraph stream_mode=["values",
"custom"] which only delivers full-state snapshots at graph-node
boundaries, so AI replies were dumped as a single messages-tuple event
per node instead of streaming token-by-token. `client.stream("hello")`
looked identical to `client.chat("hello")` — the bug reported in #1969.
Subscribe to "messages" mode as well, forward AIMessageChunk deltas as
messages-tuple events with delta semantics (consumers accumulate by id),
and deduplicate the values-snapshot path so it does not re-synthesize AI
text that was already streamed. Introduce a per-id usage_metadata
counter so the final AIMessage in the values snapshot and the final
"messages" chunk — which carry the same cumulative usage — are not
double-counted.
chat() now accumulates per-id deltas and returns the last message's
full accumulated text. Non-streaming mock sources (single event per id)
are a degenerate case of the same logic, keeping existing callers and
tests backward compatible.
Verified end-to-end against a real LLM: a 15-number count emits 35
messages-tuple events with BPE subword boundaries clearly visible
("eleven" -> "ele" / "ven", "twelve" -> "tw" / "elve"), arriving over a
476 ms window, and the end-event usage matches the values-snapshot usage
exactly (not doubled). tests/test_client_live.py::TestLiveStreaming passes.
New unit tests:
- test_messages_mode_emits_token_deltas: 3 AIMessageChunks produce 3
delta events with correct content/id/usage, values-snapshot does not
duplicate, usage counted once.
- test_chat_accumulates_streamed_deltas: chat() rebuilds full text
from deltas.
- test_messages_mode_tool_message: ToolMessage delivered via messages
mode is not duplicated by the values-snapshot synthesis path.
The stream() docstring now documents why this client does not reuse
Gateway's run_agent() / StreamBridge pipeline (sync vs async, raw
LangChain objects vs serialized dicts, single caller vs HTTP fan-out).
Fixes #1969
This commit is contained in:
@@ -10,7 +10,7 @@ from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, ToolMessage # noqa: F401
|
||||
from langchain_core.messages import AIMessage, AIMessageChunk, HumanMessage, SystemMessage, ToolMessage # noqa: F401
|
||||
|
||||
from app.gateway.routers.mcp import McpConfigResponse
|
||||
from app.gateway.routers.memory import MemoryConfigResponse, MemoryStatusResponse
|
||||
@@ -225,7 +225,9 @@ class TestStream:
|
||||
|
||||
agent.stream.assert_called_once()
|
||||
call_kwargs = agent.stream.call_args.kwargs
|
||||
assert call_kwargs["stream_mode"] == ["values", "custom"]
|
||||
# ``messages`` enables token-level streaming of AI text deltas;
|
||||
# see DeerFlowClient.stream() docstring and GitHub issue #1969.
|
||||
assert call_kwargs["stream_mode"] == ["values", "messages", "custom"]
|
||||
|
||||
assert events[0].type == "custom"
|
||||
assert events[0].data == {"type": "task_started", "task_id": "task-1"}
|
||||
@@ -351,6 +353,123 @@ class TestStream:
|
||||
# Should not raise; end event proves it completed
|
||||
assert events[-1].type == "end"
|
||||
|
||||
def test_messages_mode_emits_token_deltas(self, client):
    """Check that ``messages``-mode chunks surface as per-chunk delta events.

    Regression test for bytedance/deer-flow#1969: with only ``values``
    mode subscribed, AI text arrived as one cumulative dump per graph
    node instead of token-by-token deltas.

    The mocked agent yields three ``AIMessageChunk`` items sharing one id,
    then a ``values`` snapshot holding the fully assembled ``AIMessage``.
    The test expects:

    * exactly three non-empty AI ``messages-tuple`` events, each carrying
      only its own text and the shared id;
    * usage metadata on a single event, counted once in the ``end`` event;
    * the ``values`` event still being emitted;
    * ``messages`` present in the requested ``stream_mode``.
    """
    usage = {"input_tokens": 3, "output_tokens": 4, "total_tokens": 7}
    message_id = "ai-1"

    # Script the agent output in the shape LangGraph produces when both
    # ``messages`` and ``values`` modes are subscribed: token chunks
    # first, then a terminal snapshot with the assembled message. Each
    # message gets its own copy of the usage dict.
    chunks = [
        AIMessageChunk(content="Hel", id=message_id),
        AIMessageChunk(content=" lo ", id=message_id),
        AIMessageChunk(content="world!", id=message_id, usage_metadata=dict(usage)),
    ]
    final_message = AIMessage(content="Hel lo world!", id=message_id, usage_metadata=dict(usage))
    scripted = [("messages", (chunk, {})) for chunk in chunks]
    scripted.append(("values", {"messages": [HumanMessage(content="hi", id="h-1"), final_message]}))

    fake_agent = MagicMock()
    fake_agent.stream.return_value = iter(scripted)

    with patch.object(client, "_ensure_agent"), patch.object(client, "_agent", fake_agent):
        emitted = list(client.stream("hi", thread_id="t-stream"))

    # Keep only AI messages-tuple events that actually carry text.
    deltas = []
    for event in emitted:
        if event.type != "messages-tuple":
            continue
        payload = event.data
        if payload.get("type") == "ai" and payload.get("content"):
            deltas.append(event)

    # One event per chunk, each holding its own delta (not cumulative text),
    # all under the same message id.
    assert [event.data["content"] for event in deltas] == ["Hel", " lo ", "world!"]
    assert [event.data["id"] for event in deltas] == [message_id] * len(deltas)

    # The values snapshot must not add a fourth AI text event for the id
    # that was already streamed, or consumers would see duplicated text.
    assert len(deltas) == 3

    # Usage appears only on the chunk that carried it, and the identical
    # usage on the assembled snapshot message is not counted a second time.
    with_usage = [event for event in deltas if "usage_metadata" in event.data]
    assert len(with_usage) == 1
    assert with_usage[0].data["usage_metadata"] == usage

    last_event = emitted[-1]
    assert last_event.type == "end"
    assert last_event.data["usage"] == usage

    # The values snapshot itself is still forwarded.
    assert any(event.type == "values" for event in emitted)

    # Subscribing to ``messages`` mode is the point of the fix.
    requested_modes = fake_agent.stream.call_args.kwargs["stream_mode"]
    assert "messages" in requested_modes
|
||||
|
||||
def test_chat_accumulates_streamed_deltas(self, client):
    """Check that chat() joins same-id ``messages``-mode deltas into one reply.

    The mocked agent streams three ``AIMessageChunk`` pieces under one id
    and then a ``values`` snapshot with the complete ``AIMessage``. The
    string returned by ``chat()`` must equal the full text.
    """
    pieces = ("Hel", "lo ", "world!")
    scripted = [("messages", (AIMessageChunk(content=piece, id="ai-1"), {})) for piece in pieces]
    snapshot = {
        "messages": [
            HumanMessage(content="hi", id="h-1"),
            AIMessage(content="Hello world!", id="ai-1"),
        ]
    }
    scripted.append(("values", snapshot))

    fake_agent = MagicMock()
    fake_agent.stream.return_value = iter(scripted)

    with patch.object(client, "_ensure_agent"), patch.object(client, "_agent", fake_agent):
        reply = client.chat("hi", thread_id="t-chat-stream")

    assert reply == "Hello world!"
|
||||
|
||||
def test_messages_mode_tool_message(self, client):
    """Check that a ToolMessage from ``messages`` mode is emitted exactly once.

    The mocked agent delivers the same tool result twice: first as a
    ``messages``-mode item, then inside the ``values`` snapshot. Only one
    ``tool`` ``messages-tuple`` event may come out of ``stream()``, and it
    must carry the tool's content, name and tool_call_id.
    """

    def make_tool_result():
        # A fresh but identical ToolMessage for each delivery path.
        return ToolMessage(content="file.txt", id="tm-1", tool_call_id="tc-1", name="bash")

    scripted = [
        ("messages", (make_tool_result(), {})),
        ("values", {"messages": [HumanMessage(content="ls", id="h-1"), make_tool_result()]}),
    ]

    fake_agent = MagicMock()
    fake_agent.stream.return_value = iter(scripted)

    with patch.object(client, "_ensure_agent"), patch.object(client, "_agent", fake_agent):
        emitted = list(client.stream("ls", thread_id="t-tool-stream"))

    tool_events = []
    for event in emitted:
        if event.type == "messages-tuple" and event.data.get("type") == "tool":
            tool_events.append(event)

    # Delivered once via messages mode; the values-snapshot synthesis
    # path must not produce a duplicate.
    assert len(tool_events) == 1

    payload = tool_events[0].data
    assert payload["content"] == "file.txt"
    assert payload["name"] == "bash"
    assert payload["tool_call_id"] == "tc-1"
|
||||
|
||||
def test_list_content_blocks(self, client):
|
||||
"""stream() handles AIMessage with list-of-blocks content."""
|
||||
ai = AIMessage(
|
||||
|
||||
Reference in New Issue
Block a user