Compare commits

...

5 Commits

Author SHA1 Message Date
Willem Jiang 7752e74e2b update the code with review comments 2026-05-16 17:07:53 +08:00
Willem Jiang ba99a23814 Merge branch 'main' into fix-2804 2026-05-16 09:27:40 +08:00
Willem Jiang 2b2742c034 Merge branch 'main' into fix-2804 2026-05-12 15:53:28 +08:00
copilot-swe-agent[bot] 6ffe267d20 fix: use AIMessage content for summarization response parsing
Agent-Logs-Url: https://github.com/bytedance/deer-flow/sessions/0b34cccd-f69a-4f68-bfa6-9a41256b63b5

Co-authored-by: WillemJiang <219644+WillemJiang@users.noreply.github.com>
2026-05-11 14:48:45 +00:00
Willem Jiang c995c3a394 fix: hide summarization LLM output from frontend during context compression
Fixes #2804

  The useStream hook tracks messages-tuple mode which captures ALL LLM
  events, including the internal summarization LLM call. This caused the
  English summary text to briefly appear as an AI message in the UI before
  the REMOVE_ALL_MESSAGES state update replaced it.

  Fix by overriding _create_summary/_acreate_summary to pass callbacks=[]
  when invoking the summary model, preventing LangGraph from forwarding
  the internal LLM events to the frontend stream. Also add
  hide_from_ui=True to the summary HumanMessage's additional_kwargs as a
  belt-and-suspenders safety net alongside the existing name=summary
  check.
2026-05-11 22:19:47 +08:00
2 changed files with 153 additions and 2 deletions
@@ -10,6 +10,7 @@ from typing import Any, Protocol, override, runtime_checkable
from langchain.agents import AgentState
from langchain.agents.middleware import SummarizationMiddleware
from langchain_core.messages import AIMessage, AnyMessage, HumanMessage, RemoveMessage, ToolMessage
from langchain_core.messages.utils import get_buffer_string
from langgraph.config import get_config
from langgraph.graph.message import REMOVE_ALL_MESSAGES
from langgraph.runtime import Runtime
@@ -175,12 +176,84 @@ class DeerFlowSummarizationMiddleware(SummarizationMiddleware):
]
}
@override
def _create_summary(self, messages_to_summarize: list[AnyMessage]) -> str:
"""Generate summary without emitting streaming events to the client.
Suppresses callbacks to prevent the internal summarization LLM call from
producing visible AI message chunks in the frontend's ``messages-tuple``
stream (issue #2804).
"""
if not messages_to_summarize:
return "No previous conversation history."
trimmed = self._trim_messages_for_summary(messages_to_summarize)
if not trimmed:
return "Previous conversation was too long to summarize."
formatted = get_buffer_string(trimmed)
try:
response = self.model.with_config(callbacks=[]).invoke(
self.summary_prompt.format(messages=formatted).rstrip(),
config={
"metadata": {"lc_source": "summarization"},
"callbacks": [],
},
)
return self._extract_summary_text(response)
except Exception as e:
return f"Error generating summary: {e!s}"
@override
async def _acreate_summary(self, messages_to_summarize: list[AnyMessage]) -> str:
"""Generate summary without emitting streaming events to the client.
Suppresses callbacks to prevent the internal summarization LLM call from
producing visible AI message chunks in the frontend's ``messages-tuple``
stream (issue #2804).
"""
if not messages_to_summarize:
return "No previous conversation history."
trimmed = self._trim_messages_for_summary(messages_to_summarize)
if not trimmed:
return "Previous conversation was too long to summarize."
formatted = get_buffer_string(trimmed)
try:
response = await self.model.with_config(callbacks=[]).ainvoke(
self.summary_prompt.format(messages=formatted).rstrip(),
config={
"metadata": {"lc_source": "summarization"},
"callbacks": [],
},
)
return self._extract_summary_text(response)
except Exception as e:
return f"Error generating summary: {e!s}"
def _extract_summary_text(self, response: Any) -> str:
# Prefer .text which normalizes list content blocks (e.g. [{"type": "text", "text": "..."}]).
# Fall back to .content for non-LangChain responses.
summary_text = getattr(response, "text", None)
if summary_text is None:
summary_text = getattr(response, "content", "")
return summary_text.strip() if isinstance(summary_text, str) else str(summary_text).strip()
@override
def _build_new_messages(self, summary: str) -> list[HumanMessage]:
"""Override the base implementation to let the human message with the special name 'summary'.
And this message will be ignored to display in the frontend, but still can be used as context for the model.
"""
return [HumanMessage(content=f"Here is a summary of the conversation to date:\n\n{summary}", name="summary")]
return [
HumanMessage(
content=f"Here is a summary of the conversation to date:\n\n{summary}",
name="summary",
additional_kwargs={"hide_from_ui": True},
)
]
def _preserve_dynamic_context_reminders(
self,
+79 -1
View File
@@ -56,7 +56,8 @@ def _middleware(
preserve_recent_skill_tokens_per_skill: int = 0,
) -> DeerFlowSummarizationMiddleware:
model = MagicMock()
model.invoke.return_value = SimpleNamespace(text="compressed summary")
model.invoke.return_value = AIMessage(content="compressed summary")
model.with_config.return_value.invoke.return_value = AIMessage(content="compressed summary")
return DeerFlowSummarizationMiddleware(
model=model,
trigger=trigger,
@@ -642,6 +643,69 @@ def test_memory_flush_hook_preserves_agent_scoped_memory(monkeypatch: pytest.Mon
assert queue.add_nowait.call_args.kwargs["agent_name"] == "research-agent"
# ---------------------------------------------------------------------------
# Issue #2804: summary text must not leak to the frontend via streaming
# ---------------------------------------------------------------------------
def test_build_new_messages_sets_hide_from_ui() -> None:
"""The summary HumanMessage must carry hide_from_ui so the frontend filters it."""
middleware = _middleware()
messages = middleware._build_new_messages("test summary")
assert len(messages) == 1
msg = messages[0]
assert msg.name == "summary"
assert msg.additional_kwargs.get("hide_from_ui") is True
assert "test summary" in msg.content
def test_create_summary_suppresses_callbacks() -> None:
"""_create_summary must bind callbacks=[] on the model AND pass callbacks=[]
in the invoke config to suppress inherited LangGraph stream callbacks."""
middleware = _middleware()
middleware._create_summary(_messages())
middleware.model.with_config.assert_called_once_with(callbacks=[])
bound = middleware.model.with_config.return_value
bound.invoke.assert_called_once()
call_config = bound.invoke.call_args.kwargs.get("config") or bound.invoke.call_args[1].get("config")
assert call_config is not None
assert call_config.get("callbacks") == []
assert call_config.get("metadata", {}).get("lc_source") == "summarization"
@pytest.mark.anyio
async def test_acreate_summary_suppresses_callbacks() -> None:
"""_acreate_summary must bind callbacks=[] on the model AND pass callbacks=[]
in the ainvoke config to suppress inherited LangGraph stream callbacks."""
middleware = _middleware()
middleware.model.with_config.return_value.ainvoke = mock.AsyncMock(return_value=AIMessage(content="async summary"))
await middleware._acreate_summary(_messages())
middleware.model.with_config.assert_called_once_with(callbacks=[])
bound = middleware.model.with_config.return_value
bound.ainvoke.assert_called_once()
call_config = bound.ainvoke.call_args.kwargs.get("config") or bound.ainvoke.call_args[1].get("config")
assert call_config is not None
assert call_config.get("callbacks") == []
assert call_config.get("metadata", {}).get("lc_source") == "summarization"
def test_before_model_summary_message_has_hide_from_ui() -> None:
"""End-to-end: the emitted state update contains a summary message with hide_from_ui."""
middleware = _middleware()
result = middleware.before_model({"messages": _messages()}, _runtime())
emitted = result["messages"]
summary_msg = emitted[1]
assert summary_msg.name == "summary"
assert summary_msg.additional_kwargs.get("hide_from_ui") is True
def test_memory_flush_hook_passes_runtime_user_id(monkeypatch: pytest.MonkeyPatch) -> None:
queue = MagicMock()
monkeypatch.setattr("deerflow.agents.memory.summarization_hook.get_memory_config", lambda: MemoryConfig(enabled=True))
@@ -659,3 +723,17 @@ def test_memory_flush_hook_passes_runtime_user_id(monkeypatch: pytest.MonkeyPatc
queue.add_nowait.assert_called_once()
assert queue.add_nowait.call_args.kwargs["user_id"] == "alice"
def test_extract_summary_text_normalizes_list_content_blocks() -> None:
"""AIMessage.content can be a list of content blocks; _extract_summary_text
must normalize to plain text via the .text property instead of producing
a Python repr like [{'type': 'text', 'text': 'summary'}]."""
middleware = _middleware()
response = AIMessage(content=[{"type": "text", "text": "A summary of the chat."}])
assert middleware._extract_summary_text(response) == "A summary of the chat."
# Plain string content still works
response_str = AIMessage(content="Plain summary")
assert middleware._extract_summary_text(response_str) == "Plain summary"