mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-23 16:35:59 +00:00
fix(middleware): handle list content blocks in summarization text extraction
Explicitly extract text blocks from AIMessage.content lists instead of
falling through to str() repr when .text is unavailable, preventing
garbage summary strings like "[{'type': 'text', ...}]" from reaching the
LLM. Adds parametrized regression tests for string, multi-block,
mixed reasoning/text, empty, and non-AIMessage responses.
This commit is contained in:
@@ -236,10 +236,19 @@ class DeerFlowSummarizationMiddleware(SummarizationMiddleware):
|
|||||||
|
|
||||||
def _extract_summary_text(self, response: Any) -> str:
|
def _extract_summary_text(self, response: Any) -> str:
|
||||||
# Prefer .text which normalizes list content blocks (e.g. [{"type": "text", "text": "..."}]).
|
# Prefer .text which normalizes list content blocks (e.g. [{"type": "text", "text": "..."}]).
|
||||||
# Fall back to .content for non-LangChain responses.
|
# Fall back to .content for non-LangChain responses, with explicit list handling
|
||||||
|
# to avoid producing Python repr strings like "[{'type': 'text', ...}]".
|
||||||
summary_text = getattr(response, "text", None)
|
summary_text = getattr(response, "text", None)
|
||||||
if summary_text is None:
|
if summary_text is None:
|
||||||
summary_text = getattr(response, "content", "")
|
summary_text = getattr(response, "content", "")
|
||||||
|
if isinstance(summary_text, list):
|
||||||
|
parts: list[str] = []
|
||||||
|
for block in summary_text:
|
||||||
|
if isinstance(block, str):
|
||||||
|
parts.append(block)
|
||||||
|
elif isinstance(block, dict) and block.get("type") == "text":
|
||||||
|
parts.append(block.get("text", ""))
|
||||||
|
summary_text = "".join(parts)
|
||||||
return summary_text.strip() if isinstance(summary_text, str) else str(summary_text).strip()
|
return summary_text.strip() if isinstance(summary_text, str) else str(summary_text).strip()
|
||||||
|
|
||||||
@override
|
@override
|
||||||
|
|||||||
@@ -725,15 +725,46 @@ def test_memory_flush_hook_passes_runtime_user_id(monkeypatch: pytest.MonkeyPatc
|
|||||||
assert queue.add_nowait.call_args.kwargs["user_id"] == "alice"
|
assert queue.add_nowait.call_args.kwargs["user_id"] == "alice"
|
||||||
|
|
||||||
|
|
||||||
def test_extract_summary_text_normalizes_list_content_blocks() -> None:
|
@pytest.mark.parametrize(
|
||||||
|
"content, expected",
|
||||||
|
[
|
||||||
|
# String content — straight through
|
||||||
|
("Plain summary", "Plain summary"),
|
||||||
|
# Single text block
|
||||||
|
([{"type": "text", "text": "A summary of the chat."}], "A summary of the chat."),
|
||||||
|
# Multiple text blocks concatenated
|
||||||
|
(
|
||||||
|
[{"type": "text", "text": "Part one. "}, {"type": "text", "text": "Part two."}],
|
||||||
|
"Part one. Part two.",
|
||||||
|
),
|
||||||
|
# Mixed blocks: reasoning should be skipped, only text extracted
|
||||||
|
(
|
||||||
|
[
|
||||||
|
{"type": "thinking", "thinking": "internal reasoning"},
|
||||||
|
{"type": "text", "text": "Visible summary."},
|
||||||
|
],
|
||||||
|
"Visible summary.",
|
||||||
|
),
|
||||||
|
# Empty list → empty string
|
||||||
|
([], ""),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_extract_summary_text_normalizes_list_content_blocks(content, expected) -> None:
|
||||||
"""AIMessage.content can be a list of content blocks; _extract_summary_text
|
"""AIMessage.content can be a list of content blocks; _extract_summary_text
|
||||||
must normalize to plain text via the .text property instead of producing
|
must normalize to plain text instead of producing a Python repr like
|
||||||
a Python repr like [{'type': 'text', 'text': 'summary'}]."""
|
[{'type': 'text', 'text': 'summary'}]."""
|
||||||
|
middleware = _middleware()
|
||||||
|
response = AIMessage(content=content)
|
||||||
|
assert middleware._extract_summary_text(response) == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_summary_text_handles_non_aimessage_with_list_content() -> None:
|
||||||
|
"""When response has no .text attribute and .content is a list, the explicit
|
||||||
|
list normalization must still extract text instead of falling through to repr."""
|
||||||
middleware = _middleware()
|
middleware = _middleware()
|
||||||
|
|
||||||
response = AIMessage(content=[{"type": "text", "text": "A summary of the chat."}])
|
class FakeResponse:
|
||||||
assert middleware._extract_summary_text(response) == "A summary of the chat."
|
text = None # type: ignore[assignment]
|
||||||
|
content = [{"type": "text", "text": "Summary from non-AIMessage."}]
|
||||||
|
|
||||||
# Plain string content still works
|
assert middleware._extract_summary_text(FakeResponse()) == "Summary from non-AIMessage."
|
||||||
response_str = AIMessage(content="Plain summary")
|
|
||||||
assert middleware._extract_summary_text(response_str) == "Plain summary"
|
|
||||||
|
|||||||
Reference in New Issue
Block a user