From 719305840b82090fb3b6fa3089ac3e91f9b24a55 Mon Sep 17 00:00:00 2001 From: Willem Jiang Date: Thu, 21 May 2026 14:57:49 +0800 Subject: [PATCH] fix(middleware): handle list content blocks in summarization text extraction Explicitly extract text blocks from AIMessage.content lists instead of falling through to str() repr when .text is unavailable, preventing garbage summary strings like "[{'type': 'text', ...}]" from reaching the LLM. Adds parametrized regression tests for string, multi-block, mixed reasoning/text, empty, and non-AIMessage responses. --- .../middlewares/summarization_middleware.py | 11 ++++- .../tests/test_summarization_middleware.py | 47 +++++++++++++++---- 2 files changed, 49 insertions(+), 9 deletions(-) diff --git a/backend/packages/harness/deerflow/agents/middlewares/summarization_middleware.py b/backend/packages/harness/deerflow/agents/middlewares/summarization_middleware.py index 39e7f70b2..b3f62c9ac 100644 --- a/backend/packages/harness/deerflow/agents/middlewares/summarization_middleware.py +++ b/backend/packages/harness/deerflow/agents/middlewares/summarization_middleware.py @@ -236,10 +236,19 @@ class DeerFlowSummarizationMiddleware(SummarizationMiddleware): def _extract_summary_text(self, response: Any) -> str: # Prefer .text which normalizes list content blocks (e.g. [{"type": "text", "text": "..."}]). - # Fall back to .content for non-LangChain responses. + # Fall back to .content for non-LangChain responses, with explicit list handling + # to avoid producing Python repr strings like "[{'type': 'text', ...}]". 
summary_text = getattr(response, "text", None) if summary_text is None: summary_text = getattr(response, "content", "") + if isinstance(summary_text, list): + parts: list[str] = [] + for block in summary_text: + if isinstance(block, str): + parts.append(block) + elif isinstance(block, dict) and block.get("type") == "text": + parts.append(block.get("text", "")) + summary_text = "".join(parts) return summary_text.strip() if isinstance(summary_text, str) else str(summary_text).strip() @override diff --git a/backend/tests/test_summarization_middleware.py b/backend/tests/test_summarization_middleware.py index 0a5a80160..c91a6b80d 100644 --- a/backend/tests/test_summarization_middleware.py +++ b/backend/tests/test_summarization_middleware.py @@ -725,15 +725,46 @@ def test_memory_flush_hook_passes_runtime_user_id(monkeypatch: pytest.MonkeyPatc assert queue.add_nowait.call_args.kwargs["user_id"] == "alice" -def test_extract_summary_text_normalizes_list_content_blocks() -> None: +@pytest.mark.parametrize( + "content, expected", + [ + # String content — straight through + ("Plain summary", "Plain summary"), + # Single text block + ([{"type": "text", "text": "A summary of the chat."}], "A summary of the chat."), + # Multiple text blocks concatenated + ( + [{"type": "text", "text": "Part one. "}, {"type": "text", "text": "Part two."}], + "Part one. 
Part two.", + ), + # Mixed blocks: reasoning should be skipped, only text extracted + ( + [ + {"type": "thinking", "thinking": "internal reasoning"}, + {"type": "text", "text": "Visible summary."}, + ], + "Visible summary.", + ), + # Empty list → empty string + ([], ""), + ], +) +def test_extract_summary_text_normalizes_list_content_blocks(content, expected) -> None: """AIMessage.content can be a list of content blocks; _extract_summary_text - must normalize to plain text via the .text property instead of producing - a Python repr like [{'type': 'text', 'text': 'summary'}].""" + must normalize to plain text instead of producing a Python repr like + [{'type': 'text', 'text': 'summary'}].""" + middleware = _middleware() + response = AIMessage(content=content) + assert middleware._extract_summary_text(response) == expected + + +def test_extract_summary_text_handles_non_aimessage_with_list_content() -> None: + """When response has no .text attribute and .content is a list, the explicit + list normalization must still extract text instead of falling through to repr.""" middleware = _middleware() - response = AIMessage(content=[{"type": "text", "text": "A summary of the chat."}]) - assert middleware._extract_summary_text(response) == "A summary of the chat." + class FakeResponse: + text = None # type: ignore[assignment] + content = [{"type": "text", "text": "Summary from non-AIMessage."}] - # Plain string content still works - response_str = AIMessage(content="Plain summary") - assert middleware._extract_summary_text(response_str) == "Plain summary" + assert middleware._extract_summary_text(FakeResponse()) == "Summary from non-AIMessage."