fix(middleware): handle list content blocks in summarization text extraction

Explicitly extract text blocks from AIMessage.content lists instead of falling through to the str() repr when .text is unavailable, preventing garbage summary strings like "[{'type': 'text', ...}]" from reaching the LLM. Adds parametrized regression tests for string, multi-block, mixed reasoning/text, empty, and non-AIMessage responses.
2026-05-22 16:06:50 +00:00 · 2026-05-21 14:57:49 +08:00
parent 7752e74e2b
commit 719305840b
2 changed files with 49 additions and 9 deletions
@@ -236,10 +236,19 @@ class DeerFlowSummarizationMiddleware(SummarizationMiddleware):

    def _extract_summary_text(self, response: Any) -> str:
        # Prefer .text which normalizes list content blocks (e.g. [{"type": "text", "text": "..."}]).
-        # Fall back to .content for non-LangChain responses.
+        # Fall back to .content for non-LangChain responses, with explicit list handling
+        # to avoid producing Python repr strings like "[{'type': 'text', ...}]".
        summary_text = getattr(response, "text", None)
        if summary_text is None:
            summary_text = getattr(response, "content", "")
+        if isinstance(summary_text, list):
+            parts: list[str] = []
+            for block in summary_text:
+                if isinstance(block, str):
+                    parts.append(block)
+                elif isinstance(block, dict) and block.get("type") == "text":
+                    parts.append(block.get("text", ""))
+            summary_text = "".join(parts)
        return summary_text.strip() if isinstance(summary_text, str) else str(summary_text).strip()

    @override