fix(history): strip base64 image data from REST endpoint responses (#3535)

ViewImageMiddleware persists full base64 image payloads in hide_from_ui
human messages inside checkpoints. All REST endpoints that returned
serialize_channel_values(channel_values) sent these multi-megabyte
payloads to the frontend, freezing the UI on threads with images.

Add strip_data_url_image_blocks() to remove data:-scheme image_url
content blocks from hide_from_ui messages, and
serialize_channel_values_for_api() as a convenience wrapper used by all
six affected call sites across threads, runs, and thread_runs routers.
SSE streaming is unaffected (still uses serialize_channel_values).

Fixes #3496
This commit is contained in:
hataa
2026-06-13 08:58:19 +08:00
committed by GitHub
parent 839fa99237
commit 094296440f
6 changed files with 230 additions and 10 deletions
@@ -7,7 +7,7 @@ directly from ``deerflow.runtime``.
from .checkpointer import checkpointer_context, get_checkpointer, make_checkpointer, reset_checkpointer
from .runs import ConflictError, DisconnectMode, RunContext, RunManager, RunRecord, RunStatus, UnsupportedStrategyError, run_agent
from .serialization import serialize, serialize_channel_values, serialize_lc_object, serialize_messages_tuple
from .serialization import serialize, serialize_channel_values, serialize_channel_values_for_api, serialize_lc_object, serialize_messages_tuple, strip_data_url_image_blocks
from .store import get_store, make_store, reset_store, store_context
from .stream_bridge import END_SENTINEL, HEARTBEAT_SENTINEL, MemoryStreamBridge, StreamBridge, StreamEvent, make_stream_bridge
@@ -29,8 +29,10 @@ __all__ = [
# serialization
"serialize",
"serialize_channel_values",
"serialize_channel_values_for_api",
"serialize_lc_object",
"serialize_messages_tuple",
"strip_data_url_image_blocks",
# store
"get_store",
"make_store",
@@ -56,6 +56,56 @@ def serialize_channel_values(channel_values: dict[str, Any]) -> dict[str, Any]:
return result
def _is_data_url_image_block(block: Any) -> bool:
    """Return True when *block* is an ``image_url`` block carrying a ``data:`` URL.

    Both the nested form (``{"image_url": {"url": "data:..."}}``) and the
    bare-string shorthand (``{"image_url": "data:..."}``) are recognised.
    """
    if not isinstance(block, dict) or block.get("type") != "image_url":
        return False
    image_url = block.get("image_url")
    if isinstance(image_url, dict):
        url = str(image_url.get("url", ""))
    elif isinstance(image_url, str):
        url = image_url
    else:
        return False
    # URL schemes are case-insensitive. Only the 5-character prefix is
    # lowercased so a multi-megabyte payload is never copied for the check.
    return url[:5].lower() == "data:"


def strip_data_url_image_blocks(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Remove ``data:``-scheme ``image_url`` blocks from *hide_from_ui* messages.

    The history and run-wait endpoints return checkpoint-persisted messages to
    the frontend. ``ViewImageMiddleware`` stores full base64 image payloads in
    ``hide_from_ui`` human messages — these are internal model context and must
    not be sent over the wire (huge response bodies, no UI value).

    Only content blocks of type ``image_url`` whose URL uses the ``data:``
    scheme (matched case-insensitively, in either the ``{"url": ...}`` dict
    form or the bare-string form) are stripped. Text blocks, ``https://``
    image URLs, and non-hidden messages are left untouched so that message
    ordering and count are preserved.

    Args:
        messages: Serialized message dicts. Entries that are not dicts are
            passed through unchanged.

    Returns:
        A new list with the same length and order as *messages*. Hidden
        messages with list content are shallow-copied with a filtered
        ``content`` list; every other entry is the original object. The
        input list and its messages are never mutated.
    """
    result: list[dict[str, Any]] = []
    for msg in messages:
        if not isinstance(msg, dict):
            result.append(msg)
            continue

        # Only touch messages explicitly flagged as hidden from the UI.
        additional_kwargs = msg.get("additional_kwargs")
        is_hidden = isinstance(additional_kwargs, dict) and additional_kwargs.get("hide_from_ui") is True
        if not is_hidden:
            result.append(msg)
            continue

        # Plain-string (or otherwise non-list) content has no blocks to filter.
        content = msg.get("content")
        if not isinstance(content, list):
            result.append(msg)
            continue

        filtered = [block for block in content if not _is_data_url_image_block(block)]
        # Copy instead of mutating: the caller may still hold the original dict.
        result.append({**msg, "content": filtered})
    return result
def serialize_channel_values_for_api(channel_values: dict[str, Any]) -> dict[str, Any]:
    """Serialize channel values for a REST response, minus base64 image data.

    Runs :func:`serialize_channel_values` and, when the serialized result
    carries a list under ``"messages"``, passes that list through
    :func:`strip_data_url_image_blocks`. Intended for REST endpoints that
    return channel values to the frontend, so that ``data:``-scheme base64
    image payloads are not sent over the wire.

    Args:
        channel_values: Raw channel values to serialize.

    Returns:
        The serialized mapping. A ``"messages"`` entry that is not a list
        (or is absent) is left exactly as the serializer produced it.
    """
    serialized = serialize_channel_values(channel_values)
    messages = serialized.get("messages")
    # Nothing to strip unless the serializer produced a message list.
    if not isinstance(messages, list):
        return serialized
    serialized["messages"] = strip_data_url_image_blocks(messages)
    return serialized
def serialize_messages_tuple(obj: Any) -> Any:
"""Serialize a messages-mode tuple ``(chunk, metadata)``."""
if isinstance(obj, tuple) and len(obj) == 2: