mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-13 10:55:59 +00:00
fix(history): strip base64 image data from REST endpoint responses (#3535)
ViewImageMiddleware persists full base64 image payloads in hide_from_ui human messages inside checkpoints. All REST endpoints that returned serialize_channel_values(channel_values) sent these multi-megabyte payloads to the frontend, freezing the UI on threads with images. Add strip_data_url_image_blocks() to remove data:-scheme image_url content blocks from hide_from_ui messages, and serialize_channel_values_for_api() as a convenience wrapper used by all six affected call sites across threads, runs, and thread_runs routers. SSE streaming is unaffected (still uses serialize_channel_values). Fixes #3496
This commit is contained in:
@@ -56,6 +56,56 @@ def serialize_channel_values(channel_values: dict[str, Any]) -> dict[str, Any]:
|
||||
return result
|
||||
|
||||
|
||||
def _is_data_url_image_block(block: Any) -> bool:
    """Return True if *block* is an ``image_url`` content block with a ``data:`` URL."""
    if not isinstance(block, dict) or block.get("type") != "image_url":
        return False

    image_url = block.get("image_url")
    if isinstance(image_url, dict):
        # Nested form: {"type": "image_url", "image_url": {"url": "..."}}
        url = image_url.get("url", "")
    elif isinstance(image_url, str):
        # String shorthand: {"type": "image_url", "image_url": "..."}
        url = image_url
    else:
        return False

    # ``str()`` keeps a non-string ``url`` value (e.g. ``None``) from raising.
    return str(url).startswith("data:")


def strip_data_url_image_blocks(messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Remove ``data:``-scheme ``image_url`` blocks from *hide_from_ui* messages.

    The history and run-wait endpoints return checkpoint-persisted messages to
    the frontend. ``ViewImageMiddleware`` stores full base64 image payloads in
    ``hide_from_ui`` human messages — these are internal model context and must
    not be sent over the wire (huge response bodies, no UI value).

    Only content blocks of type ``image_url`` whose URL starts with ``data:``
    are stripped, whether the URL is given as ``{"url": ...}`` or as a plain
    string. Text blocks, ``https://`` image URLs, and non-hidden messages are
    left untouched so that message ordering and count are preserved.

    Args:
        messages: Serialized message dicts. Entries that are not dicts are
            passed through unchanged.

    Returns:
        A new list with one entry per input message. Hidden messages with list
        content are replaced by a shallow copy carrying the filtered content;
        every other entry is the original object. The input is never mutated.
    """
    result: list[dict[str, Any]] = []
    for msg in messages:
        if not isinstance(msg, dict):
            result.append(msg)
            continue

        # Only touch messages explicitly flagged as hidden from the UI.
        additional_kwargs = msg.get("additional_kwargs")
        if not (isinstance(additional_kwargs, dict) and additional_kwargs.get("hide_from_ui") is True):
            result.append(msg)
            continue

        content = msg.get("content")
        if not isinstance(content, list):
            # Plain-string (or missing) content has no blocks to filter.
            result.append(msg)
            continue

        filtered = [block for block in content if not _is_data_url_image_block(block)]
        result.append({**msg, "content": filtered})
    return result
|
||||
|
||||
|
||||
def serialize_channel_values_for_api(channel_values: dict[str, Any]) -> dict[str, Any]:
    """Serialize channel values for a REST response, minus inline image data.

    Runs :func:`serialize_channel_values` and then, when the serialized
    ``messages`` entry is a list, passes it through
    :func:`strip_data_url_image_blocks`. REST endpoints that hand channel
    values to the frontend should call this so ``data:``-scheme base64
    image payloads stay out of the response body.
    """
    serialized = serialize_channel_values(channel_values)

    messages = serialized.get("messages")
    if not isinstance(messages, list):
        # Nothing message-shaped to filter; return the serialized values as-is.
        return serialized

    serialized["messages"] = strip_data_url_image_blocks(messages)
    return serialized
|
||||
|
||||
|
||||
def serialize_messages_tuple(obj: Any) -> Any:
|
||||
"""Serialize a messages-mode tuple ``(chunk, metadata)``."""
|
||||
if isinstance(obj, tuple) and len(obj) == 2:
|
||||
|
||||
Reference in New Issue
Block a user