"""Canonical serialization for LangChain / LangGraph objects. Provides a single source of truth for converting LangChain message objects, Pydantic models, and LangGraph state dicts into plain JSON-serialisable Python structures. Consumers: ``deerflow.runtime.runs.worker`` (SSE publishing) and ``app.gateway.routers.threads`` (REST responses). """ from __future__ import annotations from typing import Any def serialize_lc_object(obj: Any) -> Any: """Recursively serialize a LangChain object to a JSON-serialisable dict.""" if obj is None: return None if isinstance(obj, (str, int, float, bool)): return obj if isinstance(obj, dict): return {k: serialize_lc_object(v) for k, v in obj.items()} if isinstance(obj, (list, tuple)): return [serialize_lc_object(item) for item in obj] # Pydantic v2 if hasattr(obj, "model_dump"): try: return obj.model_dump() except Exception: pass # Pydantic v1 / older objects if hasattr(obj, "dict"): try: return obj.dict() except Exception: pass # Last resort try: return str(obj) except Exception: return repr(obj) def serialize_channel_values(channel_values: dict[str, Any]) -> dict[str, Any]: """Serialize channel values, stripping internal LangGraph keys. Internal keys like ``__pregel_*`` and ``__interrupt__`` are removed to match what the LangGraph Platform API returns. """ result: dict[str, Any] = {} for key, value in channel_values.items(): if key.startswith("__pregel_") or key == "__interrupt__": continue result[key] = serialize_lc_object(value) return result def strip_data_url_image_blocks(messages: list[dict[str, Any]]) -> list[dict[str, Any]]: """Remove ``data:``-scheme ``image_url`` blocks from *hide_from_ui* messages. The history and run-wait endpoints return checkpoint-persisted messages to the frontend. ``ViewImageMiddleware`` stores full base64 image payloads in ``hide_from_ui`` human messages — these are internal model context and must not be sent over the wire (huge response bodies, no UI value). Only content blocks of type ``image_url`` whose URL starts with ``data:`` are stripped. Text blocks, ``https://`` image URLs, and non-hidden messages are left untouched so that message ordering and count are preserved. """ result: list[dict[str, Any]] = [] for msg in messages: if not isinstance(msg, dict): result.append(msg) continue # Only touch messages explicitly flagged as hidden from the UI. additional_kwargs = msg.get("additional_kwargs") if not (isinstance(additional_kwargs, dict) and additional_kwargs.get("hide_from_ui") is True): result.append(msg) continue content = msg.get("content") if not isinstance(content, list): result.append(msg) continue # Filter out image_url blocks with data: scheme. filtered = [block for block in content if not (isinstance(block, dict) and block.get("type") == "image_url" and isinstance(block.get("image_url"), dict) and str(block["image_url"].get("url", "")).startswith("data:"))] result.append({**msg, "content": filtered}) return result def serialize_channel_values_for_api(channel_values: dict[str, Any]) -> dict[str, Any]: """Serialize channel values and strip base64 image data from messages. Convenience wrapper combining :func:`serialize_channel_values` with :func:`strip_data_url_image_blocks`. Use this in all REST endpoints that return channel values to the frontend so that ``data:``-scheme base64 image payloads are never sent over the wire. """ result = serialize_channel_values(channel_values) if isinstance(result.get("messages"), list): result["messages"] = strip_data_url_image_blocks(result["messages"]) return result def serialize_messages_tuple(obj: Any) -> Any: """Serialize a messages-mode tuple ``(chunk, metadata)``.""" if isinstance(obj, tuple) and len(obj) == 2: chunk, metadata = obj return [serialize_lc_object(chunk), metadata if isinstance(metadata, dict) else {}] return serialize_lc_object(obj) def serialize(obj: Any, *, mode: str = "") -> Any: """Serialize LangChain objects with mode-specific handling. * ``messages`` — obj is ``(message_chunk, metadata_dict)`` * ``values`` — obj is the full state dict; ``__pregel_*`` keys stripped * everything else — recursive ``model_dump()`` / ``dict()`` fallback """ if mode == "messages": return serialize_messages_tuple(obj) if mode == "values": return serialize_channel_values(obj) if isinstance(obj, dict) else serialize_lc_object(obj) return serialize_lc_object(obj)