fix(backend): preserve viewed image reducer metadata (#1900)

Fix concurrent viewed_images state updates for multi-image input by preserving the reducer metadata in the vision middleware state schema.
This commit is contained in:
Zhou
2026-04-06 16:47:19 +08:00
committed by GitHub
parent f5088ed70d
commit 1ced6e977c
2 changed files with 14 additions and 7 deletions
@@ -1,22 +1,19 @@
"""Middleware for injecting image details into conversation before LLM call."""
import logging
from typing import NotRequired, override
from typing import override
from langchain.agents import AgentState
from langchain.agents.middleware import AgentMiddleware
from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
from langgraph.runtime import Runtime
from deerflow.agents.thread_state import ViewedImageData
from deerflow.agents.thread_state import ThreadState
logger = logging.getLogger(__name__)
class ViewImageMiddlewareState(AgentState):
"""Compatible with the `ThreadState` schema."""
viewed_images: NotRequired[dict[str, ViewedImageData] | None]
class ViewImageMiddlewareState(ThreadState):
"""Reuse the thread state so reducer-backed keys keep their annotations."""
class ViewImageMiddleware(AgentMiddleware[ViewImageMiddlewareState]):