fix(backend): preserve viewed image reducer metadata (#1900)

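Fix concurrent viewed_images state updates for multi-image input by preserving the reducer metadata in the vision middleware state schema. ViewImageMiddlewareState now subclasses ThreadState instead of redeclaring viewed_images as a plain NotRequired field, so the key keeps its reducer annotation.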
2026-05-24 17:06:00 +00:00 · 2026-04-06 16:47:19 +08:00
parent f5088ed70d
commit 1ced6e977c
2 changed files with 14 additions and 7 deletions
@@ -1,22 +1,19 @@
 """Middleware for injecting image details into conversation before LLM call."""

 import logging
-from typing import NotRequired, override
+from typing import override

-from langchain.agents import AgentState
 from langchain.agents.middleware import AgentMiddleware
 from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
 from langgraph.runtime import Runtime

-from deerflow.agents.thread_state import ViewedImageData
+from deerflow.agents.thread_state import ThreadState

 logger = logging.getLogger(__name__)


-class ViewImageMiddlewareState(AgentState):
-    """Compatible with the `ThreadState` schema."""
-
-    viewed_images: NotRequired[dict[str, ViewedImageData] | None]
+class ViewImageMiddlewareState(ThreadState):
+    """Reuse the thread state so reducer-backed keys keep their annotations."""


 class ViewImageMiddleware(AgentMiddleware[ViewImageMiddlewareState]):