fix(gateway): preserve message additional_kwargs in normalize_input (#3132) (#3136)

* fix(gateway): preserve message additional_kwargs in normalize_input (#3132)

The gateway's hand-rolled dict→message coercion only forwarded `content`
and collapsed every role to `HumanMessage`, silently dropping the
frontend's `additional_kwargs.files` payload (along with `id`, `name`,
and ai/system/tool roles).

Effect on issue #3132:

- `UploadsMiddleware` saw no `files` on the last human message, so the
  just-uploaded file got bucketed under "previous messages" while the
  current turn was reported as `(empty)`.
- The persisted human message had no `files`, so the attachment chip on
  the message disappeared the moment the optimistic UI cleared.

Delegate the conversion to `langchain_core.messages.utils.convert_to_messages`
so `additional_kwargs`, `id`, `name`, and non-human roles round-trip
unchanged.

* fix(gateway): convert malformed-message ValueError into HTTP 400

normalize_input now sits at the request boundary, so a malformed
input.messages[N] dict (missing role/type/content, unsupported role,
etc.) should surface as 400 with the offending index — not bubble out
of FastAPI as 500.

Per Copilot review on #3136.
This commit is contained in:
Xinmin Zeng
2026-05-21 21:06:19 +08:00
committed by GitHub
parent 1c5c585741
commit 9c03a71a07
2 changed files with 115 additions and 12 deletions
+27 -12
View File
@@ -15,7 +15,8 @@ from collections.abc import Mapping
from typing import Any
from fastapi import HTTPException, Request
from langchain_core.messages import HumanMessage
from langchain_core.messages import BaseMessage
from langchain_core.messages.utils import convert_to_messages
from app.gateway.deps import get_run_context, get_run_manager, get_stream_bridge
from app.gateway.utils import sanitize_log_param
@@ -76,21 +77,35 @@ def normalize_stream_modes(raw: list[str] | str | None) -> list[str]:
def normalize_input(raw_input: dict[str, Any] | None) -> dict[str, Any]:
"""Convert LangGraph Platform input format to LangChain state dict."""
"""Convert LangGraph Platform input format to LangChain state dict.
Delegates dict→message coercion to ``langchain_core.messages.utils.convert_to_messages``
so that ``additional_kwargs`` (e.g. uploaded-file metadata — gh #3132), ``id``,
``name``, and non-human roles (ai/system/tool) survive unchanged. An earlier
hand-rolled version only forwarded ``content`` and collapsed every role to
``HumanMessage``, which silently stripped frontend-supplied attachments.
Malformed message dicts (missing ``role``/``type``/``content``, unsupported
role, etc.) raise ``HTTPException(400)`` with the offending index, instead
of bubbling up as a 500. The gateway is a system boundary, so per-entry
validation errors are the right shape for clients to retry against.
"""
if raw_input is None:
return {}
messages = raw_input.get("messages")
if messages and isinstance(messages, list):
converted = []
for msg in messages:
if isinstance(msg, dict):
role = msg.get("role", msg.get("type", "user"))
content = msg.get("content", "")
if role in ("user", "human"):
converted.append(HumanMessage(content=content))
else:
# TODO: handle other message types (system, ai, tool)
converted.append(HumanMessage(content=content))
converted: list[Any] = []
for index, msg in enumerate(messages):
if isinstance(msg, BaseMessage):
converted.append(msg)
elif isinstance(msg, dict):
try:
converted.extend(convert_to_messages([msg]))
except (ValueError, TypeError, NotImplementedError) as exc:
raise HTTPException(
status_code=400,
detail=f"Invalid message at input.messages[{index}]: {exc}",
) from exc
else:
converted.append(msg)
return {**raw_input, "messages": converted}
+88
View File
@@ -81,6 +81,94 @@ def test_normalize_input_passthrough():
assert result == {"custom_key": "value"}
def test_normalize_input_preserves_additional_kwargs_and_id():
"""Regression: gh #3132 — frontend ships uploaded-file metadata in
additional_kwargs.files (and a client-side message id). The gateway must
not strip them before the graph runs, otherwise UploadsMiddleware reports
"(empty)" for new uploads and the frontend message loses its file chip.
"""
from langchain_core.messages import HumanMessage
from app.gateway.services import normalize_input
files = [{"filename": "a.csv", "size": 100, "path": "/mnt/user-data/uploads/a.csv", "status": "uploaded"}]
result = normalize_input(
{
"messages": [
{
"type": "human",
"id": "client-msg-1",
"name": "user-input",
"content": [{"type": "text", "text": "clean it"}],
"additional_kwargs": {"files": files, "custom": "keep-me"},
}
]
}
)
assert len(result["messages"]) == 1
msg = result["messages"][0]
assert isinstance(msg, HumanMessage)
assert msg.id == "client-msg-1"
assert msg.name == "user-input"
assert msg.content == [{"type": "text", "text": "clean it"}]
assert msg.additional_kwargs == {"files": files, "custom": "keep-me"}
def test_normalize_input_passes_through_basemessage_instances():
from langchain_core.messages import HumanMessage
from app.gateway.services import normalize_input
msg = HumanMessage(content="hello", id="m-1", additional_kwargs={"files": [{"filename": "x"}]})
result = normalize_input({"messages": [msg]})
assert result["messages"][0] is msg
def test_normalize_input_rejects_malformed_message_with_400():
"""Boundary validation: ``convert_to_messages`` raises ``ValueError`` when a
message dict is missing ``role``/``type``/``content``. ``normalize_input``
runs inside the gateway HTTP boundary, so a malformed payload should surface
as a 400 referencing the offending entry — not bubble up as a 500.
Raised after the Copilot review on PR #3136.
"""
import pytest
from fastapi import HTTPException
from app.gateway.services import normalize_input
with pytest.raises(HTTPException) as excinfo:
normalize_input({"messages": [{"role": "human", "content": "ok"}, {"oops": "no role here"}]})
assert excinfo.value.status_code == 400
assert "input.messages[1]" in excinfo.value.detail
def test_normalize_input_handles_non_human_roles():
"""The previous implementation collapsed every role to HumanMessage with a
`# TODO: handle other message types` comment. Resuming a thread with prior
AI/tool messages would silently rewrite them as human turns — corrupting
the conversation. Use langchain's standard conversion so ai/system/tool
roles round-trip correctly.
"""
from langchain_core.messages import AIMessage, SystemMessage, ToolMessage
from app.gateway.services import normalize_input
result = normalize_input(
{
"messages": [
{"role": "system", "content": "sys"},
{"role": "ai", "content": "hi", "id": "ai-1"},
{"role": "tool", "content": "result", "tool_call_id": "call-1"},
]
}
)
types = [type(m) for m in result["messages"]]
assert types == [SystemMessage, AIMessage, ToolMessage]
assert result["messages"][1].id == "ai-1"
assert result["messages"][2].tool_call_id == "call-1"
def test_build_run_config_basic():
from app.gateway.services import build_run_config