fix(gateway): preserve message additional_kwargs in normalize_input (#3132) (#3136)

* fix(gateway): preserve message additional_kwargs in normalize_input (#3132)

The gateway's hand-rolled dict→message coercion only forwarded `content`
and collapsed every role to `HumanMessage`, silently dropping the
frontend's `additional_kwargs.files` payload (along with `id`, `name`,
and ai/system/tool roles).

Effect on issue #3132:

- `UploadsMiddleware` saw no `files` on the last human message, so the
  just-uploaded file got bucketed under "previous messages" while the
  current turn was reported as `(empty)`.
- The persisted human message had no `files`, so the attachment chip on
  the message disappeared the moment the optimistic UI cleared.

Delegate the conversion to `langchain_core.messages.utils.convert_to_messages`
so `additional_kwargs`, `id`, `name`, and non-human roles round-trip
unchanged.

* fix(gateway): convert malformed-message ValueError into HTTP 400

normalize_input now sits at the request boundary, so a malformed
input.messages[N] dict (missing role/type/content, unsupported role,
etc.) should surface as 400 with the offending index — not bubble out
of FastAPI as 500.

Per Copilot review on #3136.
This commit is contained in:
Xinmin Zeng
2026-05-21 21:06:19 +08:00
committed by GitHub
parent 1c5c585741
commit 9c03a71a07
2 changed files with 115 additions and 12 deletions
+88
View File
@@ -81,6 +81,94 @@ def test_normalize_input_passthrough():
assert result == {"custom_key": "value"}
def test_normalize_input_preserves_additional_kwargs_and_id():
"""Regression: gh #3132 — frontend ships uploaded-file metadata in
additional_kwargs.files (and a client-side message id). The gateway must
not strip them before the graph runs, otherwise UploadsMiddleware reports
"(empty)" for new uploads and the frontend message loses its file chip.
"""
from langchain_core.messages import HumanMessage
from app.gateway.services import normalize_input
files = [{"filename": "a.csv", "size": 100, "path": "/mnt/user-data/uploads/a.csv", "status": "uploaded"}]
result = normalize_input(
{
"messages": [
{
"type": "human",
"id": "client-msg-1",
"name": "user-input",
"content": [{"type": "text", "text": "clean it"}],
"additional_kwargs": {"files": files, "custom": "keep-me"},
}
]
}
)
assert len(result["messages"]) == 1
msg = result["messages"][0]
assert isinstance(msg, HumanMessage)
assert msg.id == "client-msg-1"
assert msg.name == "user-input"
assert msg.content == [{"type": "text", "text": "clean it"}]
assert msg.additional_kwargs == {"files": files, "custom": "keep-me"}
def test_normalize_input_passes_through_basemessage_instances():
from langchain_core.messages import HumanMessage
from app.gateway.services import normalize_input
msg = HumanMessage(content="hello", id="m-1", additional_kwargs={"files": [{"filename": "x"}]})
result = normalize_input({"messages": [msg]})
assert result["messages"][0] is msg
def test_normalize_input_rejects_malformed_message_with_400():
"""Boundary validation: ``convert_to_messages`` raises ``ValueError`` when a
message dict is missing ``role``/``type``/``content``. ``normalize_input``
runs inside the gateway HTTP boundary, so a malformed payload should surface
as a 400 referencing the offending entry — not bubble up as a 500.
Raised after the Copilot review on PR #3136.
"""
import pytest
from fastapi import HTTPException
from app.gateway.services import normalize_input
with pytest.raises(HTTPException) as excinfo:
normalize_input({"messages": [{"role": "human", "content": "ok"}, {"oops": "no role here"}]})
assert excinfo.value.status_code == 400
assert "input.messages[1]" in excinfo.value.detail
def test_normalize_input_handles_non_human_roles():
"""The previous implementation collapsed every role to HumanMessage with a
`# TODO: handle other message types` comment. Resuming a thread with prior
AI/tool messages would silently rewrite them as human turns — corrupting
the conversation. Use langchain's standard conversion so ai/system/tool
roles round-trip correctly.
"""
from langchain_core.messages import AIMessage, SystemMessage, ToolMessage
from app.gateway.services import normalize_input
result = normalize_input(
{
"messages": [
{"role": "system", "content": "sys"},
{"role": "ai", "content": "hi", "id": "ai-1"},
{"role": "tool", "content": "result", "tool_call_id": "call-1"},
]
}
)
types = [type(m) for m in result["messages"]]
assert types == [SystemMessage, AIMessage, ToolMessage]
assert result["messages"][1].id == "ai-1"
assert result["messages"][2].tool_call_id == "call-1"
def test_build_run_config_basic():
from app.gateway.services import build_run_config