fix(gateway): preserve message additional_kwargs in normalize_input (#3132) (#3136)

* fix(gateway): preserve message additional_kwargs in normalize_input (#3132) The gateway's hand-rolled dict→message coercion only forwarded `content` and collapsed every role to `HumanMessage`, silently dropping the frontend's `additional_kwargs.files` payload (along with `id`, `name`, and ai/system/tool roles). Effect on issue #3132: - `UploadsMiddleware` saw no `files` on the last human message, so the just-uploaded file got bucketed under "previous messages" while the current turn was reported as `(empty)`. - The persisted human message had no `files`, so the attachment chip on the message disappeared the moment the optimistic UI cleared. Delegate the conversion to `langchain_core.messages.utils.convert_to_messages` so `additional_kwargs`, `id`, `name`, and non-human roles round-trip unchanged. * fix(gateway): convert malformed-message ValueError into HTTP 400 normalize_input now sits at the request boundary, so a malformed input.messages[N] dict (missing role/type/content, unsupported role, etc.) should surface as 400 with the offending index — not bubble out of FastAPI as 500. Per Copilot review on #3136.
2026-05-22 16:06:50 +00:00 · 2026-05-21 21:06:19 +08:00
parent 1c5c585741
commit 9c03a71a07
2 changed files with 115 additions and 12 deletions
@@ -81,6 +81,94 @@ def test_normalize_input_passthrough():
    assert result == {"custom_key": "value"}


+def test_normalize_input_preserves_additional_kwargs_and_id():
+    """Regression: gh #3132 — frontend ships uploaded-file metadata in
+    additional_kwargs.files (and a client-side message id).  The gateway must
+    not strip them before the graph runs, otherwise UploadsMiddleware reports
+    "(empty)" for new uploads and the frontend message loses its file chip.
+    """
+    from langchain_core.messages import HumanMessage
+
+    from app.gateway.services import normalize_input
+
+    files = [{"filename": "a.csv", "size": 100, "path": "/mnt/user-data/uploads/a.csv", "status": "uploaded"}]
+    result = normalize_input(
+        {
+            "messages": [
+                {
+                    "type": "human",
+                    "id": "client-msg-1",
+                    "name": "user-input",
+                    "content": [{"type": "text", "text": "clean it"}],
+                    "additional_kwargs": {"files": files, "custom": "keep-me"},
+                }
+            ]
+        }
+    )
+    assert len(result["messages"]) == 1
+    msg = result["messages"][0]
+    assert isinstance(msg, HumanMessage)
+    assert msg.id == "client-msg-1"
+    assert msg.name == "user-input"
+    assert msg.content == [{"type": "text", "text": "clean it"}]
+    assert msg.additional_kwargs == {"files": files, "custom": "keep-me"}
+
+
+def test_normalize_input_passes_through_basemessage_instances():
+    from langchain_core.messages import HumanMessage
+
+    from app.gateway.services import normalize_input
+
+    msg = HumanMessage(content="hello", id="m-1", additional_kwargs={"files": [{"filename": "x"}]})
+    result = normalize_input({"messages": [msg]})
+    assert result["messages"][0] is msg
+
+
+def test_normalize_input_rejects_malformed_message_with_400():
+    """Boundary validation: ``convert_to_messages`` raises ``ValueError`` when a
+    message dict is missing ``role``/``type``/``content``.  ``normalize_input``
+    runs inside the gateway HTTP boundary, so a malformed payload should surface
+    as a 400 referencing the offending entry — not bubble up as a 500.
+
+    Raised after the Copilot review on PR #3136.
+    """
+    import pytest
+    from fastapi import HTTPException
+
+    from app.gateway.services import normalize_input
+
+    with pytest.raises(HTTPException) as excinfo:
+        normalize_input({"messages": [{"role": "human", "content": "ok"}, {"oops": "no role here"}]})
+    assert excinfo.value.status_code == 400
+    assert "input.messages[1]" in excinfo.value.detail
+
+
+def test_normalize_input_handles_non_human_roles():
+    """The previous implementation collapsed every role to HumanMessage with a
+    `# TODO: handle other message types` comment.  Resuming a thread with prior
+    AI/tool messages would silently rewrite them as human turns — corrupting
+    the conversation.  Use langchain's standard conversion so ai/system/tool
+    roles round-trip correctly.
+    """
+    from langchain_core.messages import AIMessage, SystemMessage, ToolMessage
+
+    from app.gateway.services import normalize_input
+
+    result = normalize_input(
+        {
+            "messages": [
+                {"role": "system", "content": "sys"},
+                {"role": "ai", "content": "hi", "id": "ai-1"},
+                {"role": "tool", "content": "result", "tool_call_id": "call-1"},
+            ]
+        }
+    )
+    types = [type(m) for m in result["messages"]]
+    assert types == [SystemMessage, AIMessage, ToolMessage]
+    assert result["messages"][1].id == "ai-1"
+    assert result["messages"][2].tool_call_id == "call-1"
+
+
 def test_build_run_config_basic():
    from app.gateway.services import build_run_config