mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-21 15:36:48 +00:00
* fix(gateway): preserve message additional_kwargs in normalize_input (#3132) The gateway's hand-rolled dict→message coercion only forwarded `content` and collapsed every role to `HumanMessage`, silently dropping the frontend's `additional_kwargs.files` payload (along with `id`, `name`, and ai/system/tool roles). Effect on issue #3132: - `UploadsMiddleware` saw no `files` on the last human message, so the just-uploaded file got bucketed under "previous messages" while the current turn was reported as `(empty)`. - The persisted human message had no `files`, so the attachment chip on the message disappeared the moment the optimistic UI cleared. Delegate the conversion to `langchain_core.messages.utils.convert_to_messages` so `additional_kwargs`, `id`, `name`, and non-human roles round-trip unchanged. * fix(gateway): convert malformed-message ValueError into HTTP 400 normalize_input now sits at the request boundary, so a malformed input.messages[N] dict (missing role/type/content, unsupported role, etc.) should surface as 400 with the offending index — not bubble out of FastAPI as 500. Per Copilot review on #3136.
This commit is contained in:
@@ -15,7 +15,8 @@ from collections.abc import Mapping
|
||||
from typing import Any
|
||||
|
||||
from fastapi import HTTPException, Request
|
||||
from langchain_core.messages import HumanMessage
|
||||
from langchain_core.messages import BaseMessage
|
||||
from langchain_core.messages.utils import convert_to_messages
|
||||
|
||||
from app.gateway.deps import get_run_context, get_run_manager, get_stream_bridge
|
||||
from app.gateway.utils import sanitize_log_param
|
||||
@@ -76,21 +77,35 @@ def normalize_stream_modes(raw: list[str] | str | None) -> list[str]:
|
||||
|
||||
|
||||
def normalize_input(raw_input: dict[str, Any] | None) -> dict[str, Any]:
|
||||
"""Convert LangGraph Platform input format to LangChain state dict."""
|
||||
"""Convert LangGraph Platform input format to LangChain state dict.
|
||||
|
||||
Delegates dict→message coercion to ``langchain_core.messages.utils.convert_to_messages``
|
||||
so that ``additional_kwargs`` (e.g. uploaded-file metadata — gh #3132), ``id``,
|
||||
``name``, and non-human roles (ai/system/tool) survive unchanged. An earlier
|
||||
hand-rolled version only forwarded ``content`` and collapsed every role to
|
||||
``HumanMessage``, which silently stripped frontend-supplied attachments.
|
||||
|
||||
Malformed message dicts (missing ``role``/``type``/``content``, unsupported
|
||||
role, etc.) raise ``HTTPException(400)`` with the offending index, instead
|
||||
of bubbling up as a 500. The gateway is a system boundary, so per-entry
|
||||
validation errors are the right shape for clients to retry against.
|
||||
"""
|
||||
if raw_input is None:
|
||||
return {}
|
||||
messages = raw_input.get("messages")
|
||||
if messages and isinstance(messages, list):
|
||||
converted = []
|
||||
for msg in messages:
|
||||
if isinstance(msg, dict):
|
||||
role = msg.get("role", msg.get("type", "user"))
|
||||
content = msg.get("content", "")
|
||||
if role in ("user", "human"):
|
||||
converted.append(HumanMessage(content=content))
|
||||
else:
|
||||
# TODO: handle other message types (system, ai, tool)
|
||||
converted.append(HumanMessage(content=content))
|
||||
converted: list[Any] = []
|
||||
for index, msg in enumerate(messages):
|
||||
if isinstance(msg, BaseMessage):
|
||||
converted.append(msg)
|
||||
elif isinstance(msg, dict):
|
||||
try:
|
||||
converted.extend(convert_to_messages([msg]))
|
||||
except (ValueError, TypeError, NotImplementedError) as exc:
|
||||
raise HTTPException(
|
||||
status_code=400,
|
||||
detail=f"Invalid message at input.messages[{index}]: {exc}",
|
||||
) from exc
|
||||
else:
|
||||
converted.append(msg)
|
||||
return {**raw_input, "messages": converted}
|
||||
|
||||
@@ -81,6 +81,94 @@ def test_normalize_input_passthrough():
|
||||
assert result == {"custom_key": "value"}
|
||||
|
||||
|
||||
def test_normalize_input_preserves_additional_kwargs_and_id():
|
||||
"""Regression: gh #3132 — frontend ships uploaded-file metadata in
|
||||
additional_kwargs.files (and a client-side message id). The gateway must
|
||||
not strip them before the graph runs, otherwise UploadsMiddleware reports
|
||||
"(empty)" for new uploads and the frontend message loses its file chip.
|
||||
"""
|
||||
from langchain_core.messages import HumanMessage
|
||||
|
||||
from app.gateway.services import normalize_input
|
||||
|
||||
files = [{"filename": "a.csv", "size": 100, "path": "/mnt/user-data/uploads/a.csv", "status": "uploaded"}]
|
||||
result = normalize_input(
|
||||
{
|
||||
"messages": [
|
||||
{
|
||||
"type": "human",
|
||||
"id": "client-msg-1",
|
||||
"name": "user-input",
|
||||
"content": [{"type": "text", "text": "clean it"}],
|
||||
"additional_kwargs": {"files": files, "custom": "keep-me"},
|
||||
}
|
||||
]
|
||||
}
|
||||
)
|
||||
assert len(result["messages"]) == 1
|
||||
msg = result["messages"][0]
|
||||
assert isinstance(msg, HumanMessage)
|
||||
assert msg.id == "client-msg-1"
|
||||
assert msg.name == "user-input"
|
||||
assert msg.content == [{"type": "text", "text": "clean it"}]
|
||||
assert msg.additional_kwargs == {"files": files, "custom": "keep-me"}
|
||||
|
||||
|
||||
def test_normalize_input_passes_through_basemessage_instances():
|
||||
from langchain_core.messages import HumanMessage
|
||||
|
||||
from app.gateway.services import normalize_input
|
||||
|
||||
msg = HumanMessage(content="hello", id="m-1", additional_kwargs={"files": [{"filename": "x"}]})
|
||||
result = normalize_input({"messages": [msg]})
|
||||
assert result["messages"][0] is msg
|
||||
|
||||
|
||||
def test_normalize_input_rejects_malformed_message_with_400():
|
||||
"""Boundary validation: ``convert_to_messages`` raises ``ValueError`` when a
|
||||
message dict is missing ``role``/``type``/``content``. ``normalize_input``
|
||||
runs inside the gateway HTTP boundary, so a malformed payload should surface
|
||||
as a 400 referencing the offending entry — not bubble up as a 500.
|
||||
|
||||
Raised after the Copilot review on PR #3136.
|
||||
"""
|
||||
import pytest
|
||||
from fastapi import HTTPException
|
||||
|
||||
from app.gateway.services import normalize_input
|
||||
|
||||
with pytest.raises(HTTPException) as excinfo:
|
||||
normalize_input({"messages": [{"role": "human", "content": "ok"}, {"oops": "no role here"}]})
|
||||
assert excinfo.value.status_code == 400
|
||||
assert "input.messages[1]" in excinfo.value.detail
|
||||
|
||||
|
||||
def test_normalize_input_handles_non_human_roles():
|
||||
"""The previous implementation collapsed every role to HumanMessage with a
|
||||
`# TODO: handle other message types` comment. Resuming a thread with prior
|
||||
AI/tool messages would silently rewrite them as human turns — corrupting
|
||||
the conversation. Use langchain's standard conversion so ai/system/tool
|
||||
roles round-trip correctly.
|
||||
"""
|
||||
from langchain_core.messages import AIMessage, SystemMessage, ToolMessage
|
||||
|
||||
from app.gateway.services import normalize_input
|
||||
|
||||
result = normalize_input(
|
||||
{
|
||||
"messages": [
|
||||
{"role": "system", "content": "sys"},
|
||||
{"role": "ai", "content": "hi", "id": "ai-1"},
|
||||
{"role": "tool", "content": "result", "tool_call_id": "call-1"},
|
||||
]
|
||||
}
|
||||
)
|
||||
types = [type(m) for m in result["messages"]]
|
||||
assert types == [SystemMessage, AIMessage, ToolMessage]
|
||||
assert result["messages"][1].id == "ai-1"
|
||||
assert result["messages"][2].tool_call_id == "call-1"
|
||||
|
||||
|
||||
def test_build_run_config_basic():
|
||||
from app.gateway.services import build_run_config
|
||||
|
||||
|
||||
Reference in New Issue
Block a user