mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-23 16:35:59 +00:00
* fix(gateway): preserve message additional_kwargs in normalize_input (#3132) The gateway's hand-rolled dict→message coercion only forwarded `content` and collapsed every role to `HumanMessage`, silently dropping the frontend's `additional_kwargs.files` payload (along with `id`, `name`, and ai/system/tool roles). Effect on issue #3132: - `UploadsMiddleware` saw no `files` on the last human message, so the just-uploaded file got bucketed under "previous messages" while the current turn was reported as `(empty)`. - The persisted human message had no `files`, so the attachment chip on the message disappeared the moment the optimistic UI cleared. Delegate the conversion to `langchain_core.messages.utils.convert_to_messages` so `additional_kwargs`, `id`, `name`, and non-human roles round-trip unchanged. * fix(gateway): convert malformed-message ValueError into HTTP 400 normalize_input now sits at the request boundary, so a malformed input.messages[N] dict (missing role/type/content, unsupported role, etc.) should surface as 400 with the offending index — not bubble out of FastAPI as 500. Per Copilot review on #3136.
This commit is contained in:
@@ -15,7 +15,8 @@ from collections.abc import Mapping
|
|||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from fastapi import HTTPException, Request
|
from fastapi import HTTPException, Request
|
||||||
from langchain_core.messages import HumanMessage
|
from langchain_core.messages import BaseMessage
|
||||||
|
from langchain_core.messages.utils import convert_to_messages
|
||||||
|
|
||||||
from app.gateway.deps import get_run_context, get_run_manager, get_stream_bridge
|
from app.gateway.deps import get_run_context, get_run_manager, get_stream_bridge
|
||||||
from app.gateway.utils import sanitize_log_param
|
from app.gateway.utils import sanitize_log_param
|
||||||
@@ -76,21 +77,35 @@ def normalize_stream_modes(raw: list[str] | str | None) -> list[str]:
|
|||||||
|
|
||||||
|
|
||||||
def normalize_input(raw_input: dict[str, Any] | None) -> dict[str, Any]:
|
def normalize_input(raw_input: dict[str, Any] | None) -> dict[str, Any]:
|
||||||
"""Convert LangGraph Platform input format to LangChain state dict."""
|
"""Convert LangGraph Platform input format to LangChain state dict.
|
||||||
|
|
||||||
|
Delegates dict→message coercion to ``langchain_core.messages.utils.convert_to_messages``
|
||||||
|
so that ``additional_kwargs`` (e.g. uploaded-file metadata — gh #3132), ``id``,
|
||||||
|
``name``, and non-human roles (ai/system/tool) survive unchanged. An earlier
|
||||||
|
hand-rolled version only forwarded ``content`` and collapsed every role to
|
||||||
|
``HumanMessage``, which silently stripped frontend-supplied attachments.
|
||||||
|
|
||||||
|
Malformed message dicts (missing ``role``/``type``/``content``, unsupported
|
||||||
|
role, etc.) raise ``HTTPException(400)`` with the offending index, instead
|
||||||
|
of bubbling up as a 500. The gateway is a system boundary, so per-entry
|
||||||
|
validation errors are the right shape for clients to retry against.
|
||||||
|
"""
|
||||||
if raw_input is None:
|
if raw_input is None:
|
||||||
return {}
|
return {}
|
||||||
messages = raw_input.get("messages")
|
messages = raw_input.get("messages")
|
||||||
if messages and isinstance(messages, list):
|
if messages and isinstance(messages, list):
|
||||||
converted = []
|
converted: list[Any] = []
|
||||||
for msg in messages:
|
for index, msg in enumerate(messages):
|
||||||
if isinstance(msg, dict):
|
if isinstance(msg, BaseMessage):
|
||||||
role = msg.get("role", msg.get("type", "user"))
|
converted.append(msg)
|
||||||
content = msg.get("content", "")
|
elif isinstance(msg, dict):
|
||||||
if role in ("user", "human"):
|
try:
|
||||||
converted.append(HumanMessage(content=content))
|
converted.extend(convert_to_messages([msg]))
|
||||||
else:
|
except (ValueError, TypeError, NotImplementedError) as exc:
|
||||||
# TODO: handle other message types (system, ai, tool)
|
raise HTTPException(
|
||||||
converted.append(HumanMessage(content=content))
|
status_code=400,
|
||||||
|
detail=f"Invalid message at input.messages[{index}]: {exc}",
|
||||||
|
) from exc
|
||||||
else:
|
else:
|
||||||
converted.append(msg)
|
converted.append(msg)
|
||||||
return {**raw_input, "messages": converted}
|
return {**raw_input, "messages": converted}
|
||||||
|
|||||||
@@ -81,6 +81,94 @@ def test_normalize_input_passthrough():
|
|||||||
assert result == {"custom_key": "value"}
|
assert result == {"custom_key": "value"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_input_preserves_additional_kwargs_and_id():
|
||||||
|
"""Regression: gh #3132 — frontend ships uploaded-file metadata in
|
||||||
|
additional_kwargs.files (and a client-side message id). The gateway must
|
||||||
|
not strip them before the graph runs, otherwise UploadsMiddleware reports
|
||||||
|
"(empty)" for new uploads and the frontend message loses its file chip.
|
||||||
|
"""
|
||||||
|
from langchain_core.messages import HumanMessage
|
||||||
|
|
||||||
|
from app.gateway.services import normalize_input
|
||||||
|
|
||||||
|
files = [{"filename": "a.csv", "size": 100, "path": "/mnt/user-data/uploads/a.csv", "status": "uploaded"}]
|
||||||
|
result = normalize_input(
|
||||||
|
{
|
||||||
|
"messages": [
|
||||||
|
{
|
||||||
|
"type": "human",
|
||||||
|
"id": "client-msg-1",
|
||||||
|
"name": "user-input",
|
||||||
|
"content": [{"type": "text", "text": "clean it"}],
|
||||||
|
"additional_kwargs": {"files": files, "custom": "keep-me"},
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
)
|
||||||
|
assert len(result["messages"]) == 1
|
||||||
|
msg = result["messages"][0]
|
||||||
|
assert isinstance(msg, HumanMessage)
|
||||||
|
assert msg.id == "client-msg-1"
|
||||||
|
assert msg.name == "user-input"
|
||||||
|
assert msg.content == [{"type": "text", "text": "clean it"}]
|
||||||
|
assert msg.additional_kwargs == {"files": files, "custom": "keep-me"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_input_passes_through_basemessage_instances():
|
||||||
|
from langchain_core.messages import HumanMessage
|
||||||
|
|
||||||
|
from app.gateway.services import normalize_input
|
||||||
|
|
||||||
|
msg = HumanMessage(content="hello", id="m-1", additional_kwargs={"files": [{"filename": "x"}]})
|
||||||
|
result = normalize_input({"messages": [msg]})
|
||||||
|
assert result["messages"][0] is msg
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_input_rejects_malformed_message_with_400():
|
||||||
|
"""Boundary validation: ``convert_to_messages`` raises ``ValueError`` when a
|
||||||
|
message dict is missing ``role``/``type``/``content``. ``normalize_input``
|
||||||
|
runs inside the gateway HTTP boundary, so a malformed payload should surface
|
||||||
|
as a 400 referencing the offending entry — not bubble up as a 500.
|
||||||
|
|
||||||
|
Raised after the Copilot review on PR #3136.
|
||||||
|
"""
|
||||||
|
import pytest
|
||||||
|
from fastapi import HTTPException
|
||||||
|
|
||||||
|
from app.gateway.services import normalize_input
|
||||||
|
|
||||||
|
with pytest.raises(HTTPException) as excinfo:
|
||||||
|
normalize_input({"messages": [{"role": "human", "content": "ok"}, {"oops": "no role here"}]})
|
||||||
|
assert excinfo.value.status_code == 400
|
||||||
|
assert "input.messages[1]" in excinfo.value.detail
|
||||||
|
|
||||||
|
|
||||||
|
def test_normalize_input_handles_non_human_roles():
|
||||||
|
"""The previous implementation collapsed every role to HumanMessage with a
|
||||||
|
`# TODO: handle other message types` comment. Resuming a thread with prior
|
||||||
|
AI/tool messages would silently rewrite them as human turns — corrupting
|
||||||
|
the conversation. Use langchain's standard conversion so ai/system/tool
|
||||||
|
roles round-trip correctly.
|
||||||
|
"""
|
||||||
|
from langchain_core.messages import AIMessage, SystemMessage, ToolMessage
|
||||||
|
|
||||||
|
from app.gateway.services import normalize_input
|
||||||
|
|
||||||
|
result = normalize_input(
|
||||||
|
{
|
||||||
|
"messages": [
|
||||||
|
{"role": "system", "content": "sys"},
|
||||||
|
{"role": "ai", "content": "hi", "id": "ai-1"},
|
||||||
|
{"role": "tool", "content": "result", "tool_call_id": "call-1"},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
)
|
||||||
|
types = [type(m) for m in result["messages"]]
|
||||||
|
assert types == [SystemMessage, AIMessage, ToolMessage]
|
||||||
|
assert result["messages"][1].id == "ai-1"
|
||||||
|
assert result["messages"][2].tool_call_id == "call-1"
|
||||||
|
|
||||||
|
|
||||||
def test_build_run_config_basic():
|
def test_build_run_config_basic():
|
||||||
from app.gateway.services import build_run_config
|
from app.gateway.services import build_run_config
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user