mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-22 07:56:48 +00:00
* fix(loop-detection): keep tool-call pairing on warn injection (#2724) * make format * fix(loop-detection): avoid IMMessage leak to downstream consumer * fix(channels): filter loop warning text from IM replies
This commit is contained in:
@@ -146,6 +146,13 @@ def _normalize_custom_agent_name(raw_value: str) -> str:
|
|||||||
return normalized
|
return normalized
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_loop_warning_text(text: str) -> str:
|
||||||
|
"""Remove middleware-authored loop warning lines from display text."""
|
||||||
|
if "[LOOP DETECTED]" not in text:
|
||||||
|
return text
|
||||||
|
return "\n".join(line for line in text.splitlines() if "[LOOP DETECTED]" not in line).strip()
|
||||||
|
|
||||||
|
|
||||||
def _extract_response_text(result: dict | list) -> str:
|
def _extract_response_text(result: dict | list) -> str:
|
||||||
"""Extract the last AI message text from a LangGraph runs.wait result.
|
"""Extract the last AI message text from a LangGraph runs.wait result.
|
||||||
|
|
||||||
@@ -155,7 +162,7 @@ def _extract_response_text(result: dict | list) -> str:
|
|||||||
Handles special cases:
|
Handles special cases:
|
||||||
- Regular AI text responses
|
- Regular AI text responses
|
||||||
- Clarification interrupts (``ask_clarification`` tool messages)
|
- Clarification interrupts (``ask_clarification`` tool messages)
|
||||||
- AI messages with tool_calls but no text content
|
- Strips loop-detection warnings attached to tool-call AI messages
|
||||||
"""
|
"""
|
||||||
if isinstance(result, list):
|
if isinstance(result, list):
|
||||||
messages = result
|
messages = result
|
||||||
@@ -185,7 +192,12 @@ def _extract_response_text(result: dict | list) -> str:
|
|||||||
# Regular AI message with text content
|
# Regular AI message with text content
|
||||||
if msg_type == "ai":
|
if msg_type == "ai":
|
||||||
content = msg.get("content", "")
|
content = msg.get("content", "")
|
||||||
|
has_tool_calls = bool(msg.get("tool_calls"))
|
||||||
if isinstance(content, str) and content:
|
if isinstance(content, str) and content:
|
||||||
|
if has_tool_calls:
|
||||||
|
content = _strip_loop_warning_text(content)
|
||||||
|
if not content:
|
||||||
|
continue
|
||||||
return content
|
return content
|
||||||
# content can be a list of content blocks
|
# content can be a list of content blocks
|
||||||
if isinstance(content, list):
|
if isinstance(content, list):
|
||||||
@@ -196,6 +208,8 @@ def _extract_response_text(result: dict | list) -> str:
|
|||||||
elif isinstance(block, str):
|
elif isinstance(block, str):
|
||||||
parts.append(block)
|
parts.append(block)
|
||||||
text = "".join(parts)
|
text = "".join(parts)
|
||||||
|
if has_tool_calls:
|
||||||
|
text = _strip_loop_warning_text(text)
|
||||||
if text:
|
if text:
|
||||||
return text
|
return text
|
||||||
return ""
|
return ""
|
||||||
|
|||||||
@@ -22,7 +22,6 @@ from typing import override
|
|||||||
|
|
||||||
from langchain.agents import AgentState
|
from langchain.agents import AgentState
|
||||||
from langchain.agents.middleware import AgentMiddleware
|
from langchain.agents.middleware import AgentMiddleware
|
||||||
from langchain_core.messages import HumanMessage
|
|
||||||
from langgraph.runtime import Runtime
|
from langgraph.runtime import Runtime
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -356,13 +355,30 @@ class LoopDetectionMiddleware(AgentMiddleware[AgentState]):
|
|||||||
return {"messages": [stripped_msg]}
|
return {"messages": [stripped_msg]}
|
||||||
|
|
||||||
if warning:
|
if warning:
|
||||||
# Inject as HumanMessage instead of SystemMessage to avoid
|
# WORKAROUND for v2.0-m1 — see #2724.
|
||||||
# Anthropic's "multiple non-consecutive system messages" error.
|
#
|
||||||
# Anthropic models require system messages only at the start of
|
# Append the warning to the AIMessage content instead of
|
||||||
# the conversation; injecting one mid-conversation crashes
|
# injecting a separate HumanMessage. Inserting any non-tool
|
||||||
# langchain_anthropic's _format_messages(). HumanMessage works
|
# message between an AIMessage(tool_calls=...) and its
|
||||||
# with all providers. See #1299.
|
# ToolMessage responses breaks OpenAI/Moonshot strict pairing
|
||||||
return {"messages": [HumanMessage(content=warning, name="loop_warning")]}
|
# validation ("tool_call_ids did not have response messages")
|
||||||
|
# because the tools node has not run yet at after_model time.
|
||||||
|
# tool_calls are preserved so the tools node still executes.
|
||||||
|
#
|
||||||
|
# This is a temporary mitigation: mutating an existing
|
||||||
|
# AIMessage to carry framework-authored text leaks loop-warning
|
||||||
|
# text into downstream consumers (MemoryMiddleware fact
|
||||||
|
# extraction, TitleMiddleware, telemetry, model replay) as if
|
||||||
|
# the model said it. The proper fix is to defer warning
|
||||||
|
# injection from after_model to wrap_model_call so every prior
|
||||||
|
# ToolMessage is already in the request — see RFC #2517 (which
|
||||||
|
# lists "loop intervention does not leave invalid
|
||||||
|
# tool-call/tool-message state" as acceptance criteria) and
|
||||||
|
# the prototype on `fix/loop-detection-tool-call-pairing`.
|
||||||
|
messages = state.get("messages", [])
|
||||||
|
last_msg = messages[-1]
|
||||||
|
patched_msg = last_msg.model_copy(update={"content": self._append_text(last_msg.content, warning)})
|
||||||
|
return {"messages": [patched_msg]}
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|||||||
@@ -372,6 +372,37 @@ class TestExtractResponseText:
|
|||||||
# Should return "" (no text in current turn), NOT "Hi there!" from previous turn
|
# Should return "" (no text in current turn), NOT "Hi there!" from previous turn
|
||||||
assert _extract_response_text(result) == ""
|
assert _extract_response_text(result) == ""
|
||||||
|
|
||||||
|
def test_does_not_publish_loop_warning_on_tool_calling_ai_message(self):
|
||||||
|
"""Loop-detection warning text on a tool-calling AI message is middleware-authored."""
|
||||||
|
from app.channels.manager import _extract_response_text
|
||||||
|
|
||||||
|
result = {
|
||||||
|
"messages": [
|
||||||
|
{"type": "human", "content": "search the repo"},
|
||||||
|
{
|
||||||
|
"type": "ai",
|
||||||
|
"content": "[LOOP DETECTED] You are repeating the same tool calls.",
|
||||||
|
"tool_calls": [{"name": "grep", "args": {"pattern": "TODO"}, "id": "call_1"}],
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
assert _extract_response_text(result) == ""
|
||||||
|
|
||||||
|
def test_preserves_visible_text_when_stripping_loop_warning(self):
|
||||||
|
from app.channels.manager import _extract_response_text
|
||||||
|
|
||||||
|
result = {
|
||||||
|
"messages": [
|
||||||
|
{"type": "human", "content": "prepare the report"},
|
||||||
|
{
|
||||||
|
"type": "ai",
|
||||||
|
"content": "Here is the report.\n\n[LOOP DETECTED] You are repeating the same tool calls.",
|
||||||
|
"tool_calls": [{"name": "present_files", "args": {"filepaths": ["/mnt/user-data/outputs/report.md"]}, "id": "call_1"}],
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
assert _extract_response_text(result) == "Here is the report."
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# ChannelManager tests
|
# ChannelManager tests
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
import copy
|
import copy
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
|
from langchain_core.messages import AIMessage, SystemMessage
|
||||||
|
|
||||||
from deerflow.agents.middlewares.loop_detection_middleware import (
|
from deerflow.agents.middlewares.loop_detection_middleware import (
|
||||||
_HARD_STOP_MSG,
|
_HARD_STOP_MSG,
|
||||||
@@ -146,14 +146,42 @@ class TestLoopDetection:
|
|||||||
for _ in range(2):
|
for _ in range(2):
|
||||||
mw._apply(_make_state(tool_calls=call), runtime)
|
mw._apply(_make_state(tool_calls=call), runtime)
|
||||||
|
|
||||||
# Third identical call triggers warning
|
# Third identical call triggers warning. The warning is appended to
|
||||||
|
# the AIMessage content (tool_calls preserved) — never inserted as a
|
||||||
|
# separate HumanMessage between the AIMessage(tool_calls) and its
|
||||||
|
# ToolMessage responses, which would break OpenAI/Moonshot strict
|
||||||
|
# tool-call pairing validation.
|
||||||
result = mw._apply(_make_state(tool_calls=call), runtime)
|
result = mw._apply(_make_state(tool_calls=call), runtime)
|
||||||
assert result is not None
|
assert result is not None
|
||||||
msgs = result["messages"]
|
msgs = result["messages"]
|
||||||
assert len(msgs) == 1
|
assert len(msgs) == 1
|
||||||
assert isinstance(msgs[0], HumanMessage)
|
assert isinstance(msgs[0], AIMessage)
|
||||||
|
assert len(msgs[0].tool_calls) == len(call)
|
||||||
|
assert msgs[0].tool_calls[0]["id"] == call[0]["id"]
|
||||||
assert "LOOP DETECTED" in msgs[0].content
|
assert "LOOP DETECTED" in msgs[0].content
|
||||||
|
|
||||||
|
def test_warn_does_not_break_tool_call_pairing(self):
|
||||||
|
"""Regression: the warn branch must NOT inject a non-tool message
|
||||||
|
after an AIMessage(tool_calls=...). Moonshot/OpenAI reject the next
|
||||||
|
request with 'tool_call_ids did not have response messages' if any
|
||||||
|
non-tool message is wedged between the AIMessage and its ToolMessage
|
||||||
|
responses. See #2029.
|
||||||
|
"""
|
||||||
|
mw = LoopDetectionMiddleware(warn_threshold=3, hard_limit=10)
|
||||||
|
runtime = _make_runtime()
|
||||||
|
call = [_bash_call("ls")]
|
||||||
|
|
||||||
|
for _ in range(2):
|
||||||
|
mw._apply(_make_state(tool_calls=call), runtime)
|
||||||
|
|
||||||
|
result = mw._apply(_make_state(tool_calls=call), runtime)
|
||||||
|
assert result is not None
|
||||||
|
msgs = result["messages"]
|
||||||
|
assert len(msgs) == 1
|
||||||
|
assert isinstance(msgs[0], AIMessage)
|
||||||
|
assert len(msgs[0].tool_calls) == len(call)
|
||||||
|
assert msgs[0].tool_calls[0]["id"] == call[0]["id"]
|
||||||
|
|
||||||
def test_warn_only_injected_once(self):
|
def test_warn_only_injected_once(self):
|
||||||
"""Warning for the same hash should only be injected once per thread."""
|
"""Warning for the same hash should only be injected once per thread."""
|
||||||
mw = LoopDetectionMiddleware(warn_threshold=3, hard_limit=10)
|
mw = LoopDetectionMiddleware(warn_threshold=3, hard_limit=10)
|
||||||
@@ -483,7 +511,11 @@ class TestToolFrequencyDetection:
|
|||||||
result = mw._apply(_make_state(tool_calls=[self._read_call("/file_4.py")]), runtime)
|
result = mw._apply(_make_state(tool_calls=[self._read_call("/file_4.py")]), runtime)
|
||||||
assert result is not None
|
assert result is not None
|
||||||
msg = result["messages"][0]
|
msg = result["messages"][0]
|
||||||
assert isinstance(msg, HumanMessage)
|
# Warning is appended to the AIMessage content; tool_calls preserved
|
||||||
|
# so the tools node still runs and Moonshot/OpenAI tool-call pairing
|
||||||
|
# validation does not break.
|
||||||
|
assert isinstance(msg, AIMessage)
|
||||||
|
assert msg.tool_calls
|
||||||
assert "read_file" in msg.content
|
assert "read_file" in msg.content
|
||||||
assert "LOOP DETECTED" in msg.content
|
assert "LOOP DETECTED" in msg.content
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user