mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-26 09:55:59 +00:00
fix(frontend): strip unclosed <think> tags from streaming AI content (#3218)
* fix(frontend): strip unclosed <think> tags from streaming AI content

  During streaming, an opening <think> tag may arrive in one chunk while the matching </think> arrives in a later chunk. The existing splitInlineReasoning regex only matched fully closed pairs, so the mid-flight reasoning was left in message.content and rendered into the chat bubble via the markdown pipeline's rehypeRaw plugin until the closing tag landed.

  Extend splitInlineReasoning with a second pass: after stripping every closed <think>...</think> pair, route any remaining content from a lone opener to the reasoning slot and leave only the preceding preamble in content. Closed-tag behavior is unchanged.

  Covers every provider whose stream emits reasoning inline as <think> tags (MiniMax streaming path, MindIE, PatchedChatOpenAI, and any gateway-served DeepSeek/OpenAI-compatible model).

* style(frontend): apply prettier formatting to streaming reasoning tests

* fix(frontend): skip <think> split for literal think tags in inline code

  Treats a `<think>` opener immediately preceded by a backtick as part of markdown inline code rather than a streaming reasoning marker. Prevents permanent content truncation when an AI message documents the `<think>` tag literally (e.g. ``Use `<think>` markers``), where the streaming-safe fallback would otherwise route the rest of the answer into the reasoning panel because no `</think>` ever arrives.

  Adds regression tests for both the post-stream and mid-stream cases.
This commit is contained in:
@@ -266,22 +266,42 @@ export function extractTextFromMessage(message: Message) {
|
||||
return "";
|
||||
}
|
||||
|
||||
const THINK_OPEN_TAG = "<think>";
// Matches one fully closed `<think>...</think>` pair. The lazy capture keeps
// adjacent pairs separate, and `[\s\S]` lets the body span newlines.
const THINK_TAG_RE = /<think>\s*([\s\S]*?)\s*<\/think>/g;

/**
 * Locates the first `<think>` opener in `text` that is not immediately
 * preceded by a backtick.
 *
 * A backtick-prefixed opener is treated as the model mentioning `<think>`
 * literally inside markdown inline code, so it is skipped and the scan
 * continues with the next occurrence.
 *
 * @param text - Content from which closed `<think>...</think>` pairs have
 *   already been removed.
 * @returns The index of the opener, or `-1` when there is none.
 */
function findStreamingThinkOpener(text: string): number {
  let index = text.indexOf(THINK_OPEN_TAG);
  // `text[-1]` is `undefined`, so an opener at index 0 is never skipped.
  while (index !== -1 && text[index - 1] === "`") {
    index = text.indexOf(THINK_OPEN_TAG, index + THINK_OPEN_TAG.length);
  }
  return index;
}

/**
 * Splits inline `<think>` reasoning out of a message's text content.
 *
 * @param content - Raw text that may contain `<think>` markup.
 * @returns `content` with the reasoning removed and trimmed, plus
 *   `reasoning`: every non-empty reasoning fragment joined with a blank line,
 *   or `null` when no fragment was found.
 */
function splitInlineReasoning(content: string) {
  const reasoningParts: string[] = [];

  // First pass: strip every fully closed `<think>...</think>` pair and
  // collect its body as reasoning.
  let cleaned = content.replace(THINK_TAG_RE, (_match, reasoning: string) => {
    const normalized = reasoning.trim();
    if (normalized) {
      reasoningParts.push(normalized);
    }
    return "";
  });

  // Streaming-safe pass: a `<think>` opener whose `</think>` has not arrived
  // yet means the rest of the chunk is reasoning in flight. Route it into the
  // reasoning slot instead of letting it render as message content.
  //
  // Openers sitting right after a backtick are literal mentions and are
  // skipped, but the scan keeps going: a quoted `<think>` earlier in the text
  // must not hide a genuinely unclosed opener that follows it.
  const openTagIndex = findStreamingThinkOpener(cleaned);
  if (openTagIndex !== -1) {
    const tail = cleaned.slice(openTagIndex + THINK_OPEN_TAG.length).trim();
    if (tail) {
      reasoningParts.push(tail);
    }
    cleaned = cleaned.slice(0, openTagIndex);
  }

  return {
    content: cleaned.trim(),
    reasoning: reasoningParts.length > 0 ? reasoningParts.join("\n\n") : null,
  };
}
|
||||
|
||||
@@ -1,14 +1,26 @@
|
||||
import type { Message } from "@langchain/langgraph-sdk";
|
||||
import { expect, test } from "vitest";
|
||||
import { describe, expect, test } from "vitest";
|
||||
|
||||
import {
|
||||
extractContentFromMessage,
|
||||
extractReasoningContentFromMessage,
|
||||
getAssistantTurnCopyData,
|
||||
getAssistantTurnUsageMessages,
|
||||
getMessageGroups,
|
||||
getStreamingMessageLookup,
|
||||
hasContent,
|
||||
hasReasoning,
|
||||
isAssistantMessageGroupStreaming,
|
||||
} from "@/core/messages/utils";
|
||||
|
||||
/**
 * Test fixture: wraps `content` in an object with the fixed id "ai-1" and
 * type "ai", asserted to the SDK `Message` type.
 */
function aiMessage(content: string): Message {
  const fixture = {
    id: "ai-1",
    type: "ai",
    content: content,
  } as Message;
  return fixture;
}
|
||||
|
||||
test("aggregates token usage messages once per assistant turn", () => {
|
||||
const messages = [
|
||||
{
|
||||
@@ -67,6 +79,100 @@ test("aggregates token usage messages once per assistant turn", () => {
|
||||
).toEqual([null, null, ["ai-1", "ai-2"], null, ["ai-3"]]);
|
||||
});
|
||||
|
||||
// Suite covering how inline `<think>` markup in AI message content is divided
// between the content and reasoning extractors, for closed pairs, unclosed
// (mid-stream) openers, and literal mentions inside markdown inline code.
describe("inline <think> tag splitting", () => {
  test("strips a fully closed <think> block from AI content", () => {
    const msg = aiMessage("<think>internal reasoning</think>final answer");

    const content = extractContentFromMessage(msg);
    expect(content).toBe("final answer");

    const reasoning = extractReasoningContentFromMessage(msg);
    expect(reasoning).toBe("internal reasoning");
  });

  test("strips multiple closed <think> blocks and joins their reasoning", () => {
    const raw = "<think>step one</think>between<think>step two</think>after";
    const msg = aiMessage(raw);

    const content = extractContentFromMessage(msg);
    expect(content).toBe("betweenafter");

    // Fragments are expected to be joined with a blank line.
    const reasoning = extractReasoningContentFromMessage(msg);
    expect(reasoning).toBe("step one\n\nstep two");
  });

  test("during streaming, an unclosed <think> tag does not leak its tail into content", () => {
    // Accumulated content as it looks mid-stream, before </think> arrives.
    const raw = "<think>I need to analyze the user's question step by";
    const msg = aiMessage(raw);

    const content = extractContentFromMessage(msg);
    expect(content).toBe("");

    const contentAgain = extractContentFromMessage(msg);
    expect(contentAgain).not.toContain("<think>");

    const reasoning = extractReasoningContentFromMessage(msg);
    expect(reasoning).toBe("I need to analyze the user's question step by");
  });

  test("preamble before an unclosed <think> stays in content", () => {
    const raw = "Here is part of the answer.<think>but wait, let me reconsider";
    const msg = aiMessage(raw);

    const content = extractContentFromMessage(msg);
    expect(content).toBe("Here is part of the answer.");

    const reasoning = extractReasoningContentFromMessage(msg);
    expect(reasoning).toBe("but wait, let me reconsider");
  });

  test("closed <think> followed by a trailing unclosed <think> merges both into reasoning", () => {
    const raw =
      "<think>first step</think>partial answer<think>second step still streaming";
    const msg = aiMessage(raw);

    const content = extractContentFromMessage(msg);
    expect(content).toBe("partial answer");

    const reasoning = extractReasoningContentFromMessage(msg);
    expect(reasoning).toBe("first step\n\nsecond step still streaming");
  });

  test("hasReasoning recognises an unclosed <think> tag mid-stream", () => {
    const msg = aiMessage("<think>thinking in progress");
    expect(hasReasoning(msg)).toBe(true);
  });

  test("hasContent excludes an unclosed <think> tail when no preamble exists", () => {
    const msg = aiMessage("<think>thinking in progress");
    expect(hasContent(msg)).toBe(false);
  });

  test("hasContent stays true when preamble precedes an unclosed <think>", () => {
    const msg = aiMessage("preamble<think>still thinking");
    expect(hasContent(msg)).toBe(true);
  });

  test("a lone <think> open tag with no body yields no reasoning and no content", () => {
    const msg = aiMessage("<think>");

    const content = extractContentFromMessage(msg);
    expect(content).toBe("");

    const reasoning = extractReasoningContentFromMessage(msg);
    expect(reasoning).toBeNull();

    const reasoningFlag = hasReasoning(msg);
    expect(reasoningFlag).toBe(false);
  });

  test("a literal <think> inside markdown inline code is not treated as reasoning", () => {
    const raw = "Use `<think>` markers to delimit reasoning sections.";
    const msg = aiMessage(raw);

    // The text is expected to come back untouched.
    const content = extractContentFromMessage(msg);
    expect(content).toBe(
      "Use `<think>` markers to delimit reasoning sections.",
    );

    const reasoning = extractReasoningContentFromMessage(msg);
    expect(reasoning).toBeNull();

    const reasoningFlag = hasReasoning(msg);
    expect(reasoningFlag).toBe(false);
  });

  test("a backtick-prefixed <think> mid-stream is not split into reasoning", () => {
    // The model has emitted the opening backtick and `<think>` of a literal
    // documentation reference, but the closing backtick has not arrived yet.
    // Guards against the content being truncated at that point.
    const msg = aiMessage("Documentation: `<think>");

    const content = extractContentFromMessage(msg);
    expect(content).toBe("Documentation: `<think>");

    const reasoning = extractReasoningContentFromMessage(msg);
    expect(reasoning).toBeNull();
  });
});
|
||||
|
||||
test("hides internal todo reminder messages from message groups", () => {
|
||||
const messages = [
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user