From 11dd5b0683b8bd2d5c562a653f2e4e21c681c874 Mon Sep 17 00:00:00 2001
From: AochenShen99 <sac1063467113@hotmail.com>
Date: Tue, 26 May 2026 09:35:07 +0800
Subject: [PATCH] fix(frontend): strip unclosed <think> tags from streaming AI
 content (#3218)

* fix(frontend): strip unclosed <think> tags from streaming AI content

During streaming, an opening <think> tag may arrive in one chunk
while the matching </think> arrives in a later chunk. The existing
splitInlineReasoning regex only matched fully closed pairs, so the
mid-flight reasoning was left in message.content and rendered into
the chat bubble via the markdown pipeline's rehypeRaw plugin until
the closing tag landed.

Extend splitInlineReasoning with a second pass: after stripping every
closed <think>...</think> pair, route any remaining content from a
lone opener to the reasoning slot and leave only the preceding
preamble in content. Closed-tag behavior is unchanged.

Covers every provider whose stream emits reasoning inline as <think>
tags (MiniMax streaming path, MindIE, PatchedChatOpenAI, and any
gateway-served DeepSeek/OpenAI-compatible model).

* style(frontend): apply prettier formatting to streaming reasoning tests

* fix(frontend): skip <think> split for literal think tags in inline code

Treats a `<think>` opener immediately preceded by a backtick as part of
markdown inline code rather than a streaming reasoning marker. Prevents
permanent content truncation when an AI message documents the `<think>`
tag literally (e.g. ``Use `<think>` markers``), where the streaming-safe
fallback would otherwise route the rest of the answer into the reasoning
panel because no `</think>` ever arrives.

Adds regression tests for both the post-stream and mid-stream cases.
---
 frontend/src/core/messages/utils.ts           |  40 +++++--
 .../tests/unit/core/messages/utils.test.ts    | 108 +++++++++++++++++-
 2 files changed, 137 insertions(+), 11 deletions(-)
diff --git a/frontend/src/core/messages/utils.ts b/frontend/src/core/messages/utils.ts
index 5863195b8..f1bbe4d07 100644
--- a/frontend/src/core/messages/utils.ts
+++ b/frontend/src/core/messages/utils.ts
@@ -266,22 +266,42 @@ export function extractTextFromMessage(message: Message) {
   return "";
 }
 
+const THINK_OPEN_TAG = "<think>";
 const THINK_TAG_RE = /<think>\s*([\s\S]*?)\s*<\/think>/g;
 
 function splitInlineReasoning(content: string) {
   const reasoningParts: string[] = [];
-  const cleaned = content
-    .replace(THINK_TAG_RE, (_, reasoning: string) => {
-      const normalized = reasoning.trim();
-      if (normalized) {
-        reasoningParts.push(normalized);
-      }
-      return "";
-    })
-    .trim();
+
+  // First pass: strip every fully closed `<think>...</think>` pair and
+  // collect its body as reasoning.
+  let cleaned = content.replace(THINK_TAG_RE, (_, reasoning: string) => {
+    const normalized = reasoning.trim();
+    if (normalized) {
+      reasoningParts.push(normalized);
+    }
+    return "";
+  });
+
+  // Streaming-safe pass: a `<think>` opener whose `</think>` has not arrived
+  // yet means the rest of the text is reasoning in flight. Route it into the
+  // reasoning slot so the raw-HTML markdown pipeline does not paint it as
+  // message content. An opener right after a backtick is the model quoting
+  // `<think>` in inline code, so step past it and keep scanning: a genuine
+  // opener later in the same message must still be caught.
+  let openTagIndex = cleaned.indexOf(THINK_OPEN_TAG);
+  while (openTagIndex > 0 && cleaned[openTagIndex - 1] === "`") {
+    openTagIndex = cleaned.indexOf(THINK_OPEN_TAG, openTagIndex + 1);
+  }
+  if (openTagIndex !== -1) {
+    const tail = cleaned.slice(openTagIndex + THINK_OPEN_TAG.length).trim();
+    if (tail) {
+      reasoningParts.push(tail);
+    }
+    cleaned = cleaned.slice(0, openTagIndex);
+  }
 
   return {
-    content: cleaned,
+    content: cleaned.trim(),
     reasoning: reasoningParts.length > 0 ? reasoningParts.join("\n\n") : null,
   };
 }
diff --git a/frontend/tests/unit/core/messages/utils.test.ts b/frontend/tests/unit/core/messages/utils.test.ts
index 1cc456e22..b827c95eb 100644
--- a/frontend/tests/unit/core/messages/utils.test.ts
+++ b/frontend/tests/unit/core/messages/utils.test.ts
@@ -1,14 +1,26 @@
 import type { Message } from "@langchain/langgraph-sdk";
-import { expect, test } from "vitest";
+import { describe, expect, test } from "vitest";
 
 import {
+  extractContentFromMessage,
+  extractReasoningContentFromMessage,
   getAssistantTurnCopyData,
   getAssistantTurnUsageMessages,
   getMessageGroups,
   getStreamingMessageLookup,
+  hasContent,
+  hasReasoning,
   isAssistantMessageGroupStreaming,
 } from "@/core/messages/utils";
 
+function aiMessage(content: string): Message {
+  return {
+    id: "ai-1",
+    type: "ai",
+    content,
+  } as Message;
+}
+
 test("aggregates token usage messages once per assistant turn", () => {
   const messages = [
     {
@@ -67,6 +79,100 @@ test("aggregates token usage messages once per assistant turn", () => {
   ).toEqual([null, null, ["ai-1", "ai-2"], null, ["ai-3"]]);
 });
 
+describe("inline <think> tag splitting", () => {
+  test("strips a fully closed <think> block from AI content", () => {
+    const message = aiMessage("<think>internal reasoning</think>final answer");
+    expect(extractContentFromMessage(message)).toBe("final answer");
+    expect(extractReasoningContentFromMessage(message)).toBe(
+      "internal reasoning",
+    );
+  });
+
+  test("strips multiple closed <think> blocks and joins their reasoning", () => {
+    const message = aiMessage(
+      "<think>step one</think>between<think>step two</think>after",
+    );
+    expect(extractContentFromMessage(message)).toBe("betweenafter");
+    expect(extractReasoningContentFromMessage(message)).toBe(
+      "step one\n\nstep two",
+    );
+  });
+
+  test("during streaming, an unclosed <think> tag does not leak its tail into content", () => {
+    // Simulates accumulated content mid-stream, before </think> arrives.
+    const message = aiMessage(
+      "<think>I need to analyze the user's question step by",
+    );
+    expect(extractContentFromMessage(message)).toBe("");
+    expect(extractContentFromMessage(message)).not.toContain("<think>");
+    expect(extractReasoningContentFromMessage(message)).toBe(
+      "I need to analyze the user's question step by",
+    );
+  });
+
+  test("preamble before an unclosed <think> stays in content", () => {
+    const message = aiMessage(
+      "Here is part of the answer.<think>but wait, let me reconsider",
+    );
+    expect(extractContentFromMessage(message)).toBe(
+      "Here is part of the answer.",
+    );
+    expect(extractReasoningContentFromMessage(message)).toBe(
+      "but wait, let me reconsider",
+    );
+  });
+
+  test("closed <think> followed by a trailing unclosed <think> merges both into reasoning", () => {
+    const message = aiMessage(
+      "<think>first step</think>partial answer<think>second step still streaming",
+    );
+    expect(extractContentFromMessage(message)).toBe("partial answer");
+    expect(extractReasoningContentFromMessage(message)).toBe(
+      "first step\n\nsecond step still streaming",
+    );
+  });
+
+  test("hasReasoning recognises an unclosed <think> tag mid-stream", () => {
+    expect(hasReasoning(aiMessage("<think>thinking in progress"))).toBe(true);
+  });
+
+  test("hasContent excludes an unclosed <think> tail when no preamble exists", () => {
+    expect(hasContent(aiMessage("<think>thinking in progress"))).toBe(false);
+  });
+
+  test("hasContent stays true when preamble precedes an unclosed <think>", () => {
+    expect(hasContent(aiMessage("preamble<think>still thinking"))).toBe(true);
+  });
+
+  test("a lone <think> open tag with no body yields no reasoning and no content", () => {
+    const message = aiMessage("<think>");
+    expect(extractContentFromMessage(message)).toBe("");
+    expect(extractReasoningContentFromMessage(message)).toBeNull();
+    expect(hasReasoning(message)).toBe(false);
+  });
+
+  test("a literal <think> inside markdown inline code is not treated as reasoning", () => {
+    const message = aiMessage(
+      "Use `<think>` markers to delimit reasoning sections.",
+    );
+    expect(extractContentFromMessage(message)).toBe(
+      "Use `<think>` markers to delimit reasoning sections.",
+    );
+    expect(extractReasoningContentFromMessage(message)).toBeNull();
+    expect(hasReasoning(message)).toBe(false);
+  });
+
+  test("a backtick-prefixed <think> mid-stream is not split into reasoning", () => {
+    // Simulates the moment the model has emitted the opening backtick and
+    // `<think>` for a literal documentation reference, before the closing
+    // backtick arrives. Without the backtick guard, everything from the
+    // literal `<think>` onward would be cut from the content.
+    const message = aiMessage("Documentation: `<think>");
+    expect(extractContentFromMessage(message)).toBe("Documentation: `<think>");
+    expect(extractReasoningContentFromMessage(message)).toBeNull();
+  });
+});
+
 test("hides internal todo reminder messages from message groups", () => {
   const messages = [
     {