fix(frontend): render user messages as plain text and cap blockquote nesting (#3502)

* fix(frontend): render user messages as plain text and cap blockquote nesting User messages are typed or pasted plain text, not authored Markdown, but they were rendered through the full Streamdown pipeline. Pasted source files got fragmented (indented chunks become code blocks, paragraphs collapse and lose indentation), "$...$" spans were KaTeX-ified, and a message with thousands of nested ">" markers overflowed the call stack in marked's recursive blockquote lexer, permanently crashing the thread on every load. Render human message content verbatim with pre-wrap instead, and cap blockquote nesting at 100 levels at the Streamdown chokepoint so model output cannot trigger the same recursion either. Closes #3500 * fix(frontend): absorb marked lexer crashes with a render fallback boundary Review found two gaps in the nesting cap: marked's list and blockquote tokenizers are mutually recursive, so a list marker in front of the quote chain ("- > > > ...") bypassed the blockquote-only regex and still overflowed the stack; and the line-based rewrite was fence-blind, silently truncating literal ">" runs inside code blocks. Add an error boundary around Streamdown that renders the raw content as plain pre-wrap text when rendering throws (retrying on the next content change), keep the cap as a fast path for the dominant pure-">" case, and make it skip fenced and indented code lines.
2026-06-15 11:56:01 +00:00 · 2026-06-12 16:15:40 +08:00
parent aa015462a7
commit 503eeac788
6 changed files with 314 additions and 30 deletions
@@ -2,6 +2,59 @@ import { normalizeMermaidMarkdown } from "./mermaid";

 const MERMAID_BLOCK_HINT_RE = /mermaid/i;

+// marked's blockquote tokenizer (used by Streamdown to split content into
+// memoizable blocks) recurses once per nesting level and overflows the call
+// stack at roughly 2,000 levels, replacing the whole chat route with an error
+// page. 100 levels is far beyond any legitimate content while keeping a wide
+// margin below the crash threshold.
+const MAX_BLOCKQUOTE_DEPTH = 100;
+const DEEP_BLOCKQUOTE_HINT_RE = new RegExp(
+  `^(?:[ \\t]*>){${MAX_BLOCKQUOTE_DEPTH + 1}}`,
+  "m",
+);
+// Only up to 3 leading spaces can start a blockquote; 4+ (or a tab) is an
+// indented code block, where ">" runs are literal content.
+const BLOCKQUOTE_PREFIX_RE = /^ {0,3}(?:[ \t]*>)+/;
+const CODE_FENCE_RE = /^ {0,3}(?:```|~~~)/;
+const INDENTED_CODE_RE = /^(?: {4}|\t)/;
+
+export function capBlockquoteNesting(markdown: string): string {
+  if (!DEEP_BLOCKQUOTE_HINT_RE.test(markdown)) {
+    return markdown;
+  }
+
+  let insideFence = false;
+  return markdown
+    .split("\n")
+    .map((line) => {
+      if (CODE_FENCE_RE.test(line)) {
+        insideFence = !insideFence;
+        return line;
+      }
+      // ">" runs inside fenced or indented code blocks are literal text, not
+      // nesting — rewriting them would silently corrupt code content.
+      if (insideFence || INDENTED_CODE_RE.test(line)) {
+        return line;
+      }
+      const match = BLOCKQUOTE_PREFIX_RE.exec(line);
+      if (!match) {
+        return line;
+      }
+      const prefix = match[0];
+      let depth = 0;
+      for (let i = 0; i < prefix.length; i++) {
+        if (prefix[i] === ">") {
+          depth += 1;
+          if (depth > MAX_BLOCKQUOTE_DEPTH) {
+            return line.slice(0, i) + line.slice(prefix.length);
+          }
+        }
+      }
+      return line;
+    })
+    .join("\n");
+}
+
 export function preprocessStreamdownMarkdown(markdown: string): string {
  if (!MERMAID_BLOCK_HINT_RE.test(markdown) || !markdown.includes("-.->")) {
    return markdown;