fix(frontend): render user messages as plain text and cap blockquote nesting (#3502)

* fix(frontend): render user messages as plain text and cap blockquote nesting

User messages are typed or pasted plain text, not authored Markdown, but
they were rendered through the full Streamdown pipeline. Pasted source
files got fragmented (indented chunks become code blocks, paragraphs
collapse and lose indentation), "$...$" spans were KaTeX-ified, and a
message with thousands of nested ">" markers overflowed the call stack
in marked's recursive blockquote lexer, permanently crashing the thread
on every load.

Render human message content verbatim with pre-wrap instead, and cap
blockquote nesting at 100 levels at the Streamdown chokepoint so model
output cannot trigger the same recursion either.

Closes #3500

* fix(frontend): absorb marked lexer crashes with a render fallback boundary

Review found two gaps in the nesting cap: marked's list and blockquote
tokenizers are mutually recursive, so a list marker in front of the
quote chain ("- > > > ...") bypassed the blockquote-only regex and
still overflowed the stack; and the line-based rewrite was fence-blind,
silently truncating literal ">" runs inside code blocks.

Add an error boundary around Streamdown that renders the raw content as
plain pre-wrap text when rendering throws (retrying on the next content
change), keep the cap as a fast path for the dominant pure-">" case,
and make it skip fenced and indented code lines.
This commit is contained in:
Xinmin Zeng
2026-06-12 16:15:40 +08:00
committed by GitHub
parent aa015462a7
commit 503eeac788
6 changed files with 314 additions and 30 deletions
@@ -1,9 +1,10 @@
"use client";
import { type ComponentProps } from "react";
import { Component, useMemo, type ComponentProps, type ReactNode } from "react";
import { Streamdown } from "streamdown";
import { installClipboardFallback } from "@/core/clipboard";
import { capBlockquoteNesting } from "@/core/streamdown/preprocess";
export type ClipboardSafeStreamdownProps = ComponentProps<typeof Streamdown>;
@@ -12,6 +13,61 @@ if (typeof document !== "undefined") {
installClipboardFallback();
}
export function ClipboardSafeStreamdown(props: ClipboardSafeStreamdownProps) {
return <Streamdown {...props} />;
// Streamdown splits content into blocks with marked, whose tokenizers are
// mutually recursive: blockquote/list nesting a couple thousand levels deep
// overflows the call stack during render and would otherwise take down the
// whole route. This boundary contains that failure to the single message that
// caused it and shows the message as plain pre-formatted text instead.
interface StreamdownFallbackBoundaryProps {
  /** Unprocessed content, shown verbatim when rendering `children` throws. */
  raw: ClipboardSafeStreamdownProps["children"];
  /** The rendered Markdown subtree guarded by this boundary. */
  children: ReactNode;
}

interface StreamdownFallbackBoundaryState {
  /** True once a descendant has thrown for the current `raw` value. */
  errored: boolean;
  /** The `raw` value the `errored` flag belongs to. */
  prevRaw: ClipboardSafeStreamdownProps["children"];
}

class StreamdownFallbackBoundary extends Component<
  StreamdownFallbackBoundaryProps,
  StreamdownFallbackBoundaryState
> {
  state: StreamdownFallbackBoundaryState = {
    errored: false,
    prevRaw: this.props.raw,
  };

  /** Switches to the plain-text fallback when a descendant throws. */
  static getDerivedStateFromError() {
    return { errored: true };
  }

  /**
   * Clears the error flag whenever the content changes (e.g. the next
   * streaming chunk) so rendering is attempted again with the new content.
   */
  static getDerivedStateFromProps(
    props: { raw: ClipboardSafeStreamdownProps["children"] },
    state: StreamdownFallbackBoundaryState,
  ) {
    const contentChanged = props.raw !== state.prevRaw;
    return contentChanged ? { errored: false, prevRaw: props.raw } : null;
  }

  render() {
    if (!this.state.errored) {
      return this.props.children;
    }

    // Only string content can be shown verbatim; anything else renders empty.
    const { raw } = this.props;
    const fallbackText = typeof raw === "string" ? raw : null;
    return (
      <div className="break-words whitespace-pre-wrap">{fallbackText}</div>
    );
  }
}
/**
 * Streamdown wrapper that survives pathological Markdown.
 *
 * String content is first passed through `capBlockquoteNesting`; the result
 * is rendered inside `StreamdownFallbackBoundary`, which receives the
 * original, unmodified content to display if rendering still throws.
 */
export function ClipboardSafeStreamdown({
  children,
  ...streamdownProps
}: ClipboardSafeStreamdownProps) {
  // Fast path for the dominant pathological input (pure ">" chains), so the
  // boundary below rarely has to absorb a full stack overflow. Non-string
  // children are passed through untouched.
  const cappedChildren = useMemo(() => {
    if (typeof children !== "string") {
      return children;
    }
    return capBlockquoteNesting(children);
  }, [children]);

  return (
    <StreamdownFallbackBoundary raw={children}>
      <Streamdown {...streamdownProps}>{cappedChildren}</Streamdown>
    </StreamdownFallbackBoundary>
  );
}
@@ -19,7 +19,6 @@ import { Loader } from "@/components/ai-elements/loader";
import {
Message as AIElementMessage,
MessageContent as AIElementMessageContent,
MessageResponse as AIElementMessageResponse,
MessageToolbar,
} from "@/components/ai-elements/message";
import {
@@ -44,7 +43,6 @@ import {
type FileInMessage,
} from "@/core/messages/utils";
import { useRehypeSplitWordsIntoSpans } from "@/core/rehype";
import { humanMessagePlugins } from "@/core/streamdown";
import { cn } from "@/lib/utils";
import { CopyButton } from "../copy-button";
@@ -300,17 +298,10 @@ function MessageContent_({
}
if (isHuman) {
const messageResponse = contentToDisplay ? (
<AIElementMessageResponse
className="break-words"
remarkPlugins={humanMessagePlugins.remarkPlugins}
rehypePlugins={humanMessagePlugins.rehypePlugins}
components={components}
parseIncompleteMarkdown={false}
>
{contentToDisplay}
</AIElementMessageResponse>
) : null;
// Composer input is plain text, not authored Markdown. Parsing it as
// Markdown mangles pasted code/logs (indented lines become code blocks,
// "$...$" spans become math) and lets pathological input crash the page
// through marked's recursive blockquote lexer, so render it verbatim.
return (
<div
className={cn(
@@ -319,9 +310,11 @@ function MessageContent_({
)}
>
{filesList}
{messageResponse && (
{contentToDisplay && (
<AIElementMessageContent className="w-full max-w-full">
{messageResponse}
<div className="break-words whitespace-pre-wrap">
{contentToDisplay}
</div>
</AIElementMessageContent>
)}
</div>
-12
View File
@@ -36,15 +36,3 @@ export const reasoningPlugins = {
(p) => p !== rehypeRaw,
) as StreamdownProps["rehypePlugins"],
};
// Streamdown plugin set for human-authored messages. The remark list contains
// only the math plugin: remark-gfm is deliberately left out so that its
// autolink literals cannot turn a bare URL into a link that bleeds into the
// adjacent text.
export const humanMessagePlugins = {
remarkPlugins: [
// Math is the only Markdown extension enabled for human messages.
// NOTE(review): singleDollarTextMath presumably enables "$...$" inline math; confirm against remark-math.
[remarkMath, { singleDollarTextMath: true }],
] as StreamdownProps["remarkPlugins"],
rehypePlugins: [
// Renders the math nodes produced above; KaTeX is asked for HTML output.
[rehypeKatex, { output: "html" }],
] as StreamdownProps["rehypePlugins"],
};
@@ -2,6 +2,59 @@ import { normalizeMermaidMarkdown } from "./mermaid";
const MERMAID_BLOCK_HINT_RE = /mermaid/i;
// marked's blockquote tokenizer (used by Streamdown to split content into
// memoizable blocks) recurses once per nesting level and overflows the call
// stack at roughly 2,000 levels, replacing the whole chat route with an error
// page. 100 levels is far beyond any legitimate content while keeping a wide
// margin below the crash threshold.
const MAX_BLOCKQUOTE_DEPTH = 100;

// Cheap whole-document probe: true when some line starts with more than
// MAX_BLOCKQUOTE_DEPTH ">" markers. Lets ordinary content skip the per-line
// pass entirely.
const DEEP_BLOCKQUOTE_HINT_RE = new RegExp(
  `^(?:[ \\t]*>){${MAX_BLOCKQUOTE_DEPTH + 1}}`,
  "m",
);

// Only up to 3 leading spaces can start a blockquote; 4+ (or a tab) is an
// indented code block, where ">" runs are literal content.
const BLOCKQUOTE_PREFIX_RE = /^ {0,3}(?:[ \t]*>)+/;

// A fence line: up to 3 spaces of indentation, then a run of 3+ backticks or
// 3+ tildes. The run is captured so opener and closer can be compared.
const CODE_FENCE_RE = /^ {0,3}(`{3,}|~{3,})/;

const INDENTED_CODE_RE = /^(?: {4}|\t)/;

/** The fence that opened the code block currently being skipped. */
interface OpenCodeFence {
  /** Fence character: "`" or "~". */
  marker: string;
  /** Number of fence characters in the opening run. */
  length: number;
}

/**
 * Removes every ">" marker beyond MAX_BLOCKQUOTE_DEPTH from the blockquote
 * prefix of a single line. Lines without a blockquote prefix, or with a
 * prefix within the limit, are returned unchanged.
 */
function dropExcessBlockquoteMarkers(line: string): string {
  const match = BLOCKQUOTE_PREFIX_RE.exec(line);
  if (!match) {
    return line;
  }

  const prefix = match[0];
  let depth = 0;
  for (let i = 0; i < prefix.length; i++) {
    if (prefix[i] !== ">") {
      continue;
    }
    depth += 1;
    if (depth > MAX_BLOCKQUOTE_DEPTH) {
      // Keep everything before the first excess marker, then resume right
      // after the end of the prefix so the quoted text itself is preserved.
      return line.slice(0, i) + line.slice(prefix.length);
    }
  }
  return line;
}

/**
 * Caps blockquote nesting at MAX_BLOCKQUOTE_DEPTH levels so that marked's
 * recursive blockquote tokenizer cannot overflow the call stack.
 *
 * Lines inside fenced or indented code blocks are never modified: ">" runs
 * there are literal text, and rewriting them would silently corrupt code.
 *
 * Fences are tracked the way CommonMark defines them rather than by toggling
 * on every fence-looking line:
 * - a fence is closed only by a run of the same character that is at least
 *   as long as the opening run and is followed by nothing but whitespace, so
 *   "```" inside a "~~~" block, a shorter run inside a longer fence, and
 *   "```js" inside a fence are all treated as code content;
 * - a backtick run followed by text containing another backtick is an inline
 *   code span, not a fence opener;
 * - an unclosed fence extends to the end of the document.
 *
 * @param markdown Markdown source; "\n" and "\r\n" line endings both work.
 * @returns The input unchanged when no line exceeds the limit, otherwise a
 *   copy with the excess ">" markers removed from each offending line.
 */
export function capBlockquoteNesting(markdown: string): string {
  if (!DEEP_BLOCKQUOTE_HINT_RE.test(markdown)) {
    return markdown;
  }

  let openFence: OpenCodeFence | null = null;

  return markdown
    .split("\n")
    .map((line) => {
      const fenceMatch = CODE_FENCE_RE.exec(line);
      if (fenceMatch) {
        const run = fenceMatch[1] ?? "";
        const marker = run.charAt(0);
        const rest = line.slice(fenceMatch[0].length);

        if (openFence !== null) {
          const closesFence =
            marker === openFence.marker &&
            run.length >= openFence.length &&
            rest.trim() === "";
          if (closesFence) {
            openFence = null;
          }
          // Closing fence or not, the line belongs to the code block.
          return line;
        }

        // The info string of a backtick fence cannot contain backticks; such
        // a line is inline code and falls through as ordinary text.
        const opensFence = marker !== "`" || !rest.includes("`");
        if (opensFence) {
          openFence = { marker, length: run.length };
          return line;
        }
      }

      if (openFence !== null || INDENTED_CODE_RE.test(line)) {
        return line;
      }
      return dropExcessBlockquoteMarkers(line);
    })
    .join("\n");
}
export function preprocessStreamdownMarkdown(markdown: string): string {
if (!MERMAID_BLOCK_HINT_RE.test(markdown) || !markdown.includes("-.->")) {
return markdown;