fix(frontend): cap deeply nested list indentation to prevent render crash (#3393) (#3570)

* fix(frontend): cap deeply nested list indentation to prevent render crash

Deeply nested lists make marked's recursive list tokenizer overflow the
call stack during Streamdown's lexing useMemo, throwing an uncaught
"RangeError: Maximum call stack size exceeded" that replaces the chat
route with an error page (issue #3393); on larger stacks the same input
exhausts the heap, which the render error boundary cannot catch.

Mirror the existing capBlockquoteNesting guard with capListNesting, which
clamps leading whitespace to 200 columns (~100 nesting levels) only when
pathologically deep indentation is present, leaving normal content and
fenced code untouched. Wire both through capMarkdownNesting.

* fix(frontend): satisfy prettier format check in preprocess test

* fix(frontend): exempt indented code from list-indent cap (PR #3570 review)

* fix(frontend): keep capping all deep indentation outside fenced code

Revert the indented-code exemption from the PR #3570 review nit. Taken
literally, the suggested guard (insideFence || INDENTED_CODE_RE.test(line))
turns capListNesting into a no-op, because INDENTED_CODE_RE matches every line
with 4+ leading spaces — i.e. exactly the deep-indent lines the cap targets.
A context-aware exemption (only treat 4+-space lines as code after a
blank line) instead reopens the crash: blank-separated deeply nested list
items get exempted and still blow up marked (verified: OOM at depth ~1.5k).

Unlike blockquotes (markers take <=3 leading spaces, so deep-quote lines
never look like indented code), list vs. indented-code indentation is
ambiguous line-by-line, so any exemption is exploitable. Keep capping all
deep indentation outside fenced code; the only cost is mild corruption of
a >200-column indented-code line, which never occurs in real content and
is strictly preferable to a render crash. Add a regression test locking
the blank-line case.
This commit is contained in:
Eilen Shin
2026-06-14 22:19:54 +08:00
committed by GitHub
parent 34e126ee4b
commit 25fbd25b05
3 changed files with 101 additions and 4 deletions
@@ -4,7 +4,7 @@ import { Component, useMemo, type ComponentProps, type ReactNode } from "react";
import { Streamdown } from "streamdown"; import { Streamdown } from "streamdown";
import { installClipboardFallback } from "@/core/clipboard"; import { installClipboardFallback } from "@/core/clipboard";
import { capBlockquoteNesting } from "@/core/streamdown/preprocess"; import { capMarkdownNesting } from "@/core/streamdown/preprocess";
export type ClipboardSafeStreamdownProps = ComponentProps<typeof Streamdown>; export type ClipboardSafeStreamdownProps = ComponentProps<typeof Streamdown>;
@@ -58,11 +58,14 @@ export function ClipboardSafeStreamdown({
children, children,
...props ...props
}: ClipboardSafeStreamdownProps) { }: ClipboardSafeStreamdownProps) {
// Fast path for the dominant pathological input (pure ">" chains) so the // Fast path for the dominant pathological inputs (deep ">" chains and deeply
// error boundary below rarely has to absorb a full stack overflow. // nested lists both blow up marked's recursive tokenizers) so the error
// boundary below rarely has to absorb a stack overflow — and never has to
// face the heap exhaustion the same lists cause on larger stacks, which it
// cannot catch.
const safeChildren = useMemo( const safeChildren = useMemo(
() => () =>
typeof children === "string" ? capBlockquoteNesting(children) : children, typeof children === "string" ? capMarkdownNesting(children) : children,
[children], [children],
); );
return ( return (
@@ -18,6 +18,18 @@ const BLOCKQUOTE_PREFIX_RE = /^ {0,3}(?:[ \t]*>)+/;
const CODE_FENCE_RE = /^ {0,3}(?:```|~~~)/; const CODE_FENCE_RE = /^ {0,3}(?:```|~~~)/;
const INDENTED_CODE_RE = /^(?: {4}|\t)/; const INDENTED_CODE_RE = /^(?: {4}|\t)/;
// marked's list tokenizer recurses once per nesting level too (list ->
// blockTokens -> list -> ...). In the browser's tighter stack a deeply nested
// list overflows during render and throws "Maximum call stack size exceeded"
// from inside Streamdown's lexing useMemo (see issue #3393); on larger stacks
// the same input instead goes quadratic and exhausts the heap. Each list level
// requires at least ~2 columns of indentation, so capping leading whitespace at
// 200 columns bounds the effective nesting near 100 levels — far beyond any
// legitimate content while keeping marked safe. Anything indented past this is
// pathological nesting, not prose or code.
const MAX_LIST_INDENT = 200;
const DEEP_INDENT_HINT_RE = new RegExp(`^[ \\t]{${MAX_LIST_INDENT + 1},}`, "m");
export function capBlockquoteNesting(markdown: string): string { export function capBlockquoteNesting(markdown: string): string {
if (!DEEP_BLOCKQUOTE_HINT_RE.test(markdown)) { if (!DEEP_BLOCKQUOTE_HINT_RE.test(markdown)) {
return markdown; return markdown;
@@ -55,6 +67,39 @@ export function capBlockquoteNesting(markdown: string): string {
.join("\n"); .join("\n");
} }
/**
 * Clamps runaway leading indentation so deeply nested lists cannot take down
 * the markdown render.
 *
 * Outside fenced code blocks, any line whose leading run of spaces/tabs is
 * longer than `MAX_LIST_INDENT` characters has that run replaced by exactly
 * `MAX_LIST_INDENT` spaces; every other line is returned untouched. Input
 * that `DEEP_INDENT_HINT_RE` does not match is returned as-is without being
 * split. Indentation is measured in characters, so a tab counts as one.
 *
 * Fences are tracked by marker character and length rather than by toggling
 * on every fence-looking line. A plain toggle is flipped by fence-like lines
 * that are really code content (e.g. a backtick fence line inside a `~~~`
 * block), which leaves the tracker "inside a fence" after the block has
 * closed and exempts all following deep-indent lines from the cap.
 *
 * @param markdown Raw markdown about to be handed to the renderer.
 * @returns The markdown with pathological indentation capped.
 */
export function capListNesting(markdown: string): string {
  if (!DEEP_INDENT_HINT_RE.test(markdown)) {
    return markdown;
  }

  // Up to 3 spaces, a run of 3+ identical fence characters, then the rest of
  // the line ([\s\S] so a trailing "\r" from CRLF input is captured too).
  const fenceLineRe = /^ {0,3}(`{3,}|~{3,})([\s\S]*)$/;
  // What may follow the marker run on a closing fence. Deliberately lenient
  // (extra fence characters and whitespace are accepted): when in doubt we
  // prefer to leave the fence and keep capping rather than stay inside it.
  const closingTailRe = /^[`~]*\s*$/;
  const firstNonIndentRe = /[^ \t]/;

  // fenceLength === 0 means "not inside a fenced block".
  let fenceChar = "";
  let fenceLength = 0;

  return markdown
    .split("\n")
    .map((line) => {
      const fenceMatch = fenceLineRe.exec(line);
      if (fenceMatch) {
        const run = fenceMatch[1] ?? "";
        const tail = fenceMatch[2] ?? "";
        const marker = run.charAt(0);
        if (fenceLength === 0) {
          // The info string of a backtick fence may not itself contain
          // backticks; such a line is inline code, not a fence opener.
          const opensFence = !(marker === "`" && tail.includes("`"));
          if (opensFence) {
            fenceChar = marker;
            fenceLength = run.length;
            return line;
          }
        } else if (
          marker === fenceChar &&
          run.length >= fenceLength &&
          closingTailRe.test(tail)
        ) {
          fenceChar = "";
          fenceLength = 0;
          return line;
        }
      }
      // Indentation inside fenced code is literal layout (ASCII art, pasted
      // source); collapsing it would corrupt the rendered block.
      if (fenceLength > 0) {
        return line;
      }
      const firstContent = line.search(firstNonIndentRe);
      const indentLength = firstContent === -1 ? line.length : firstContent;
      if (indentLength <= MAX_LIST_INDENT) {
        return line;
      }
      return " ".repeat(MAX_LIST_INDENT) + line.slice(indentLength);
    })
    .join("\n");
}
// Single entry point for the nesting guards: runs the blockquote cap first and
// the list-indentation cap on its output, so every runaway nesting construct
// that can take down a message render is bounded before marked sees the text.
export function capMarkdownNesting(markdown: string): string {
  const withCappedQuotes = capBlockquoteNesting(markdown);
  return capListNesting(withCappedQuotes);
}
export function preprocessStreamdownMarkdown(markdown: string): string { export function preprocessStreamdownMarkdown(markdown: string): string {
if (!MERMAID_BLOCK_HINT_RE.test(markdown) || !markdown.includes("-.->")) { if (!MERMAID_BLOCK_HINT_RE.test(markdown) || !markdown.includes("-.->")) {
return markdown; return markdown;
@@ -2,6 +2,8 @@ import { expect, test } from "vitest";
import { import {
capBlockquoteNesting, capBlockquoteNesting,
capListNesting,
capMarkdownNesting,
preprocessStreamdownMarkdown, preprocessStreamdownMarkdown,
} from "@/core/streamdown/preprocess"; } from "@/core/streamdown/preprocess";
@@ -51,6 +53,53 @@ test("capBlockquoteNesting only rewrites pathological lines", () => {
expect(lines[2]).toBe("plain"); expect(lines[2]).toBe("plain");
}); });
test("capListNesting returns normally indented content unchanged", () => {
  // Shallow list nesting and a short indented continuation are nowhere near
  // the cap, so the exact same string must come back.
  const shallow = "- a\n - b\n - c\n\n code continuation";
  const output = capListNesting(shallow);
  expect(output).toBe(shallow);
});
test("capListNesting caps pathologically deep list indentation", () => {
  // 2000 leading spaces is far past the cap: the indentation must shrink to
  // 200 while the list item text survives.
  const capped = capListNesting(`${" ".repeat(2000)}- x`);
  const leading = /^[ \t]*/.exec(capped)![0];
  expect(leading.length).toBe(200);
  expect(capped.endsWith("- x")).toBe(true);
});
test("capListNesting leaves fenced code content untouched", () => {
  // A 400-space line inside a ```text fence is literal layout and must not be
  // rewritten.
  const artLine = `${" ".repeat(400)}deeply indented ascii art`;
  const fenced = ["```text", artLine, "```"].join("\n");
  const outputLines = capListNesting(fenced).split("\n");
  expect(outputLines[1]).toBe(artLine);
});
// Regression lock for the blank-line case. Outside a fence the cap applies no
// matter what precedes the line: an indented-code line and deeply nested list
// content can both follow a blank line and cannot be told apart, and exempting
// either reopens the crash — blank-separated deep-indent lists blow up marked
// just like contiguous ones.
test("capListNesting caps deep indentation even after a blank line", () => {
  const source = ["- a", "", `${" ".repeat(500)}- deep`].join("\n");
  const outputLines = capListNesting(source).split("\n");
  const deepIndent = /^[ \t]*/.exec(outputLines[2]!)![0];
  expect(deepIndent.length).toBe(200);
});
test("capListNesting only rewrites pathological lines", () => {
  // Mixed document: only the 500-space line may change; its neighbours must
  // pass through byte-for-byte.
  const shallowLine = " indented paragraph";
  const deepLine = `${" ".repeat(500)}- deep`;
  const source = [shallowLine, deepLine, "plain"].join("\n");
  const outputLines = capListNesting(source).split("\n");
  expect(outputLines[0]).toBe(shallowLine);
  expect(/^[ \t]*/.exec(outputLines[1]!)![0].length).toBe(200);
  expect(outputLines[2]).toBe("plain");
});
test("capMarkdownNesting caps both blockquote and list nesting", () => {
  // One pathological line of each kind: the combined guard must bound the
  // quote markers to 100 and the list indentation to 200.
  const quoteLine = `${"> ".repeat(3000)}quote`;
  const listLine = `${" ".repeat(500)}- item`;
  const outputLines = capMarkdownNesting(`${quoteLine}\n${listLine}`).split(
    "\n",
  );
  const quoteMarkers = outputLines[0]?.match(/>/g) ?? [];
  expect(quoteMarkers.length).toBe(100);
  expect(/^[ \t]*/.exec(outputLines[1]!)![0].length).toBe(200);
});
test("preprocessStreamdownMarkdown leaves non-mermaid content unchanged", () => { test("preprocessStreamdownMarkdown leaves non-mermaid content unchanged", () => {
const input = "just some text"; const input = "just some text";
expect(preprocessStreamdownMarkdown(input)).toBe(input); expect(preprocessStreamdownMarkdown(input)).toBe(input);