Files
deer-flow/frontend/src/core/streamdown/preprocess.ts
T
Eilen Shin 25fbd25b05 fix(frontend): cap deeply nested list indentation to prevent render crash (#3393) (#3570)
* fix(frontend): cap deeply nested list indentation to prevent render crash

Deeply nested lists make marked's recursive list tokenizer overflow the
call stack during Streamdown's lexing useMemo, throwing an uncaught
"RangeError: Maximum call stack size exceeded" that replaces the chat
route with an error page (issue #3393); on larger stacks the same input
exhausts the heap, which the render error boundary cannot catch.

Mirror the existing capBlockquoteNesting guard with capListNesting, which
clamps leading whitespace to 200 columns (~100 nesting levels) only when
pathologically deep indentation is present, leaving normal content and
fenced code untouched. Wire both through capMarkdownNesting.

* fix(frontend): satisfy prettier format check in preprocess test

* fix(frontend): exempt indented code from list-indent cap (PR #3570 review)

* fix(frontend): keep capping all deep indentation outside fenced code

Revert the indented-code exemption from the PR #3570 review nit. Taken
literally the suggested guard (insideFence || INDENTED_CODE_RE.test(line))
no-ops capListNesting, because INDENTED_CODE_RE matches every line with
4+ leading spaces — i.e. exactly the deep-indent lines the cap targets.
A context-aware exemption (only treat 4+-space lines as code after a
blank line) instead reopens the crash: blank-separated deeply nested list
items get exempted and still blow up marked (verified: OOM at depth ~1.5k).

Unlike blockquotes (markers take <=3 leading spaces, so deep-quote lines
never look like indented code), list vs. indented-code indentation is
ambiguous line-by-line, so any exemption is exploitable. Keep capping all
deep indentation outside fenced code; the only cost is mild corruption of
a >200-column indented-code line, which never occurs in real content and
is strictly preferable to a render crash. Add a regression test locking
the blank-line case.
2026-06-14 22:19:54 +08:00

110 lines
3.8 KiB
TypeScript

import { normalizeMermaidMarkdown } from "./mermaid";
// Case-insensitive probe for the word "mermaid" anywhere in the text. Used by
// preprocessStreamdownMarkdown as a gate so that content which never mentions
// mermaid is returned without calling the mermaid normalizer.
const MERMAID_BLOCK_HINT_RE = /mermaid/i;
// marked's blockquote tokenizer (used by Streamdown to split content into
// memoizable blocks) recurses once per nesting level and overflows the call
// stack at roughly 2,000 levels, replacing the whole chat route with an error
// page. 100 levels is far beyond any legitimate content while keeping a wide
// margin below the crash threshold.
const MAX_BLOCKQUOTE_DEPTH = 100;
// Cheap whole-document probe: is there any line with more than
// MAX_BLOCKQUOTE_DEPTH ">" markers? Lets normal content skip the line walk.
const DEEP_BLOCKQUOTE_HINT_RE = new RegExp(
  `^(?:[ \\t]*>){${MAX_BLOCKQUOTE_DEPTH + 1}}`,
  "m",
);
// Only up to 3 leading spaces can start a blockquote; 4+ (or a tab) is an
// indented code block, where ">" runs are literal content.
const BLOCKQUOTE_PREFIX_RE = /^ {0,3}(?:[ \t]*>)+/;
const INDENTED_CODE_RE = /^(?: {4}|\t)/;

// Splits on every line terminator the hint regexes' "m" flag (and marked's own
// CR normalization) recognise, capturing the terminator so the document can be
// re-joined byte-for-byte. Splitting on "\n" alone would let lines separated
// by a lone "\r" trip the hint yet never be visited individually.
const LINE_BREAK_RE = /(\r\n|\r|\n)/;
// Candidate fence opener: up to 3 spaces, a run of 3+ backticks or tildes
// (group 1), then the info string (group 2).
const FENCE_OPEN_RE = /^ {0,3}(`{3,}|~{3,})([\s\S]*)$/;
// Candidate fence closer: up to 3 spaces, a run of fence characters (group 1)
// and nothing but trailing blanks. Whether it closes the *current* fence is
// decided by comparing group 1 against the opener.
const FENCE_CLOSE_RE = /^ {0,3}([`~]+)[ \t]*$/;

// marked's list tokenizer recurses once per nesting level too (list ->
// blockTokens -> list -> ...). In the browser's tighter stack a deeply nested
// list overflows during render and throws "Maximum call stack size exceeded"
// from inside Streamdown's lexing useMemo (see issue #3393); on larger stacks
// the same input instead goes quadratic and exhausts the heap. Each list level
// requires at least ~2 columns of indentation, so capping leading whitespace at
// 200 columns bounds the effective nesting near 100 levels — far beyond any
// legitimate content while keeping marked safe. Anything indented past this is
// pathological nesting, not prose or code.
const MAX_LIST_INDENT = 200;
// A tab is charged as a full 4 columns, the most a tab stop can contribute, so
// tab-indented nesting cannot slip under the cap by being counted as one
// character per tab. NOTE(review): assumes the lexer expands a leading tab to
// at most 4 columns — confirm against the bundled marked version.
const TAB_COLUMNS = 4;
// Cheap whole-document probe. Fewer whitespace characters than this cannot
// exceed MAX_LIST_INDENT columns even if every one of them is a tab.
const DEEP_INDENT_HINT_RE = new RegExp(
  `^[ \\t]{${Math.floor(MAX_LIST_INDENT / TAB_COLUMNS) + 1},}`,
  "m",
);
const LEADING_WHITESPACE_RE = /^[ \t]*/;

/**
 * Returns the fence string (e.g. "```" or "~~~~") when `line` opens a fenced
 * code block, otherwise `null`.
 *
 * A backtick fence whose info string contains a backtick is not a fence (it is
 * inline code such as "```js``` text"), so it must not start an exempt region.
 */
function openingFence(line: string): string | null {
  const match = FENCE_OPEN_RE.exec(line);
  if (match === null) {
    return null;
  }
  const [, fence = "", info = ""] = match;
  if (fence.startsWith("`") && info.includes("`")) {
    return null;
  }
  return fence;
}

/**
 * Reports whether `line` closes a block opened by `fence`: it must consist of
 * a fence run that begins with the opener itself (same character, at least as
 * long) and carry no info string. Extra trailing fence characters are accepted
 * so this never keeps a fence open longer than a tolerant lexer would.
 */
function closesFence(line: string, fence: string): boolean {
  const match = FENCE_CLOSE_RE.exec(line);
  if (match === null) {
    return false;
  }
  const [, run = ""] = match;
  return run.startsWith(fence);
}

/**
 * Applies `transform` to every line of `markdown` that is outside a fenced
 * code block, leaving fence marker lines, fenced content and all line
 * terminators untouched.
 *
 * Fences are paired by opener character and length rather than toggled on
 * every fence-looking line; a blind toggle can be desynchronised (e.g. a "```"
 * line inside a "~~~" block), which would exempt pathological lines that the
 * lexer still parses as live markdown.
 *
 * NOTE(review): fences are tracked at document level only. A fence nested in
 * a list item or blockquote that ends with its container is not detected as
 * closed here.
 */
function mapLinesOutsideFences(
  markdown: string,
  transform: (line: string) => string,
): string {
  // The opener of the fence currently in effect, or null when outside one.
  let openFence: string | null = null;
  return markdown
    .split(LINE_BREAK_RE)
    .map((segment, index) => {
      // split() with a capturing group interleaves the terminators at odd
      // positions; pass them through unchanged.
      if (index % 2 === 1) {
        return segment;
      }
      if (openFence !== null) {
        if (closesFence(segment, openFence)) {
          openFence = null;
        }
        return segment;
      }
      const fence = openingFence(segment);
      if (fence !== null) {
        openFence = fence;
        return segment;
      }
      return transform(segment);
    })
    .join("");
}

/** Width of a run of spaces/tabs, charging each tab TAB_COLUMNS columns. */
function indentColumns(whitespace: string): number {
  let columns = 0;
  for (let i = 0; i < whitespace.length; i++) {
    columns += whitespace[i] === "\t" ? TAB_COLUMNS : 1;
  }
  return columns;
}

/**
 * Truncates blockquote prefixes deeper than MAX_BLOCKQUOTE_DEPTH so the
 * markdown lexer never recurses past that depth.
 *
 * Content without such a line is returned as-is. Lines inside fenced code and
 * lines that look like indented code are never rewritten.
 *
 * @param markdown Raw markdown text.
 * @returns The markdown with every over-deep ">" run cut to the cap; the
 *   line's remaining text is kept.
 */
export function capBlockquoteNesting(markdown: string): string {
  if (!DEEP_BLOCKQUOTE_HINT_RE.test(markdown)) {
    return markdown;
  }
  return mapLinesOutsideFences(markdown, (line) => {
    // ">" runs inside indented code blocks are literal text, not nesting —
    // rewriting them would silently corrupt code content.
    // NOTE(review): inside a list item a 4-space-indented line can still be
    // parsed as a blockquote, so this exemption may be bypassable — confirm.
    if (INDENTED_CODE_RE.test(line)) {
      return line;
    }
    const match = BLOCKQUOTE_PREFIX_RE.exec(line);
    if (match === null) {
      return line;
    }
    const prefix = match[0];
    let depth = 0;
    for (let i = 0; i < prefix.length; i++) {
      if (prefix[i] !== ">") {
        continue;
      }
      depth += 1;
      if (depth > MAX_BLOCKQUOTE_DEPTH) {
        // Keep everything before the first excess marker, drop the rest of
        // the marker run, and re-attach the line's actual content.
        return line.slice(0, i) + line.slice(prefix.length);
      }
    }
    return line;
  });
}

/**
 * Clamps leading indentation wider than MAX_LIST_INDENT columns to exactly
 * MAX_LIST_INDENT spaces so the list lexer cannot be driven into runaway
 * nesting.
 *
 * Content without such a line is returned as-is. Indentation inside fenced
 * code is literal layout (ASCII art, pasted source) and is preserved.
 *
 * @param markdown Raw markdown text.
 * @returns The markdown with every over-indented line re-indented to the cap;
 *   the text after the indentation is kept.
 */
export function capListNesting(markdown: string): string {
  if (!DEEP_INDENT_HINT_RE.test(markdown)) {
    return markdown;
  }
  return mapLinesOutsideFences(markdown, (line) => {
    const match = LEADING_WHITESPACE_RE.exec(line);
    const whitespace = match === null ? "" : match[0];
    if (indentColumns(whitespace) <= MAX_LIST_INDENT) {
      return line;
    }
    return " ".repeat(MAX_LIST_INDENT) + line.slice(whitespace.length);
  });
}
/**
 * Cap every runaway nesting construct that can take down a message render
 * before marked sees the content.
 *
 * Runs the blockquote cap first and feeds its output to the list-indent cap.
 *
 * @param markdown Raw markdown text.
 * @returns The markdown after both caps have been applied.
 */
export function capMarkdownNesting(markdown: string): string {
  const quotesCapped = capBlockquoteNesting(markdown);
  return capListNesting(quotesCapped);
}
/**
 * Runs mermaid normalization on `markdown`, but only when the text both
 * mentions "mermaid" (case-insensitively) and contains the literal "-.->"
 * token; any other input is returned unchanged.
 *
 * @param markdown Raw markdown text.
 * @returns The result of normalizeMermaidMarkdown when both conditions hold,
 *   otherwise the original string.
 */
export function preprocessStreamdownMarkdown(markdown: string): string {
  const needsMermaidNormalization =
    MERMAID_BLOCK_HINT_RE.test(markdown) && markdown.includes("-.->");
  return needsMermaidNormalization
    ? normalizeMermaidMarkdown(markdown)
    : markdown;
}