mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-15 11:56:01 +00:00
25fbd25b05
* fix(frontend): cap deeply nested list indentation to prevent render crash Deeply nested lists make marked's recursive list tokenizer overflow the call stack during Streamdown's lexing useMemo, throwing an uncaught "RangeError: Maximum call stack size exceeded" that replaces the chat route with an error page (issue #3393); on larger stacks the same input exhausts the heap, which the render error boundary cannot catch. Mirror the existing capBlockquoteNesting guard with capListNesting, which clamps leading whitespace to 200 columns (~100 nesting levels) only when pathologically deep indentation is present, leaving normal content and fenced code untouched. Wire both through capMarkdownNesting. * fix(frontend): satisfy prettier format check in preprocess test * fix(frontend): exempt indented code from list-indent cap (PR #3570 review) * fix(frontend): keep capping all deep indentation outside fenced code Revert the indented-code exemption from the PR #3570 review nit. Taken literally the suggested guard (insideFence || INDENTED_CODE_RE.test(line)) no-ops capListNesting, because INDENTED_CODE_RE matches every line with 4+ leading spaces — i.e. exactly the deep-indent lines the cap targets. A context-aware exemption (only treat 4+-space lines as code after a blank line) instead reopens the crash: blank-separated deeply nested list items get exempted and still blow up marked (verified: OOM at depth ~1.5k). Unlike blockquotes (markers take <=3 leading spaces, so deep-quote lines never look like indented code), list vs. indented-code indentation is ambiguous line-by-line, so any exemption is exploitable. Keep capping all deep indentation outside fenced code; the only cost is mild corruption of a >200-column indented-code line, which never occurs in real content and is strictly preferable to a render crash. Add a regression test locking the blank-line case.
110 lines
3.8 KiB
TypeScript
110 lines
3.8 KiB
TypeScript
import { normalizeMermaidMarkdown } from "./mermaid";
|
|
|
|
const MERMAID_BLOCK_HINT_RE = /mermaid/i;
|
|
|
|
// marked's blockquote tokenizer (used by Streamdown to split content into
|
|
// memoizable blocks) recurses once per nesting level and overflows the call
|
|
// stack at roughly 2,000 levels, replacing the whole chat route with an error
|
|
// page. 100 levels is far beyond any legitimate content while keeping a wide
|
|
// margin below the crash threshold.
|
|
const MAX_BLOCKQUOTE_DEPTH = 100;
|
|
const DEEP_BLOCKQUOTE_HINT_RE = new RegExp(
|
|
`^(?:[ \\t]*>){${MAX_BLOCKQUOTE_DEPTH + 1}}`,
|
|
"m",
|
|
);
|
|
// Only up to 3 leading spaces can start a blockquote; 4+ (or a tab) is an
|
|
// indented code block, where ">" runs are literal content.
|
|
const BLOCKQUOTE_PREFIX_RE = /^ {0,3}(?:[ \t]*>)+/;
|
|
const CODE_FENCE_RE = /^ {0,3}(?:```|~~~)/;
|
|
const INDENTED_CODE_RE = /^(?: {4}|\t)/;
|
|
|
|
// marked's list tokenizer recurses once per nesting level too (list ->
|
|
// blockTokens -> list -> ...). In the browser's tighter stack a deeply nested
|
|
// list overflows during render and throws "Maximum call stack size exceeded"
|
|
// from inside Streamdown's lexing useMemo (see issue #3393); on larger stacks
|
|
// the same input instead goes quadratic and exhausts the heap. Each list level
|
|
// requires at least ~2 columns of indentation, so capping leading whitespace at
|
|
// 200 columns bounds the effective nesting near 100 levels — far beyond any
|
|
// legitimate content while keeping marked safe. Anything indented past this is
|
|
// pathological nesting, not prose or code.
|
|
const MAX_LIST_INDENT = 200;
|
|
const DEEP_INDENT_HINT_RE = new RegExp(`^[ \\t]{${MAX_LIST_INDENT + 1},}`, "m");
|
|
|
|
export function capBlockquoteNesting(markdown: string): string {
|
|
if (!DEEP_BLOCKQUOTE_HINT_RE.test(markdown)) {
|
|
return markdown;
|
|
}
|
|
|
|
let insideFence = false;
|
|
return markdown
|
|
.split("\n")
|
|
.map((line) => {
|
|
if (CODE_FENCE_RE.test(line)) {
|
|
insideFence = !insideFence;
|
|
return line;
|
|
}
|
|
// ">" runs inside fenced or indented code blocks are literal text, not
|
|
// nesting — rewriting them would silently corrupt code content.
|
|
if (insideFence || INDENTED_CODE_RE.test(line)) {
|
|
return line;
|
|
}
|
|
const match = BLOCKQUOTE_PREFIX_RE.exec(line);
|
|
if (!match) {
|
|
return line;
|
|
}
|
|
const prefix = match[0];
|
|
let depth = 0;
|
|
for (let i = 0; i < prefix.length; i++) {
|
|
if (prefix[i] === ">") {
|
|
depth += 1;
|
|
if (depth > MAX_BLOCKQUOTE_DEPTH) {
|
|
return line.slice(0, i) + line.slice(prefix.length);
|
|
}
|
|
}
|
|
}
|
|
return line;
|
|
})
|
|
.join("\n");
|
|
}
|
|
|
|
export function capListNesting(markdown: string): string {
|
|
if (!DEEP_INDENT_HINT_RE.test(markdown)) {
|
|
return markdown;
|
|
}
|
|
|
|
let insideFence = false;
|
|
return markdown
|
|
.split("\n")
|
|
.map((line) => {
|
|
if (CODE_FENCE_RE.test(line)) {
|
|
insideFence = !insideFence;
|
|
return line;
|
|
}
|
|
// Indentation inside fenced code is literal layout (ASCII art, pasted
|
|
// source); collapsing it would corrupt the rendered block.
|
|
if (insideFence) {
|
|
return line;
|
|
}
|
|
const whitespace = /^[ \t]*/.exec(line)![0];
|
|
if (whitespace.length <= MAX_LIST_INDENT) {
|
|
return line;
|
|
}
|
|
return " ".repeat(MAX_LIST_INDENT) + line.slice(whitespace.length);
|
|
})
|
|
.join("\n");
|
|
}
|
|
|
|
// Cap every runaway nesting construct that can take down a message render
|
|
// before marked sees the content.
|
|
export function capMarkdownNesting(markdown: string): string {
|
|
return capListNesting(capBlockquoteNesting(markdown));
|
|
}
|
|
|
|
export function preprocessStreamdownMarkdown(markdown: string): string {
|
|
if (!MERMAID_BLOCK_HINT_RE.test(markdown) || !markdown.includes("-.->")) {
|
|
return markdown;
|
|
}
|
|
|
|
return normalizeMermaidMarkdown(markdown);
|
|
}
|