fix(frontend): sanitize JSON export content via the Markdown content path

Address @ShenAC-SAC's BUG-006 review and the Copilot inline comment on
#3131. The previous cut filtered hidden/tool messages out of the JSON
export but still serialised `msg.content` verbatim, so:

- inline `<think>…</think>` wrappers stayed in the exported `content`
  even with `includeReasoning: false`,
- content-array thinking blocks leaked the `thinking` field,
- `<uploaded_files>…</uploaded_files>` markers leaked the workspace
  paths a user uploaded files to.

JSON now goes through the same sanitiser the Markdown path uses
(`formatMessageContent`, i.e. `extractContentFromMessage` +
`stripUploadedFilesTag`). Reasoning and tool_calls remain gated behind
their `ExportOptions` flags. AI / human rows that sanitise to empty
content with no opted-in reasoning or tool calls are dropped, so the
JSON output matches the Markdown path's `continue` on empty assistant
fragments.

New regression tests cover the three leak shapes the reviewer called
out plus the empty-content-drop case.

Refs: bytedance/deer-flow#3107 (BUG-006), bytedance/deer-flow#3131 review
This commit is contained in:
fancyboi999
2026-05-21 18:52:39 +08:00
parent 50e2c257bf
commit 258ca800fe
2 changed files with 96 additions and 10 deletions
+48 -10
View File
@@ -122,6 +122,51 @@ export function formatThreadAsMarkdown(
return lines.join("\n").trimEnd() + "\n";
}
/**
 * Shape of one entry in the `messages` array of the JSON export, as
 * produced by `buildJSONMessage`.
 */
interface JSONExportMessage {
/** Message kind, copied unchanged from the source message's `type`. */
type: Message["type"];
/** Source message id, copied unchanged; may be `undefined`. */
id: string | undefined;
/** Sanitised text returned by `formatMessageContent`, not the raw `msg.content`. */
content: string;
/** Set only for `ai` messages when `options.includeReasoning` is enabled. */
reasoning?: string;
/** Set only for `ai` messages with a non-empty `tool_calls` list when `options.includeToolCalls` is enabled. */
tool_calls?: unknown;
}
/**
 * Build the JSON-export row for a single message.
 *
 * The `content` field is taken from `formatMessageContent` (the sanitiser
 * shared with the Markdown export) instead of the raw `msg.content`, so
 * inline `<think>...</think>` wrappers, content-array thinking blocks and
 * `<uploaded_files>` markers are not serialised into the export.
 *
 * @param msg - The message to convert.
 * @param options - Export flags; `includeReasoning` and `includeToolCalls`
 *   gate the optional `reasoning` / `tool_calls` fields, which are only
 *   ever emitted for `ai` messages.
 * @returns The export row, or `null` when the message has no exportable
 *   payload (empty content and no opted-in reasoning or tool calls).
 */
function buildJSONMessage(
  msg: Message,
  options: ExportOptions,
): JSONExportMessage | null {
  const content = formatMessageContent(msg);

  const extractedReasoning =
    options.includeReasoning && msg.type === "ai"
      ? extractReasoningContentFromMessage(msg)
      : undefined;
  // Treat an empty reasoning string the same as "no reasoning". The Markdown
  // path skips a fragment on `!content && !toolCalls && !reasoning`
  // (truthiness), so `""` must not keep an otherwise empty row alive or be
  // emitted as `reasoning: ""`.
  const reasoning = extractedReasoning ? extractedReasoning : undefined;

  const toolCalls =
    options.includeToolCalls &&
    msg.type === "ai" &&
    "tool_calls" in msg &&
    msg.tool_calls?.length
      ? msg.tool_calls
      : undefined;

  // Drop rows with nothing to export so the JSON and Markdown formats agree
  // on which fragments are visible to the user.
  if (!content && reasoning === undefined && toolCalls === undefined) {
    return null;
  }

  return {
    type: msg.type,
    id: msg.id,
    content,
    ...(reasoning !== undefined ? { reasoning } : {}),
    ...(toolCalls !== undefined ? { tool_calls: toolCalls } : {}),
  };
}
export function formatThreadAsJSON(
thread: AgentThread,
messages: Message[],
@@ -132,16 +177,9 @@ export function formatThreadAsJSON(
thread_id: thread.thread_id,
created_at: thread.created_at,
exported_at: new Date().toISOString(),
messages: visibleMessages(messages, options).map((msg) => ({
type: msg.type,
id: msg.id,
content: msg.content,
...(options.includeToolCalls &&
msg.type === "ai" &&
msg.tool_calls?.length
? { tool_calls: msg.tool_calls }
: {}),
})),
messages: visibleMessages(messages, options)
.map((msg) => buildJSONMessage(msg, options))
.filter((m): m is JSONExportMessage => m !== null),
};
return JSON.stringify(exportData, null, 2);
}