From 258ca800fe2812bb0204c32a19f45fde0b7f13ce Mon Sep 17 00:00:00 2001 From: fancyboi999 <135568692+fancyboi999@users.noreply.github.com> Date: Thu, 21 May 2026 18:52:39 +0800 Subject: [PATCH] fix(frontend): sanitize JSON export content via the Markdown content path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address @ShenAC-SAC's BUG-006 review and the Copilot inline comment on #3131. The previous cut filtered hidden/tool messages out of the JSON export but still serialised `msg.content` verbatim, so: - inline `<think>` wrappers stayed in the exported `content` even with `includeReasoning: false`, - content-array thinking blocks leaked the `thinking` field, - `<uploaded_files>` markers leaked the workspace paths a user uploaded files to. JSON now goes through the same sanitiser the Markdown path uses (`extractContentFromMessage` + `stripUploadedFilesTag`). Reasoning and tool_calls remain gated behind their `ExportOptions` flags. AI / human rows that sanitise to empty content with no opted-in reasoning or tool calls are dropped so the JSON matches the Markdown path's `continue` on empty assistant fragments. New regression tests cover the three leak shapes the reviewer called out plus the empty-content-drop case. 
Refs: bytedance/deer-flow#3107 (BUG-006), bytedance/deer-flow#3131 review --- frontend/src/core/threads/export.ts | 58 +++++++++++++++---- .../tests/unit/core/threads/export.test.ts | 48 +++++++++++++++ 2 files changed, 96 insertions(+), 10 deletions(-) diff --git a/frontend/src/core/threads/export.ts b/frontend/src/core/threads/export.ts index 92c4ae85f..b98791ccd 100644 --- a/frontend/src/core/threads/export.ts +++ b/frontend/src/core/threads/export.ts @@ -122,6 +122,51 @@ export function formatThreadAsMarkdown( return lines.join("\n").trimEnd() + "\n"; } +interface JSONExportMessage { + type: Message["type"]; + id: string | undefined; + content: string; + reasoning?: string; + tool_calls?: unknown; +} + +function buildJSONMessage( + msg: Message, + options: ExportOptions, +): JSONExportMessage | null { + // Run the same sanitiser the Markdown path uses so the JSON `content` + // field never carries inline `...` wrappers, content-array + // thinking blocks, `` markers, or other internal payloads. + const content = formatMessageContent(msg); + const reasoning = + options.includeReasoning && msg.type === "ai" + ? (extractReasoningContentFromMessage(msg) ?? undefined) + : undefined; + const toolCalls = + options.includeToolCalls && + msg.type === "ai" && + "tool_calls" in msg && + msg.tool_calls?.length + ? msg.tool_calls + : undefined; + + // Drop rows with no exportable payload (empty content + no opted-in + // reasoning / tool_calls). This matches the Markdown path's `continue` + // on `!content && !toolCalls && !reasoning` so the two formats agree on + // which AI fragments are visible to the user. + if (!content && reasoning === undefined && toolCalls === undefined) { + return null; + } + + return { + type: msg.type, + id: msg.id, + content, + ...(reasoning !== undefined ? { reasoning } : {}), + ...(toolCalls !== undefined ? 
{ tool_calls: toolCalls } : {}), + }; +} + export function formatThreadAsJSON( thread: AgentThread, messages: Message[], @@ -132,16 +177,9 @@ export function formatThreadAsJSON( thread_id: thread.thread_id, created_at: thread.created_at, exported_at: new Date().toISOString(), - messages: visibleMessages(messages, options).map((msg) => ({ - type: msg.type, - id: msg.id, - content: msg.content, - ...(options.includeToolCalls && - msg.type === "ai" && - msg.tool_calls?.length - ? { tool_calls: msg.tool_calls } - : {}), - })), + messages: visibleMessages(messages, options) + .map((msg) => buildJSONMessage(msg, options)) + .filter((m): m is JSONExportMessage => m !== null), }; return JSON.stringify(exportData, null, 2); } diff --git a/frontend/tests/unit/core/threads/export.test.ts b/frontend/tests/unit/core/threads/export.test.ts index fd59ff4cb..eec49fa35 100644 --- a/frontend/tests/unit/core/threads/export.test.ts +++ b/frontend/tests/unit/core/threads/export.test.ts @@ -191,4 +191,52 @@ describe("formatThreadAsJSON", () => { expect(raw).not.toContain("internal trace"); expect(raw).not.toContain("tool_calls"); }); + + it("strips inline ... wrappers from content", () => { + // bytedance/deer-flow#3131 review: JSON export must run the same + // sanitiser the Markdown path uses so inline reasoning never leaks + // even when `includeReasoning` is left at its default false. 
+ const message = ai("internal monologuevisible answer", { + id: "ai-1", + } as Partial); + const raw = formatThreadAsJSON(makeThread(), [message]); + expect(raw).not.toContain("internal monologue"); + expect(raw).not.toContain(""); + expect(raw).toContain("visible answer"); + }); + + it("strips content-array thinking blocks from content", () => { + const message = ai("placeholder", { + id: "ai-2", + content: [ + { type: "thinking", thinking: "hidden reasoning step" }, + { type: "text", text: "final visible text" }, + ], + } as unknown as Partial); + const raw = formatThreadAsJSON(makeThread(), [message]); + expect(raw).not.toContain("hidden reasoning step"); + expect(raw).toContain("final visible text"); + }); + + it("strips markers from content", () => { + const message = human( + "real prompt\n\n/mnt/user-data/uploads/secret.pdf\n", + { id: "h-clean" } as Partial, + ); + const raw = formatThreadAsJSON(makeThread(), [message]); + expect(raw).not.toContain(""); + expect(raw).not.toContain("secret.pdf"); + expect(raw).toContain("real prompt"); + }); + + it("drops AI messages that sanitise to empty content", () => { + // Pure-reasoning AI fragments (no visible text, no tool calls) should + // not survive as `{content: ""}` rows in the export. + const message = ai("only thinking, no answer", { + id: "ai-3", + } as Partial); + const raw = formatThreadAsJSON(makeThread(), [message]); + const parsed = JSON.parse(raw) as { messages: unknown[] }; + expect(parsed.messages).toHaveLength(0); + }); });