From 258ca800fe2812bb0204c32a19f45fde0b7f13ce Mon Sep 17 00:00:00 2001
From: fancyboi999 <135568692+fancyboi999@users.noreply.github.com>
Date: Thu, 21 May 2026 18:52:39 +0800
Subject: [PATCH] fix(frontend): sanitize JSON export content via the Markdown
content path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Address @ShenAC-SAC's BUG-006 review and the Copilot inline comment on
#3131. The previous cut filtered hidden/tool messages out of the JSON
export but still serialised `msg.content` verbatim, so:
- inline `<think>…</think>` wrappers stayed in the exported `content`
even with `includeReasoning: false`,
- content-array thinking blocks leaked the `thinking` field,
- `<uploaded_files>…</uploaded_files>` markers leaked the workspace
paths a user uploaded files to.
JSON now goes through the same sanitiser the Markdown path uses
(`extractContentFromMessage` + `stripUploadedFilesTag`). Reasoning and
tool_calls remain gated behind their `ExportOptions` flags. AI / human
rows that sanitise to empty content with no opted-in reasoning or tool
calls are dropped so the JSON matches the Markdown path's `continue`
on empty assistant fragments.
New regression tests cover the three leak shapes the reviewer called
out plus the empty-content-drop case.
Refs: bytedance/deer-flow#3107 (BUG-006), bytedance/deer-flow#3131 review
---
frontend/src/core/threads/export.ts | 58 +++++++++++++++----
.../tests/unit/core/threads/export.test.ts | 48 +++++++++++++++
2 files changed, 96 insertions(+), 10 deletions(-)
diff --git a/frontend/src/core/threads/export.ts b/frontend/src/core/threads/export.ts
index 92c4ae85f..b98791ccd 100644
--- a/frontend/src/core/threads/export.ts
+++ b/frontend/src/core/threads/export.ts
@@ -122,6 +122,51 @@ export function formatThreadAsMarkdown(
return lines.join("\n").trimEnd() + "\n";
}
+interface JSONExportMessage {
+ type: Message["type"]; // copied from `msg.type`
+ id: string | undefined; // copied from `msg.id`
+ content: string; // result of `formatMessageContent(msg)`, never raw `msg.content`
+ reasoning?: string; // only set for "ai" messages when `includeReasoning` is on
+ tool_calls?: unknown; // only set for "ai" messages with calls when `includeToolCalls` is on
+}
+
+function buildJSONMessage(
+  msg: Message,
+  options: ExportOptions,
+): JSONExportMessage | null {
+  // Run the same sanitiser the Markdown path uses so the JSON `content`
+  // field never carries inline `<think>...</think>` wrappers, content-array
+  // thinking blocks, `<uploaded_files>` markers, or other internal payloads.
+  const content = formatMessageContent(msg);
+  const reasoning =
+    options.includeReasoning && msg.type === "ai"
+      ? (extractReasoningContentFromMessage(msg) ?? undefined)
+      : undefined;
+  const toolCalls =
+    options.includeToolCalls &&
+    msg.type === "ai" &&
+    "tool_calls" in msg &&
+    msg.tool_calls?.length
+      ? msg.tool_calls
+      : undefined;
+
+  // Drop rows with no exportable payload (empty content + no opted-in
+  // reasoning / tool_calls). `reasoning` is tested by truthiness so an
+  // empty string counts as absent, exactly like the Markdown path's
+  // `continue` on `!content && !toolCalls && !reasoning`.
+  if (!content && !reasoning && toolCalls === undefined) {
+    return null;
+  }
+
+  return {
+    type: msg.type,
+    id: msg.id,
+    content,
+    ...(reasoning ? { reasoning } : {}),
+    ...(toolCalls !== undefined ? { tool_calls: toolCalls } : {}),
+  };
+}
+
export function formatThreadAsJSON(
thread: AgentThread,
messages: Message[],
@@ -132,16 +177,9 @@ export function formatThreadAsJSON(
thread_id: thread.thread_id,
created_at: thread.created_at,
exported_at: new Date().toISOString(),
- messages: visibleMessages(messages, options).map((msg) => ({
- type: msg.type,
- id: msg.id,
- content: msg.content,
- ...(options.includeToolCalls &&
- msg.type === "ai" &&
- msg.tool_calls?.length
- ? { tool_calls: msg.tool_calls }
- : {}),
- })),
+ messages: visibleMessages(messages, options)
+ .map((msg) => buildJSONMessage(msg, options))
+ .filter((m): m is JSONExportMessage => m !== null),
};
return JSON.stringify(exportData, null, 2);
}
diff --git a/frontend/tests/unit/core/threads/export.test.ts b/frontend/tests/unit/core/threads/export.test.ts
index fd59ff4cb..eec49fa35 100644
--- a/frontend/tests/unit/core/threads/export.test.ts
+++ b/frontend/tests/unit/core/threads/export.test.ts
@@ -191,4 +191,52 @@ describe("formatThreadAsJSON", () => {
expect(raw).not.toContain("internal trace");
expect(raw).not.toContain("tool_calls");
});
+
+  it("strips inline <think>...</think> wrappers from content", () => {
+    // bytedance/deer-flow#3131 review: JSON export must run the same
+    // sanitiser the Markdown path uses so inline reasoning never leaks
+    // even when `includeReasoning` is left at its default false.
+    const message = ai("<think>internal monologue</think>visible answer", {
+      id: "ai-1",
+    } as Partial<Message>);
+    const raw = formatThreadAsJSON(makeThread(), [message]);
+    expect(raw).not.toContain("internal monologue");
+    expect(raw).not.toContain("<think>");
+    expect(raw).toContain("visible answer");
+  });
+
+  it("strips content-array thinking blocks from content", () => {
+    const message = ai("placeholder", {
+      id: "ai-2",
+      content: [ // presumably overrides the "placeholder" string; confirm against the `ai` helper
+        { type: "thinking", thinking: "hidden reasoning step" },
+        { type: "text", text: "final visible text" },
+      ],
+    } as unknown as Partial<Message>);
+    const raw = formatThreadAsJSON(makeThread(), [message]);
+    expect(raw).not.toContain("hidden reasoning step");
+    expect(raw).toContain("final visible text");
+  });
+
+  it("strips <uploaded_files> markers from content", () => {
+    const message = human(
+      "real prompt\n\n<uploaded_files>/mnt/user-data/uploads/secret.pdf\n</uploaded_files>",
+      { id: "h-clean" } as Partial<Message>,
+    );
+    const raw = formatThreadAsJSON(makeThread(), [message]);
+    expect(raw).not.toContain("<uploaded_files>");
+    expect(raw).not.toContain("secret.pdf");
+    expect(raw).toContain("real prompt");
+  });
+
+  it("drops AI messages that sanitise to empty content", () => {
+    // Pure-reasoning AI fragments (no visible text, no tool calls) should
+    // not survive as `{content: ""}` rows in the export.
+    const message = ai("<think>only thinking, no answer</think>", {
+      id: "ai-3",
+    } as Partial<Message>);
+    const raw = formatThreadAsJSON(makeThread(), [message]);
+    const parsed = JSON.parse(raw) as { messages: unknown[] };
+    expect(parsed.messages).toHaveLength(0);
+  });
});