mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-23 00:16:48 +00:00
fix(frontend): sanitize JSON export content via the Markdown content path
Address @ShenAC-SAC's BUG-006 review and the Copilot inline comment on #3131.

The previous cut filtered hidden/tool messages out of the JSON export but still serialised `msg.content` verbatim, so:

- inline `<think>…</think>` wrappers stayed in the exported `content` even with `includeReasoning: false`,
- content-array thinking blocks leaked the `thinking` field,
- `<uploaded_files>…</uploaded_files>` markers leaked the workspace paths a user uploaded files to.

JSON now goes through the same sanitiser the Markdown path uses (`extractContentFromMessage` + `stripUploadedFilesTag`). Reasoning and tool_calls remain gated behind their `ExportOptions` flags. AI / human rows that sanitise to empty content with no opted-in reasoning or tool calls are dropped, so the JSON matches the Markdown path's `continue` on empty assistant fragments.

New regression tests cover the three leak shapes the reviewer called out, plus the empty-content-drop case.

Refs: bytedance/deer-flow#3107 (BUG-006), bytedance/deer-flow#3131 review
This commit is contained in:
@@ -122,6 +122,51 @@ export function formatThreadAsMarkdown(
|
|||||||
return lines.join("\n").trimEnd() + "\n";
|
return lines.join("\n").trimEnd() + "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * One entry of the `messages` array in the JSON export, as produced by
 * `buildJSONMessage`.
 */
interface JSONExportMessage {
  /** Copied from the source message's `type`. */
  type: Message["type"];
  /** Copied from the source message's `id`; may be `undefined`. */
  id: string | undefined;
  /** Export-safe text of the message (not the raw `msg.content`). */
  content: string;
  /** Present only when reasoning was opted into and is available. */
  reasoning?: string;
  /** Present only when tool calls were opted into and the message has some. */
  tool_calls?: unknown;
}
|
||||||
|
|
||||||
|
/**
 * Builds the JSON export row for a single message, or returns `null` when
 * the message has nothing exportable.
 *
 * `reasoning` and `tool_calls` are only ever attached to `ai` messages, and
 * only when the matching `ExportOptions` flag is set.
 *
 * @param msg - Message to convert.
 * @param options - Export flags; `includeReasoning` and `includeToolCalls`
 *   are read here.
 * @returns The export row, or `null` when the sanitised content is empty and
 *   neither reasoning nor tool calls are being exported for this message.
 */
function buildJSONMessage(
  msg: Message,
  options: ExportOptions,
): JSONExportMessage | null {
  // Run the same sanitiser the Markdown path uses so the JSON `content`
  // field never carries inline `<think>...</think>` wrappers, content-array
  // thinking blocks, `<uploaded_files>` markers, or other internal payloads.
  const content = formatMessageContent(msg);

  const extractedReasoning =
    options.includeReasoning && msg.type === "ai"
      ? extractReasoningContentFromMessage(msg)
      : null;
  // Collapse both "no reasoning" (null/undefined) and an empty string to
  // `undefined`: an empty reasoning string is not a payload, so it must
  // neither be emitted as `reasoning: ""` nor keep an otherwise empty row
  // alive in the drop check below.
  const reasoning = extractedReasoning ? extractedReasoning : undefined;

  const toolCalls =
    options.includeToolCalls &&
    msg.type === "ai" &&
    "tool_calls" in msg &&
    msg.tool_calls?.length
      ? msg.tool_calls
      : undefined;

  // Drop rows with no exportable payload (empty content + no opted-in
  // reasoning / tool_calls). This matches the Markdown path's `continue`
  // on `!content && !toolCalls && !reasoning` so the two formats agree on
  // which AI fragments are visible to the user.
  if (!content && reasoning === undefined && toolCalls === undefined) {
    return null;
  }

  // Optional keys are added only when set, so they are absent from the
  // serialised JSON rather than present with an `undefined` value.
  const row: JSONExportMessage = { type: msg.type, id: msg.id, content };
  if (reasoning !== undefined) {
    row.reasoning = reasoning;
  }
  if (toolCalls !== undefined) {
    row.tool_calls = toolCalls;
  }
  return row;
}
|
||||||
|
|
||||||
export function formatThreadAsJSON(
|
export function formatThreadAsJSON(
|
||||||
thread: AgentThread,
|
thread: AgentThread,
|
||||||
messages: Message[],
|
messages: Message[],
|
||||||
@@ -132,16 +177,9 @@ export function formatThreadAsJSON(
|
|||||||
thread_id: thread.thread_id,
|
thread_id: thread.thread_id,
|
||||||
created_at: thread.created_at,
|
created_at: thread.created_at,
|
||||||
exported_at: new Date().toISOString(),
|
exported_at: new Date().toISOString(),
|
||||||
messages: visibleMessages(messages, options).map((msg) => ({
|
messages: visibleMessages(messages, options)
|
||||||
type: msg.type,
|
.map((msg) => buildJSONMessage(msg, options))
|
||||||
id: msg.id,
|
.filter((m): m is JSONExportMessage => m !== null),
|
||||||
content: msg.content,
|
|
||||||
...(options.includeToolCalls &&
|
|
||||||
msg.type === "ai" &&
|
|
||||||
msg.tool_calls?.length
|
|
||||||
? { tool_calls: msg.tool_calls }
|
|
||||||
: {}),
|
|
||||||
})),
|
|
||||||
};
|
};
|
||||||
return JSON.stringify(exportData, null, 2);
|
return JSON.stringify(exportData, null, 2);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -191,4 +191,52 @@ describe("formatThreadAsJSON", () => {
|
|||||||
expect(raw).not.toContain("internal trace");
|
expect(raw).not.toContain("internal trace");
|
||||||
expect(raw).not.toContain("tool_calls");
|
expect(raw).not.toContain("tool_calls");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("strips inline <think>...</think> wrappers from content", () => {
  // Regression for the bytedance/deer-flow#3131 review: with
  // `includeReasoning` left at its default (false), inline reasoning must
  // not appear anywhere in the JSON export, only the visible answer.
  const overrides = { id: "ai-1" } as Partial<Message>;
  const exported = formatThreadAsJSON(makeThread(), [
    ai("<think>internal monologue</think>visible answer", overrides),
  ]);

  expect(exported).toContain("visible answer");
  expect(exported).not.toContain("<think>");
  expect(exported).not.toContain("internal monologue");
});
|
||||||
|
|
||||||
|
it("strips content-array thinking blocks from content", () => {
  // Content supplied as an array of blocks: the `thinking` block must be
  // removed from the export while the `text` block is kept.
  const overrides = {
    id: "ai-2",
    content: [
      { type: "thinking", thinking: "hidden reasoning step" },
      { type: "text", text: "final visible text" },
    ],
  } as unknown as Partial<Message>;
  const exported = formatThreadAsJSON(makeThread(), [
    ai("placeholder", overrides),
  ]);

  expect(exported).toContain("final visible text");
  expect(exported).not.toContain("hidden reasoning step");
});
|
||||||
|
|
||||||
|
it("strips <uploaded_files> markers from content", () => {
  // The `<uploaded_files>` block and the path inside it must not reach the
  // export; the user's actual prompt text must.
  const prompt =
    "real prompt\n<uploaded_files>\n/mnt/user-data/uploads/secret.pdf\n</uploaded_files>";
  const exported = formatThreadAsJSON(makeThread(), [
    human(prompt, { id: "h-clean" } as Partial<Message>),
  ]);

  expect(exported).toContain("real prompt");
  expect(exported).not.toContain("secret.pdf");
  expect(exported).not.toContain("<uploaded_files>");
});
|
||||||
|
|
||||||
|
it("drops AI messages that sanitise to empty content", () => {
  // An AI fragment that is reasoning only (no visible text, no tool calls)
  // must be omitted entirely instead of exported as a `{content: ""}` row.
  const overrides = { id: "ai-3" } as Partial<Message>;
  const exported = formatThreadAsJSON(makeThread(), [
    ai("<think>only thinking, no answer</think>", overrides),
  ]);

  const { messages } = JSON.parse(exported) as { messages: unknown[] };
  expect(messages).toHaveLength(0);
});
|
||||||
});
|
});
|
||||||
|
|||||||
Reference in New Issue
Block a user