import { existsSync, readFileSync, writeFileSync } from "node:fs"; import { expect, test } from "@playwright/test"; /** * RECORD driver (Plan A): drive the real frontend through the write/read-file * scenario against the real-model gateway. The gateway captures every model * call to DEERFLOW_RECORD_OUT; this just needs to drive the flow and wait until * the captures stop arriving (main turns + in-graph title + follow-up * suggestions all fired). It asserts nothing about content — it produces the * fixture, it doesn't verify it. */ const APP = "http://localhost:3000"; const SCENARIO = "write_read_file"; const MODE = "ultra"; const PROMPT = "Using your own file tools directly, create the file /mnt/user-data/outputs/note.txt " + "with exactly this content: hi from replay. Then read that same file back and reply with its " + "exact contents. Do NOT delegate to a subagent and do NOT use the task tool — do it yourself. " + "Do not ask any clarifying questions."; function countLines(path: string): number { return existsSync(path) ? readFileSync(path, "utf-8") .split("\n") .filter((l) => l.trim()).length : 0; } async function waitForCaptureStable( path: string, { stableMs = 12_000, maxMs = 160_000 } = {}, ): Promise { const start = Date.now(); let last = -1; let lastChange = Date.now(); while (Date.now() - start < maxMs) { const n = countLines(path); if (n !== last) { last = n; lastChange = Date.now(); } else if (n > 0 && Date.now() - lastChange > stableMs) { return n; } await new Promise((r) => setTimeout(r, 1000)); } // Hard failure on timeout: returning the last count here would let a // truncated/partial recording pass silently (captured > 0). A recording must // stabilize, or it is not trustworthy. throw new Error( `[record] captures never stabilized within ${maxMs}ms (last count=${last}); ` + `the recording may be truncated — raise maxMs or check the record gateway.`, ); } test.describe.configure({ timeout: 220_000 }); test("record write/read-file run through the real frontend", async ({ page, context, }) => { const out = process.env.DEERFLOW_RECORD_OUT; expect(out, "DEERFLOW_RECORD_OUT must be set").toBeTruthy(); // The context the frontend derives for ultra mode (core/threads/hooks.ts). The // backend-direct golden test (Layer 1) POSTs this so its prompt — hence the // recorded input hashes — matches the browser run. thinking/reasoning don't // affect the prompt; is_plan_mode + subagent_enabled add the todo/task tools. const CONTEXT = { is_bootstrap: false, mode: MODE, thinking_enabled: true, is_plan_mode: true, subagent_enabled: true, }; writeFileSync( `${out}.meta.json`, JSON.stringify({ scenario: SCENARIO, mode: MODE, prompt: PROMPT, context: CONTEXT, }), "utf-8", ); const reg = await context.request.post(`${APP}/api/v1/auth/register`, { data: { email: `rec-${Date.now()}@example.com`, password: "very-strong-password-123", }, }); expect(reg.status(), await reg.text()).toBe(201); await page.addInitScript(() => { window.localStorage.setItem( "deerflow.local-settings", JSON.stringify({ context: { mode: "ultra" } }), ); }); await page.goto("/workspace/chats/new"); const textarea = page.getByPlaceholder(/how can i assist you/i); await expect(textarea).toBeVisible({ timeout: 30_000 }); await textarea.fill(PROMPT); await textarea.press("Enter"); const captured = await waitForCaptureStable(out!); console.log( `[record] captures stabilized at ${captured} model call(s) -> ${out}`, ); expect( captured, "expected at least the agent turns to be captured", ).toBeGreaterThan(0); });