deer-flow/frontend/tests/e2e-real-backend/multi-run-order.spec.ts

import { expect, test } from "@playwright/test";

/**
 * Layer 2 (cross-stack contract): reproduces upstream issue #3352 — after the
 * checkpoint no longer holds the older messages (post context-compression), the
 * frontend rebuilds thread history from the per-run endpoints, and the order it
 * rebuilds them in must stay chronological.
 *
 * The dangerous class this guards: a BACKEND change to run ordering silently
 * breaks a FRONTEND assumption. Backend `list_by_thread` returns runs
 * NEWEST-FIRST (PR #2932); the pre-#3354 frontend iterated runs from the end and
 * PREPENDED each loaded page (`core/threads/hooks.ts`), which inverts order. A
 * backend-only ordering test was green the whole time #3352 was live, and the
 * frontend regression unit test hardcodes "backend returns newest-first" in a
 * mock — so only a real frontend against a real backend catches the desync.
 *
 * This drives the REAL frontend against a REAL gateway with two seeded runs and
 * NO checkpoint (the seeder forces the per-run reload path to be the sole source
 * of truth), then asserts the first run's message renders ABOVE the second's.
 * No model, no recording, no API key — the runs are seeded via a test-only
 * endpoint mounted only on the replay gateway.
 */
const APP = "http://localhost:3000";

// Distinctive markers so getByText can't collide with UI chrome.
const ALPHA = "ALPHA-FIRST-QUESTION-7f3a2c";
const OMEGA = "OMEGA-SECOND-QUESTION-9b21d4";

test.describe("multi-run thread renders chronologically (replay, no API key)", () => {
  test("first run renders above second run after history rebuild (#3352)", async ({
    page,
    context,
  }) => {
    const uniq = `${Date.now()}-${Math.floor(Math.random() * 1e6)}`;
    const threadId = `e2e-multi-run-${uniq}`;
    const email = `e2e-${uniq}@example.com`;

    // Register through the frontend origin (same-origin proxy) so the auth
    // cookies are stored for localhost and forwarded to the gateway via the
    // next.config rewrite — never cross-origin from the browser.
    const reg = await context.request.post(`${APP}/api/v1/auth/register`, {
      data: { email, password: "very-strong-password-123" },
    });
    expect(reg.status(), await reg.text()).toBe(201);

    const cookies = await context.cookies();
    const csrf = cookies.find((c) => c.name === "csrf_token")?.value;
    expect(csrf, "register must set csrf_token cookie").toBeTruthy();

    // Seed two runs in one thread: run-1 (ALPHA) older, run-2 (OMEGA) newer, so
    // the real backend's list_by_thread returns them newest-first. No checkpoint
    // is seeded — that is the #3352 precondition.
    const seed = await context.request.post(`${APP}/api/test-only/seed-runs`, {
      headers: { "X-CSRF-Token": csrf! },
      data: {
        thread_id: threadId,
        runs: [
          {
            run_id: `${threadId}-r1`,
            created_at: "2026-01-01T00:00:00+00:00",
            messages: [
              { role: "human", content: ALPHA, id: `${threadId}-a-h` },
              { role: "ai", content: "ALPHA reply", id: `${threadId}-a-a` },
            ],
          },
          {
            run_id: `${threadId}-r2`,
            created_at: "2026-01-01T00:01:00+00:00",
            messages: [
              { role: "human", content: OMEGA, id: `${threadId}-o-h` },
              { role: "ai", content: "OMEGA reply", id: `${threadId}-o-a` },
            ],
          },
        ],
      },
    });
    expect(seed.status(), await seed.text()).toBe(200);

    // Load the thread fresh — triggers useThreadHistory's per-run reload path.
    await page.goto(`/workspace/chats/${threadId}`);

    const alpha = page.getByText(ALPHA, { exact: false });
    const omega = page.getByText(OMEGA, { exact: false });
    await expect(alpha).toBeVisible({ timeout: 60_000 });
    await expect(omega).toBeVisible({ timeout: 30_000 });
    // Each marker renders exactly once (guards against accidental duplicate matches).
    expect(await alpha.count(), "ALPHA should render exactly once").toBe(1);
    expect(await omega.count(), "OMEGA should render exactly once").toBe(1);

    // The contract: ALPHA (first run) must render ABOVE OMEGA (second run). With
    // the #3352 bug the per-run rebuild inverts this and OMEGA renders first.
    const alphaBox = await alpha.first().boundingBox();
    const omegaBox = await omega.first().boundingBox();
    expect(alphaBox, "ALPHA must have a layout box").toBeTruthy();
    expect(omegaBox, "OMEGA must have a layout box").toBeTruthy();
    expect(
      alphaBox!.y,
      `chronological order broken: ALPHA(first run) rendered at y=${alphaBox!.y}, OMEGA(second run) at y=${omegaBox!.y} — backend list_by_thread ordering and frontend history rebuild are out of sync (#3352)`,
    ).toBeLessThan(omegaBox!.y);
  });
});