fix(frontend): treat any task tool error as a terminal subtask failure

The subtask card status machine matched only three English prefixes (`Task Succeeded. Result:`, `Task failed.`, `Task timed out`). Anything else fell through to `in_progress`, so a `task` tool error wrapped by `ToolErrorHandlingMiddleware` (`Error: Tool 'task' failed ...`) left the card spinning forever even after the run had ended. Extract the prefix logic into `parseSubtaskResult` and recognise any leading `Error:` token as a terminal failure. The extracted function is unit-tested against the legacy prefixes plus the `AsyncCallbackManager` regression captured in the upstream issue. Refs: bytedance/deer-flow#3107 (BUG-007)
2026-05-24 17:06:00 +00:00 · 2026-05-21 15:08:32 +08:00
parent a1ee59cde4
commit 308d43c9bd
3 changed files with 114 additions and 27 deletions
@@ -27,6 +27,7 @@ import {
 import { useRehypeSplitWordsIntoSpans } from "@/core/rehype";
 import type { Subtask } from "@/core/tasks";
 import { useUpdateSubtask } from "@/core/tasks/context";
 import { parseSubtaskResult } from "@/core/tasks/subtask-result";
 import type { AgentThreadState } from "@/core/threads";
 import { cn } from "@/lib/utils";
@@ -359,33 +360,10 @@ export function MessageList({
              } else if (message.type === "tool") {
                const taskId = message.tool_call_id;
                if (taskId) {
-                  const result = extractTextFromMessage(message);
+                  const parsed = parseSubtaskResult(
-                  if (result.startsWith("Task Succeeded. Result:")) {
+                    extractTextFromMessage(message),
-                    updateSubtask({
+                  );
-                      id: taskId,
+                  updateSubtask({ id: taskId, ...parsed });
                      status: "completed",
                      result: result
                        .split("Task Succeeded. Result:")[1]
                        ?.trim(),
                    });
                  } else if (result.startsWith("Task failed.")) {
                    updateSubtask({
                      id: taskId,
                      status: "failed",
                      error: result.split("Task failed.")[1]?.trim(),
                    });
                  } else if (result.startsWith("Task timed out")) {
                    updateSubtask({
                      id: taskId,
                      status: "failed",
                      error: result,
                    });
                  } else {
                    updateSubtask({
                      id: taskId,
                      status: "in_progress",
                    });
                  }
                }
              }
            }
@@ -0,0 +1,52 @@
 import type { Subtask } from "./types";
 /** Alias for the type of the `status` property on {@link Subtask}. */
 export type SubtaskStatus = Subtask["status"];
 /**
 * Partial subtask update produced by `parseSubtaskResult` from the text of a
 * `task` tool message.
 *
 * `parseSubtaskResult` sets `result` only together with a "completed" status
 * and `error` only together with a "failed" status; for "in_progress" both
 * are left unset.
 */
 export interface SubtaskResultUpdate {
  // Status the subtask should move to.
  status: SubtaskStatus;
  // Payload text of a successful task (set on "completed" only).
  result?: string;
  // Failure description (set on "failed" only).
  error?: string;
 }
 const SUCCESS_PREFIX = "Task Succeeded. Result:";
 const FAILURE_PREFIX = "Task failed.";
 const TIMEOUT_PREFIX = "Task timed out";
 // Matches only a leading `Error:` token (any letter case). The colon is
 // required so that ordinary output which merely begins with the word "Error"
 // (e.g. "Error handling review is underway") is not misread as a failure.
 const ERROR_TOKEN = /^Error:/i;
 /**
 * Map a `task` tool result string to a {@link SubtaskResultUpdate}.
 *
 * bytedance/deer-flow issue #3107 BUG-007: parent-visible task tool errors do
 * not always start with one of the three legacy prefixes (e.g. when
 * `ToolErrorHandlingMiddleware` wraps an exception as
 * `Error: Tool 'task' failed ...`). Treat any leading `Error:` token as a
 * terminal failure so subtask cards stop being stuck on "in_progress".
 *
 * Matching is done on the whitespace-trimmed text, in this order:
 * 1. `Task Succeeded. Result:` -> "completed"; `result` is the trimmed text
 *    after the prefix.
 * 2. `Task failed.` -> "failed"; `error` is the trimmed text after the prefix.
 * 3. `Task timed out` -> "failed"; `error` is the whole trimmed text.
 * 4. `Error:` (case-insensitive) -> "failed"; `error` is the whole trimmed
 *    text.
 * 5. Anything else -> "in_progress" with neither `result` nor `error` set.
 *
 * @param text Raw text of the `task` tool message.
 * @returns The status (and, when terminal, the result or error text) to apply
 *   to the subtask.
 */
 export function parseSubtaskResult(text: string): SubtaskResultUpdate {
  const trimmed = text.trim();
  if (trimmed.startsWith(SUCCESS_PREFIX)) {
    return {
      status: "completed",
      result: trimmed.slice(SUCCESS_PREFIX.length).trim(),
    };
  }
  if (trimmed.startsWith(FAILURE_PREFIX)) {
    return {
      status: "failed",
      error: trimmed.slice(FAILURE_PREFIX.length).trim(),
    };
  }
  // Timeouts and `Error:`-style wrappers (ToolErrorHandlingMiddleware or any
  // other backend error forwarded to the lead agent) keep the full message as
  // the error text, since there is no fixed prefix worth stripping.
  if (trimmed.startsWith(TIMEOUT_PREFIX) || ERROR_TOKEN.test(trimmed)) {
    return { status: "failed", error: trimmed };
  }
  return { status: "in_progress" };
 }
@@ -0,0 +1,57 @@
 import { describe, expect, it } from "vitest";
 import { parseSubtaskResult } from "@/core/tasks/subtask-result";
 // Behavioural spec for parseSubtaskResult: the three legacy prefixes, the
 // `Error:` wrapper added for bytedance/deer-flow#3107 (BUG-007), the
 // non-terminal fallback, and whitespace tolerance.
 describe("parseSubtaskResult", () => {
  it("recognises the standard success prefix", () => {
    const input =
      "Task Succeeded. Result: investigated and produced a 3-page report";
    const { status, result } = parseSubtaskResult(input);
    expect(status).toBe("completed");
    expect(result).toBe("investigated and produced a 3-page report");
  });
  it("recognises the standard failure prefix", () => {
    const input = "Task failed. underlying tool raised RuntimeError";
    const { status, error } = parseSubtaskResult(input);
    expect(status).toBe("failed");
    expect(error).toBe("underlying tool raised RuntimeError");
  });
  it("recognises the standard timeout prefix", () => {
    const input = "Task timed out after 900s";
    const { status, error } = parseSubtaskResult(input);
    expect(status).toBe("failed");
    // The timeout message is kept whole rather than stripped of its prefix.
    expect(error).toBe("Task timed out after 900s");
  });
  it("treats middleware-wrapped tool errors as terminal failures", () => {
    // Regression for bytedance/deer-flow issue #3107 BUG-007: the ToolMessage
    // emitted by ToolErrorHandlingMiddleware matches none of the three legacy
    // prefixes, which used to leave the subtask card on "in_progress".
    const input =
      "Error: Tool 'task' failed with TypeError: 'AsyncCallbackManager' object is not iterable. Continue with available context, or choose an alternative tool.";
    const { status, error } = parseSubtaskResult(input);
    expect(status).toBe("failed");
    expect(error).toContain("AsyncCallbackManager");
  });
  it("treats any other Error: prefix as a terminal failure", () => {
    const { status } = parseSubtaskResult(
      "Error: subagent worker pool exhausted",
    );
    expect(status).toBe("failed");
  });
  it("keeps unrecognised non-error output as in_progress", () => {
    // A partial streaming chunk must not move the card to a terminal state.
    const { status, error, result } = parseSubtaskResult("Investigating ...");
    expect(status).toBe("in_progress");
    expect(error).toBeUndefined();
    expect(result).toBeUndefined();
  });
  it("trims surrounding whitespace before matching prefixes", () => {
    const { status, result } = parseSubtaskResult(
      "   Task Succeeded. Result: ok   ",
    );
    expect(status).toBe("completed");
    expect(result).toBe("ok");
  });
 });