mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-24 17:06:00 +00:00
fix(frontend): treat any task tool error as a terminal subtask failure
The subtask card status machine matched only three English prefixes (`Task Succeeded. Result:`, `Task failed.`, `Task timed out`). Anything else fell through to `in_progress`, so a `task` tool error wrapped by `ToolErrorHandlingMiddleware` (`Error: Tool 'task' failed ...`) left the card spinning forever even after the run had ended. Extract the prefix logic into `parseSubtaskResult` and recognise any leading `Error:` token as a terminal failure. The extracted function is unit-tested against the legacy prefixes plus the `AsyncCallbackManager` regression captured in the upstream issue. Refs: bytedance/deer-flow#3107 (BUG-007)
This commit is contained in:
@@ -27,6 +27,7 @@ import {
|
|||||||
import { useRehypeSplitWordsIntoSpans } from "@/core/rehype";
|
import { useRehypeSplitWordsIntoSpans } from "@/core/rehype";
|
||||||
import type { Subtask } from "@/core/tasks";
|
import type { Subtask } from "@/core/tasks";
|
||||||
import { useUpdateSubtask } from "@/core/tasks/context";
|
import { useUpdateSubtask } from "@/core/tasks/context";
|
||||||
|
import { parseSubtaskResult } from "@/core/tasks/subtask-result";
|
||||||
import type { AgentThreadState } from "@/core/threads";
|
import type { AgentThreadState } from "@/core/threads";
|
||||||
import { cn } from "@/lib/utils";
|
import { cn } from "@/lib/utils";
|
||||||
|
|
||||||
@@ -359,33 +360,10 @@ export function MessageList({
|
|||||||
} else if (message.type === "tool") {
|
} else if (message.type === "tool") {
|
||||||
const taskId = message.tool_call_id;
|
const taskId = message.tool_call_id;
|
||||||
if (taskId) {
|
if (taskId) {
|
||||||
const result = extractTextFromMessage(message);
|
const parsed = parseSubtaskResult(
|
||||||
if (result.startsWith("Task Succeeded. Result:")) {
|
extractTextFromMessage(message),
|
||||||
updateSubtask({
|
);
|
||||||
id: taskId,
|
updateSubtask({ id: taskId, ...parsed });
|
||||||
status: "completed",
|
|
||||||
result: result
|
|
||||||
.split("Task Succeeded. Result:")[1]
|
|
||||||
?.trim(),
|
|
||||||
});
|
|
||||||
} else if (result.startsWith("Task failed.")) {
|
|
||||||
updateSubtask({
|
|
||||||
id: taskId,
|
|
||||||
status: "failed",
|
|
||||||
error: result.split("Task failed.")[1]?.trim(),
|
|
||||||
});
|
|
||||||
} else if (result.startsWith("Task timed out")) {
|
|
||||||
updateSubtask({
|
|
||||||
id: taskId,
|
|
||||||
status: "failed",
|
|
||||||
error: result,
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
updateSubtask({
|
|
||||||
id: taskId,
|
|
||||||
status: "in_progress",
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,52 @@
|
|||||||
|
import type { Subtask } from "./types";
|
||||||
|
|
||||||
|
/** Status values a subtask can take, read off the `status` field of {@link Subtask}. */
export type SubtaskStatus = Subtask["status"];

/**
 * Fields of a subtask that are derived from the text of a `task` tool
 * message. `result` and `error` are optional because a given message
 * carries at most one of them, and neither while the task is still running.
 */
export interface SubtaskResultUpdate {
  /** Status the subtask should move to for this tool message. */
  status: SubtaskStatus;
  /** Result text of a successful task, when the message reports success. */
  result?: string;
  /** Failure text, when the message reports a failed or timed-out task. */
  error?: string;
}
|
||||||
|
|
||||||
|
/** Leading text of a successful `task` tool result; the result body follows it. */
const SUCCESS_PREFIX = "Task Succeeded. Result:";

/** Leading text of a failed `task` tool result; the failure detail follows it. */
const FAILURE_PREFIX = "Task failed.";

/** Leading text of a timed-out `task` tool result; the whole message is kept as the error. */
const TIMEOUT_PREFIX = "Task timed out";

/**
 * Matches text whose first word is `Error`, in any letter case. The word
 * boundary means `Error: ...` and `error - ...` match while `Errors ...`
 * does not. Hoisted so the pattern is compiled once; it has no `g` flag, so
 * `test()` carries no state between calls.
 */
const LEADING_ERROR_WORD = /^Error\b/i;

/**
 * Map the text of a `task` tool message to a subtask update.
 *
 * Surrounding whitespace is ignored. The checks run in this order:
 *
 * 1. `Task Succeeded. Result:` gives `completed`, with the trimmed remainder
 *    as `result`.
 * 2. `Task failed.` gives `failed`, with the trimmed remainder as `error`.
 * 3. `Task timed out` gives `failed`, with the whole trimmed message as
 *    `error`.
 * 4. A first word of `Error` (any letter case) gives `failed`, with the
 *    whole trimmed message as `error`.
 * 5. Anything else gives `in_progress` with no `result` or `error`.
 *
 * Rule 4 exists for bytedance/deer-flow issue #3107 BUG-007: parent-visible
 * task tool errors do not always start with one of the three legacy prefixes
 * (e.g. when `ToolErrorHandlingMiddleware` wraps an exception as
 * `Error: Tool 'task' failed ...`), which left subtask cards stuck on
 * "in_progress".
 *
 * @param text - Raw text extracted from the tool message.
 * @returns The status, plus `result` or `error` when the message is terminal.
 */
export function parseSubtaskResult(text: string): SubtaskResultUpdate {
  const trimmed = text.trim();

  if (trimmed.startsWith(SUCCESS_PREFIX)) {
    return {
      status: "completed",
      result: trimmed.slice(SUCCESS_PREFIX.length).trim(),
    };
  }

  if (trimmed.startsWith(FAILURE_PREFIX)) {
    return {
      status: "failed",
      error: trimmed.slice(FAILURE_PREFIX.length).trim(),
    };
  }

  // Timeouts and error-word messages keep the full text: unlike the two
  // prefixes above, the leading words are part of the useful message.
  if (trimmed.startsWith(TIMEOUT_PREFIX) || LEADING_ERROR_WORD.test(trimmed)) {
    return { status: "failed", error: trimmed };
  }

  return { status: "in_progress" };
}
|
||||||
@@ -0,0 +1,57 @@
|
|||||||
|
import { describe, expect, it } from "vitest";
|
||||||
|
|
||||||
|
import { parseSubtaskResult } from "@/core/tasks/subtask-result";
|
||||||
|
|
||||||
|
// Unit tests for parseSubtaskResult: the three legacy prefixes, the
// middleware-wrapped error regression, and the non-terminal fallback.
describe("parseSubtaskResult", () => {
  it("recognises the standard success prefix", () => {
    const parsed = parseSubtaskResult(
      "Task Succeeded. Result: investigated and produced a 3-page report",
    );
    expect(parsed.status).toBe("completed");
    expect(parsed.result).toBe("investigated and produced a 3-page report");
  });

  it("recognises the standard failure prefix", () => {
    const parsed = parseSubtaskResult(
      "Task failed. underlying tool raised RuntimeError",
    );
    expect(parsed.status).toBe("failed");
    expect(parsed.error).toBe("underlying tool raised RuntimeError");
  });

  it("recognises the standard timeout prefix", () => {
    const parsed = parseSubtaskResult("Task timed out after 900s");
    expect(parsed.status).toBe("failed");
    // The timeout message is kept whole, prefix included.
    expect(parsed.error).toBe("Task timed out after 900s");
  });

  it("treats middleware-wrapped tool errors as terminal failures", () => {
    // bytedance/deer-flow issue #3107 BUG-007: the parent-visible ToolMessage
    // produced by ToolErrorHandlingMiddleware never matches the three legacy
    // prefixes, so subtask cards stay stuck on "in_progress".
    const parsed = parseSubtaskResult(
      "Error: Tool 'task' failed with TypeError: 'AsyncCallbackManager' object is not iterable. Continue with available context, or choose an alternative tool.",
    );
    expect(parsed.status).toBe("failed");
    expect(parsed.error).toContain("AsyncCallbackManager");
  });

  it("treats any other Error: prefix as a terminal failure", () => {
    const parsed = parseSubtaskResult("Error: subagent worker pool exhausted");
    expect(parsed.status).toBe("failed");
    // The full message, including the "Error:" lead-in, is surfaced.
    expect(parsed.error).toBe("Error: subagent worker pool exhausted");
  });

  it("does not treat a longer word starting with Error as a failure", () => {
    // Only the standalone word "Error" is matched, so "Errors" falls through.
    const parsed = parseSubtaskResult("Errors are being collected ...");
    expect(parsed.status).toBe("in_progress");
  });

  it("keeps unrecognised non-error output as in_progress", () => {
    // Streaming partial chunks should not flip the card to terminal early.
    const parsed = parseSubtaskResult("Investigating ...");
    expect(parsed.status).toBe("in_progress");
    expect(parsed.error).toBeUndefined();
    expect(parsed.result).toBeUndefined();
  });

  it("trims surrounding whitespace before matching prefixes", () => {
    const parsed = parseSubtaskResult(" Task Succeeded. Result: ok ");
    expect(parsed.status).toBe("completed");
    expect(parsed.result).toBe("ok");
  });
});
|
||||||
Reference in New Issue
Block a user