feat: show token usage per assistant response (#2270)

* feat: show token usage per assistant response

* fix: align client models response with token usage

* fix: address token usage review feedback

* docs: clarify token usage config example

---------

Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
This commit is contained in:
YuJitang
2026-04-16 08:56:49 +08:00
committed by GitHub
parent 0e16a7fe55
commit 105db00987
17 changed files with 271 additions and 50 deletions
@@ -38,17 +38,20 @@ import { cn } from "@/lib/utils";
import { CopyButton } from "../copy-button";
import { MarkdownContent } from "./markdown-content";
import { MessageTokenUsage } from "./message-token-usage";
export function MessageListItem({
className,
message,
isLoading,
threadId,
tokenUsageEnabled = false,
}: {
className?: string;
message: Message;
isLoading?: boolean;
threadId: string;
tokenUsageEnabled?: boolean;
}) {
const isHuman = message.type === "human";
return (
@@ -61,6 +64,7 @@ export function MessageListItem({
message={message}
isLoading={isLoading}
threadId={threadId}
tokenUsageEnabled={tokenUsageEnabled}
/>
{!isLoading && (
<MessageToolbar
@@ -119,11 +123,13 @@ function MessageContent_({
message,
isLoading = false,
threadId,
tokenUsageEnabled = false,
}: {
className?: string;
message: Message;
isLoading?: boolean;
threadId: string;
tokenUsageEnabled?: boolean;
}) {
const rehypePlugins = useRehypeSplitWordsIntoSpans(isLoading);
const isHuman = message.type === "human";
@@ -201,6 +207,11 @@ function MessageContent_({
<ReasoningTrigger />
<ReasoningContent>{reasoningContent}</ReasoningContent>
</Reasoning>
<MessageTokenUsage
enabled={tokenUsageEnabled}
isLoading={isLoading}
message={message}
/>
</AIElementMessageContent>
);
}
@@ -238,6 +249,11 @@ function MessageContent_({
className="my-3"
components={components}
/>
<MessageTokenUsage
enabled={tokenUsageEnabled}
isLoading={isLoading}
message={message}
/>
</AIElementMessageContent>
);
}
@@ -13,6 +13,7 @@ import {
hasContent,
hasPresentFiles,
hasReasoning,
hasToolCalls,
} from "@/core/messages/utils";
import { useRehypeSplitWordsIntoSpans } from "@/core/rehype";
import type { Subtask } from "@/core/tasks";
@@ -26,6 +27,7 @@ import { StreamingIndicator } from "../streaming-indicator";
import { MarkdownContent } from "./markdown-content";
import { MessageGroup } from "./message-group";
import { MessageListItem } from "./message-list-item";
import { MessageTokenUsageList } from "./message-token-usage";
import { MessageListSkeleton } from "./skeleton";
import { SubtaskCard } from "./subtask-card";
@@ -37,11 +39,13 @@ export function MessageList({
threadId,
thread,
paddingBottom = MESSAGE_LIST_DEFAULT_PADDING_BOTTOM,
tokenUsageEnabled = false,
}: {
className?: string;
threadId: string;
thread: BaseStream<AgentThreadState>;
paddingBottom?: number;
tokenUsageEnabled?: boolean;
}) {
const { t } = useI18n();
const rehypePlugins = useRehypeSplitWordsIntoSpans(thread.isLoading);
@@ -64,6 +68,7 @@ export function MessageList({
message={msg}
isLoading={thread.isLoading}
threadId={threadId}
tokenUsageEnabled={tokenUsageEnabled}
/>
);
});
@@ -71,12 +76,18 @@ export function MessageList({
const message = group.messages[0];
if (message && hasContent(message)) {
return (
<MarkdownContent
key={group.id}
content={extractContentFromMessage(message)}
isLoading={thread.isLoading}
rehypePlugins={rehypePlugins}
/>
<div key={group.id} className="w-full">
<MarkdownContent
content={extractContentFromMessage(message)}
isLoading={thread.isLoading}
rehypePlugins={rehypePlugins}
/>
<MessageTokenUsageList
enabled={tokenUsageEnabled}
isLoading={thread.isLoading}
messages={group.messages}
/>
</div>
);
}
return null;
@@ -99,6 +110,11 @@ export function MessageList({
/>
)}
<ArtifactFileList files={files} threadId={threadId} />
<MessageTokenUsageList
enabled={tokenUsageEnabled}
isLoading={thread.isLoading}
messages={group.messages}
/>
</div>
);
} else if (group.type === "assistant:subagent") {
@@ -191,15 +207,31 @@ export function MessageList({
className="relative z-1 flex flex-col gap-2"
>
{results}
<MessageTokenUsageList
enabled={tokenUsageEnabled}
isLoading={thread.isLoading}
messages={group.messages}
/>
</div>
);
}
const tokenUsageMessages = group.messages.filter(
(message) =>
message.type === "ai" &&
(hasToolCalls(message) ? true : !hasContent(message)),
);
return (
<MessageGroup
key={"group-" + group.id}
messages={group.messages}
isLoading={thread.isLoading}
/>
<div key={"group-" + group.id} className="w-full">
<MessageGroup
messages={group.messages}
isLoading={thread.isLoading}
/>
<MessageTokenUsageList
enabled={tokenUsageEnabled}
isLoading={thread.isLoading}
messages={tokenUsageMessages}
/>
</div>
);
})}
{thread.isLoading && <StreamingIndicator className="my-4" />}
@@ -0,0 +1,91 @@
import type { Message } from "@langchain/langgraph-sdk";
import { CoinsIcon } from "lucide-react";
import { useI18n } from "@/core/i18n/hooks";
import { formatTokenCount, getUsageMetadata } from "@/core/messages/usage";
import { cn } from "@/lib/utils";
/**
 * Footer row that reports the token usage of a single assistant message.
 *
 * Renders nothing unless `enabled` is true, `isLoading` is false and the
 * message type is `"ai"`. When `getUsageMetadata` returns nothing for the
 * message, a short "unavailable" label is shown in place of the counts.
 *
 * @param props.className - Extra classes merged onto the wrapper via `cn`.
 * @param props.enabled - Turns the row on; defaults to `false`.
 * @param props.isLoading - Suppresses the row while true; defaults to `false`.
 * @param props.message - The message whose usage metadata is displayed.
 */
export function MessageTokenUsage(props: {
  className?: string;
  enabled?: boolean;
  isLoading?: boolean;
  message: Message;
}) {
  const { className, enabled = false, isLoading = false, message } = props;

  // Hook is invoked before any early return so the hook order never changes.
  const { t } = useI18n();

  if (!enabled || isLoading) {
    return null;
  }
  if (message.type !== "ai") {
    return null;
  }

  const usage = getUsageMetadata(message);

  // Either the three counters or the short fallback label.
  const details = usage ? (
    <>
      <span>
        {t.tokenUsage.input}: {formatTokenCount(usage.inputTokens)}
      </span>
      <span>
        {t.tokenUsage.output}: {formatTokenCount(usage.outputTokens)}
      </span>
      <span className="font-medium">
        {t.tokenUsage.total}: {formatTokenCount(usage.totalTokens)}
      </span>
    </>
  ) : (
    <span>{t.tokenUsage.unavailableShort}</span>
  );

  const wrapperClassName = cn(
    "text-muted-foreground border-border/60 mt-1 flex flex-wrap items-center gap-x-3 gap-y-1 border-t pt-2 text-[11px]",
    className,
  );

  return (
    <div className={wrapperClassName}>
      <span className="inline-flex items-center gap-1 font-medium">
        <CoinsIcon className="size-3" />
        {t.tokenUsage.label}
      </span>
      {details}
    </div>
  );
}
/**
 * Renders one `MessageTokenUsage` row per `"ai"` message in `messages`.
 *
 * Returns `null` when the feature is not `enabled`, while `isLoading` is
 * true, or when the list contains no message of type `"ai"`.
 *
 * @param props.className - Forwarded unchanged to every rendered row.
 * @param props.enabled - Turns the list on; defaults to `false`.
 * @param props.isLoading - Suppresses the list while true; defaults to `false`.
 * @param props.messages - Messages to scan; entries that are not `"ai"` are skipped.
 */
export function MessageTokenUsageList(props: {
  className?: string;
  enabled?: boolean;
  isLoading?: boolean;
  messages: Message[];
}) {
  const { className, enabled = false, isLoading = false, messages } = props;

  // Nothing is collected when the list is hidden, so one emptiness check
  // below covers both the "hidden" and the "no assistant messages" cases.
  const hidden = isLoading || !enabled;
  const assistantMessages = hidden
    ? []
    : messages.filter((candidate) => candidate.type === "ai");

  if (assistantMessages.length === 0) {
    return null;
  }

  return (
    <>
      {assistantMessages.map((entry, position) => (
        // Fall back to the array position when a message carries no id.
        <MessageTokenUsage
          key={entry.id ?? position}
          className={className}
          enabled={enabled}
          isLoading={isLoading}
          message={entry}
        />
      ))}
    </>
  );
}
@@ -15,18 +15,20 @@ import { cn } from "@/lib/utils";
interface TokenUsageIndicatorProps {
messages: Message[];
enabled?: boolean;
className?: string;
}
export function TokenUsageIndicator({
messages,
enabled = false,
className,
}: TokenUsageIndicatorProps) {
const { t } = useI18n();
const usage = useMemo(() => accumulateUsage(messages), [messages]);
if (!usage) {
if (!enabled) {
return null;
}
@@ -36,37 +38,49 @@ export function TokenUsageIndicator({
<button
type="button"
className={cn(
"text-muted-foreground flex cursor-default items-center gap-1 text-xs",
"text-muted-foreground bg-background/70 flex cursor-default items-center gap-1.5 rounded-full border px-2 py-1 text-xs",
!usage && "opacity-60",
className,
)}
>
<CoinsIcon size={14} />
<span>{formatTokenCount(usage.totalTokens)}</span>
<span>{t.tokenUsage.label}</span>
<span className="font-mono">
{usage ? formatTokenCount(usage.totalTokens) : "-"}
</span>
</button>
</TooltipTrigger>
<TooltipContent side="bottom" align="end">
<div className="space-y-1 text-xs">
<div className="font-medium">{t.tokenUsage.title}</div>
<div className="flex justify-between gap-4">
<span>{t.tokenUsage.input}</span>
<span className="font-mono">
{formatTokenCount(usage.inputTokens)}
</span>
</div>
<div className="flex justify-between gap-4">
<span>{t.tokenUsage.output}</span>
<span className="font-mono">
{formatTokenCount(usage.outputTokens)}
</span>
</div>
<div className="border-t pt-1">
<div className="flex justify-between gap-4">
<span>{t.tokenUsage.total}</span>
<span className="font-mono font-medium">
{formatTokenCount(usage.totalTokens)}
</span>
{usage ? (
<>
<div className="flex justify-between gap-4">
<span>{t.tokenUsage.input}</span>
<span className="font-mono">
{formatTokenCount(usage.inputTokens)}
</span>
</div>
<div className="flex justify-between gap-4">
<span>{t.tokenUsage.output}</span>
<span className="font-mono">
{formatTokenCount(usage.outputTokens)}
</span>
</div>
<div className="border-t pt-1">
<div className="flex justify-between gap-4">
<span>{t.tokenUsage.total}</span>
<span className="font-mono font-medium">
{formatTokenCount(usage.totalTokens)}
</span>
</div>
</div>
</>
) : (
<div className="text-muted-foreground max-w-56">
{t.tokenUsage.unavailable}
</div>
</div>
)}
</div>
</TooltipContent>
</Tooltip>