mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-23 16:35:59 +00:00
Populate Codex usage metadata for token accounting (#2585)
This commit is contained in:
@@ -21,6 +21,7 @@ from langchain_core.callbacks import CallbackManagerForLLMRun
|
|||||||
from langchain_core.language_models.chat_models import BaseChatModel
|
from langchain_core.language_models.chat_models import BaseChatModel
|
||||||
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage, ToolMessage
|
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage, ToolMessage
|
||||||
from langchain_core.outputs import ChatGeneration, ChatResult
|
from langchain_core.outputs import ChatGeneration, ChatResult
|
||||||
|
from langchain_openai.chat_models.base import _create_usage_metadata_responses
|
||||||
|
|
||||||
from deerflow.models.credential_loader import CodexCliCredential, load_codex_cli_credential
|
from deerflow.models.credential_loader import CodexCliCredential, load_codex_cli_credential
|
||||||
|
|
||||||
@@ -346,6 +347,7 @@ class CodexChatModel(BaseChatModel):
|
|||||||
)
|
)
|
||||||
|
|
||||||
usage = response.get("usage", {})
|
usage = response.get("usage", {})
|
||||||
|
usage_metadata = _create_usage_metadata_responses(usage) if usage else None
|
||||||
additional_kwargs = {}
|
additional_kwargs = {}
|
||||||
if reasoning_content:
|
if reasoning_content:
|
||||||
additional_kwargs["reasoning_content"] = reasoning_content
|
additional_kwargs["reasoning_content"] = reasoning_content
|
||||||
@@ -355,6 +357,7 @@ class CodexChatModel(BaseChatModel):
|
|||||||
tool_calls=tool_calls if tool_calls else [],
|
tool_calls=tool_calls if tool_calls else [],
|
||||||
invalid_tool_calls=invalid_tool_calls,
|
invalid_tool_calls=invalid_tool_calls,
|
||||||
additional_kwargs=additional_kwargs,
|
additional_kwargs=additional_kwargs,
|
||||||
|
usage_metadata=usage_metadata,
|
||||||
response_metadata={
|
response_metadata={
|
||||||
"model": response.get("model", self.model),
|
"model": response.get("model", self.model),
|
||||||
"usage": usage,
|
"usage": usage,
|
||||||
|
|||||||
@@ -82,6 +82,36 @@ def test_parse_response_text_content():
|
|||||||
assert result.generations[0].message.content == "Hello world"
|
assert result.generations[0].message.content == "Hello world"
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_response_populates_usage_metadata():
    """Check that ``_parse_response`` exposes token usage as ``usage_metadata``.

    The raw response reports usage with ``input_tokens_details`` /
    ``output_tokens_details``; the parsed message is expected to carry the
    same counts under ``input_token_details`` / ``output_token_details``
    with ``cache_read`` and ``reasoning`` entries.
    """
    raw_usage = {
        "input_tokens": 10,
        "output_tokens": 5,
        "total_tokens": 15,
        "input_tokens_details": {"cached_tokens": 3},
        "output_tokens_details": {"reasoning_tokens": 2},
    }
    text_item = {
        "type": "message",
        "content": [{"type": "output_text", "text": "Hello world"}],
    }
    payload = {
        "output": [text_item],
        "usage": raw_usage,
        "model": "gpt-5.4",
    }
    expected_metadata = {
        "input_tokens": 10,
        "output_tokens": 5,
        "total_tokens": 15,
        "input_token_details": {"cache_read": 3},
        "output_token_details": {"reasoning": 2},
    }

    parsed = _make_model()._parse_response(payload)
    message = parsed.generations[0].message

    assert message.usage_metadata == expected_metadata
|
||||||
|
|
||||||
|
|
||||||
def test_parse_response_reasoning_content():
|
def test_parse_response_reasoning_content():
|
||||||
model = _make_model()
|
model = _make_model()
|
||||||
response = {
|
response = {
|
||||||
|
|||||||
@@ -0,0 +1,32 @@
|
|||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
from langchain_core.messages import AIMessage
|
||||||
|
|
||||||
|
from deerflow.agents.middlewares.token_usage_middleware import TokenUsageMiddleware
|
||||||
|
|
||||||
|
|
||||||
|
def test_after_model_logs_usage_metadata_counts():
    """Check that ``after_model`` logs the token counts of the AI message.

    The middleware's module-level ``logger.info`` is patched so the call can
    be inspected; the test expects exactly one call carrying the input,
    output and total counts as lazy ``%s`` arguments, and a ``None`` return
    value from ``after_model``.
    """
    middleware = TokenUsageMiddleware()
    token_counts = {
        "input_tokens": 10,
        "output_tokens": 5,
        "total_tokens": 15,
    }
    ai_message = AIMessage(content="done", usage_metadata=token_counts)
    state = {"messages": [ai_message]}
    logger_info_target = "deerflow.agents.middlewares.token_usage_middleware.logger.info"

    with patch(logger_info_target) as info_mock:
        result = middleware.after_model(state=state, runtime=MagicMock())

    assert result is None
    info_mock.assert_called_once_with(
        "LLM token usage: input=%s output=%s total=%s",
        10,
        5,
        15,
    )
|
||||||
Reference in New Issue
Block a user