feat: show token usage per assistant response (#2270)

* feat: show token usage per assistant response

* fix: align client models response with token usage

* fix: address token usage review feedback

* docs: clarify token usage config example

---------

Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
This commit is contained in:
YuJitang
2026-04-16 08:56:49 +08:00
committed by GitHub
parent 0e16a7fe55
commit 105db00987
17 changed files with 271 additions and 50 deletions
+22 -5
View File
@@ -17,10 +17,17 @@ class ModelResponse(BaseModel):
supports_reasoning_effort: bool = Field(default=False, description="Whether model supports reasoning effort")
class TokenUsageResponse(BaseModel):
    """Token usage display configuration.

    Used as the type of the ``token_usage`` field on ``ModelsListResponse``.
    """

    # Display toggle; falls back to False when no value is supplied.
    enabled: bool = Field(default=False, description="Whether token usage display is enabled")
class ModelsListResponse(BaseModel):
    """Response model for listing all models.

    Carries the model entries together with the token usage display settings.
    """

    # One ``ModelResponse`` entry per listed model.
    models: list[ModelResponse]
    # Token usage display settings; declared without a default value here.
    token_usage: TokenUsageResponse
@router.get(
@@ -36,7 +43,7 @@ async def list_models() -> ModelsListResponse:
excluding sensitive fields like API keys and internal configuration.
Returns:
A list of all configured models with their metadata.
A list of all configured models with their metadata and token usage display settings.
Example Response:
```json
@@ -44,17 +51,24 @@ async def list_models() -> ModelsListResponse:
"models": [
{
"name": "gpt-4",
"model": "gpt-4",
"display_name": "GPT-4",
"description": "OpenAI GPT-4 model",
"supports_thinking": false
"supports_thinking": false,
"supports_reasoning_effort": false
},
{
"name": "claude-3-opus",
"model": "claude-3-opus",
"display_name": "Claude 3 Opus",
"description": "Anthropic Claude 3 Opus model",
"supports_thinking": true
"supports_thinking": true,
"supports_reasoning_effort": false
}
]
],
"token_usage": {
"enabled": true
}
}
```
"""
@@ -70,7 +84,10 @@ async def list_models() -> ModelsListResponse:
)
for model in config.models
]
return ModelsListResponse(models=models)
return ModelsListResponse(
models=models,
token_usage=TokenUsageResponse(enabled=config.token_usage.enabled),
)
@router.get(
+6 -1
View File
@@ -722,6 +722,10 @@ class DeerFlowClient:
Dict with a "models" key containing a list of model info dicts and a
"token_usage" key containing ``{"enabled": bool}``, matching the
Gateway API ``ModelsListResponse`` schema.
"""
token_usage_enabled = getattr(getattr(self._app_config, "token_usage", None), "enabled", False)
if not isinstance(token_usage_enabled, bool):
token_usage_enabled = False
return {
"models": [
{
@@ -733,7 +737,8 @@ class DeerFlowClient:
"supports_reasoning_effort": getattr(model, "supports_reasoning_effort", False),
}
for model in self._app_config.models
]
],
"token_usage": {"enabled": token_usage_enabled},
}
def list_skills(self, enabled_only: bool = False) -> dict:
+5
View File
@@ -38,6 +38,7 @@ def mock_app_config():
config = MagicMock()
config.models = [model]
config.token_usage.enabled = False
return config
@@ -107,6 +108,7 @@ class TestConfigQueries:
def test_list_models(self, client):
result = client.list_models()
assert "models" in result
assert result["token_usage"] == {"enabled": False}
assert len(result["models"]) == 1
assert result["models"][0]["name"] == "test-model"
# Verify Gateway-aligned fields are present
@@ -2196,7 +2198,9 @@ class TestGatewayConformance:
model.display_name = "Test Model"
model.description = "A test model"
model.supports_thinking = False
model.supports_reasoning_effort = False
mock_app_config.models = [model]
mock_app_config.token_usage.enabled = True
with patch("deerflow.client.get_app_config", return_value=mock_app_config):
client = DeerFlowClient()
@@ -2206,6 +2210,7 @@ class TestGatewayConformance:
assert len(parsed.models) == 1
assert parsed.models[0].name == "test-model"
assert parsed.models[0].model == "gpt-test"
assert parsed.token_usage.enabled is True
def test_get_model(self, mock_app_config):
model = MagicMock()