feat: show token usage per assistant response (#2270)

* feat: show token usage per assistant response

* fix: align client models response with token usage

* fix: address token usage review feedback

* docs: clarify token usage config example

---------

Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
This commit is contained in:
YuJitang
2026-04-16 08:56:49 +08:00
committed by GitHub
parent 0e16a7fe55
commit 105db00987
17 changed files with 271 additions and 50 deletions
+22 -5
View File
@@ -17,10 +17,17 @@ class ModelResponse(BaseModel):
supports_reasoning_effort: bool = Field(default=False, description="Whether model supports reasoning effort")
class TokenUsageResponse(BaseModel):
    """Token usage display configuration."""

    # Single boolean toggle; it is False unless a value is supplied explicitly.
    enabled: bool = Field(
        description="Whether token usage display is enabled",
        default=False,
    )
class ModelsListResponse(BaseModel):
    """Response model for listing all models."""

    # One ModelResponse entry per listed model.
    models: list[ModelResponse]
    # Token usage display configuration returned alongside the model list.
    # No default is declared here, so a value must be provided on construction.
    token_usage: TokenUsageResponse
@router.get(
@@ -36,7 +43,7 @@ async def list_models() -> ModelsListResponse:
excluding sensitive fields like API keys and internal configuration.
Returns:
A list of all configured models with their metadata.
A list of all configured models with their metadata and token usage display settings.
Example Response:
```json
@@ -44,17 +51,24 @@ async def list_models() -> ModelsListResponse:
"models": [
{
"name": "gpt-4",
"model": "gpt-4",
"display_name": "GPT-4",
"description": "OpenAI GPT-4 model",
"supports_thinking": false
"supports_thinking": false,
"supports_reasoning_effort": false
},
{
"name": "claude-3-opus",
"model": "claude-3-opus",
"display_name": "Claude 3 Opus",
"description": "Anthropic Claude 3 Opus model",
"supports_thinking": true
"supports_thinking": true,
"supports_reasoning_effort": false
}
]
],
"token_usage": {
"enabled": true
}
}
```
"""
@@ -70,7 +84,10 @@ async def list_models() -> ModelsListResponse:
)
for model in config.models
]
return ModelsListResponse(models=models)
return ModelsListResponse(
models=models,
token_usage=TokenUsageResponse(enabled=config.token_usage.enabled),
)
@router.get(