feat(responses): add usage types to inference and responses APIs

Add OpenAI-compatible usage tracking types:
- OpenAIChatCompletionUsage with prompt/completion token counts
- OpenAIResponseUsage with input/output token counts
- Token detail types for cached_tokens and reasoning_tokens
- Add usage field to chat completion and response objects

This enables reporting token consumption for both streaming and
non-streaming responses, matching OpenAI's usage reporting format.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Ashwin Bharambe 2025-10-09 21:12:29 -07:00
parent ebae0385bb
commit c92a1c99f0
8 changed files with 747 additions and 0 deletions

View file

@@ -4277,6 +4277,10 @@
"type": "string",
"description": "The model that was used to generate the chat completion"
},
"usage": {
"$ref": "#/components/schemas/OpenAIChatCompletionUsage",
"description": "Token usage information for the completion"
},
"input_messages": {
"type": "array",
"items": {
@@ -4479,6 +4483,55 @@
"title": "OpenAIChatCompletionToolCallFunction",
"description": "Function call details for OpenAI-compatible tool calls."
},
"OpenAIChatCompletionUsage": {
"type": "object",
"properties": {
"prompt_tokens": {
"type": "integer",
"description": "Number of tokens in the prompt"
},
"completion_tokens": {
"type": "integer",
"description": "Number of tokens in the completion"
},
"total_tokens": {
"type": "integer",
"description": "Total tokens used (prompt + completion)"
},
"prompt_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer",
"description": "Number of tokens retrieved from cache"
}
},
"additionalProperties": false,
"title": "OpenAIChatCompletionUsagePromptTokensDetails",
"description": "Token details for prompt tokens in OpenAI chat completion usage."
},
"completion_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer",
"description": "Number of tokens used for reasoning (o1/o3 models)"
}
},
"additionalProperties": false,
"title": "OpenAIChatCompletionUsageCompletionTokensDetails",
"description": "Token details for output tokens in OpenAI chat completion usage."
}
},
"additionalProperties": false,
"required": [
"prompt_tokens",
"completion_tokens",
"total_tokens"
],
"title": "OpenAIChatCompletionUsage",
"description": "Usage information for OpenAI chat completion."
},
"OpenAIChoice": {
"type": "object",
"properties": {
@@ -5241,6 +5294,10 @@
"model": {
"type": "string",
"description": "The model that was used to generate the chat completion"
},
"usage": {
"$ref": "#/components/schemas/OpenAIChatCompletionUsage",
"description": "Token usage information for the completion"
}
},
"additionalProperties": false,
@@ -5281,6 +5338,10 @@
"model": {
"type": "string",
"description": "The model that was used to generate the chat completion"
},
"usage": {
"$ref": "#/components/schemas/OpenAIChatCompletionUsage",
"description": "Token usage information (typically included in final chunk with stream_options)"
}
},
"additionalProperties": false,
@@ -5378,6 +5439,10 @@
"type": "string",
"description": "The model that was used to generate the chat completion"
},
"usage": {
"$ref": "#/components/schemas/OpenAIChatCompletionUsage",
"description": "Token usage information for the completion"
},
"input_messages": {
"type": "array",
"items": {
@@ -7503,6 +7568,10 @@
"type": "string",
"description": "(Optional) Truncation strategy applied to the response"
},
"usage": {
"$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response"
},
"input": {
"type": "array",
"items": {
@@ -7636,6 +7705,53 @@
"title": "OpenAIResponseText",
"description": "Text response configuration for OpenAI responses."
},
"OpenAIResponseUsage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer",
"description": "Number of tokens in the input"
},
"output_tokens": {
"type": "integer",
"description": "Number of tokens in the output"
},
"total_tokens": {
"type": "integer",
"description": "Total tokens used (input + output)"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer",
"description": "Number of tokens retrieved from cache"
}
},
"additionalProperties": false,
"description": "Detailed breakdown of input token usage"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer",
"description": "Number of tokens used for reasoning (o1/o3 models)"
}
},
"additionalProperties": false,
"description": "Detailed breakdown of output token usage"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"output_tokens",
"total_tokens"
],
"title": "OpenAIResponseUsage",
"description": "Usage information for OpenAI response."
},
"ResponseShieldSpec": {
"type": "object",
"properties": {
@@ -8078,6 +8194,10 @@
"truncation": {
"type": "string",
"description": "(Optional) Truncation strategy applied to the response"
},
"usage": {
"$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response"
}
},
"additionalProperties": false,