feat(responses): add usage types to inference and responses APIs (#3764)
Some checks failed
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 0s
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 0s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 4s
Python Package Build Test / build (3.12) (push) Failing after 2s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
Vector IO Integration Tests / test-matrix (push) Failing after 6s
Unit Tests / unit-tests (3.12) (push) Failing after 4s
Test External API and Providers / test-external (venv) (push) Failing after 6s
Unit Tests / unit-tests (3.13) (push) Failing after 4s
Python Package Build Test / build (3.13) (push) Failing after 23s
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 27s
API Conformance Tests / check-schema-compatibility (push) Successful in 36s
UI Tests / ui-tests (22) (push) Successful in 55s
Pre-commit / pre-commit (push) Successful in 2m7s

## Summary
Adds OpenAI-compatible usage tracking types to enable reporting token
consumption for both streaming and non-streaming responses.

## Type Definitions
**Chat Completion Usage** (inference API):
```python
class OpenAIChatCompletionUsage(BaseModel):
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
    prompt_tokens_details: OpenAIChatCompletionUsagePromptTokensDetails | None
    completion_tokens_details: OpenAIChatCompletionUsageCompletionTokensDetails | None
```

**Response Usage** (responses API):
```python
class OpenAIResponseUsage(BaseModel):
    input_tokens: int
    output_tokens: int
    total_tokens: int
    input_tokens_details: OpenAIResponseUsageInputTokensDetails | None
    output_tokens_details: OpenAIResponseUsageOutputTokensDetails | None
```

These types match OpenAI's usage reporting format and enable PR #3766 to
implement usage tracking in streaming responses.

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Ashwin Bharambe 2025-10-10 06:22:59 -07:00 committed by GitHub
parent ebae0385bb
commit aaf5036235
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 747 additions and 0 deletions

View file

@ -6781,6 +6781,10 @@
"type": "string",
"description": "The model that was used to generate the chat completion"
},
"usage": {
"$ref": "#/components/schemas/OpenAIChatCompletionUsage",
"description": "Token usage information for the completion"
},
"input_messages": {
"type": "array",
"items": {
@ -6983,6 +6987,55 @@
"title": "OpenAIChatCompletionToolCallFunction",
"description": "Function call details for OpenAI-compatible tool calls."
},
"OpenAIChatCompletionUsage": {
"type": "object",
"properties": {
"prompt_tokens": {
"type": "integer",
"description": "Number of tokens in the prompt"
},
"completion_tokens": {
"type": "integer",
"description": "Number of tokens in the completion"
},
"total_tokens": {
"type": "integer",
"description": "Total tokens used (prompt + completion)"
},
"prompt_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer",
"description": "Number of tokens retrieved from cache"
}
},
"additionalProperties": false,
"title": "OpenAIChatCompletionUsagePromptTokensDetails",
"description": "Token details for prompt tokens in OpenAI chat completion usage."
},
"completion_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer",
"description": "Number of tokens used for reasoning (o1/o3 models)"
}
},
"additionalProperties": false,
"title": "OpenAIChatCompletionUsageCompletionTokensDetails",
"description": "Token details for output tokens in OpenAI chat completion usage."
}
},
"additionalProperties": false,
"required": [
"prompt_tokens",
"completion_tokens",
"total_tokens"
],
"title": "OpenAIChatCompletionUsage",
"description": "Usage information for OpenAI chat completion."
},
"OpenAIChoice": {
"type": "object",
"properties": {
@ -7745,6 +7798,10 @@
"model": {
"type": "string",
"description": "The model that was used to generate the chat completion"
},
"usage": {
"$ref": "#/components/schemas/OpenAIChatCompletionUsage",
"description": "Token usage information for the completion"
}
},
"additionalProperties": false,
@ -7785,6 +7842,10 @@
"model": {
"type": "string",
"description": "The model that was used to generate the chat completion"
},
"usage": {
"$ref": "#/components/schemas/OpenAIChatCompletionUsage",
"description": "Token usage information (typically included in final chunk with stream_options)"
}
},
"additionalProperties": false,
@ -7882,6 +7943,10 @@
"type": "string",
"description": "The model that was used to generate the chat completion"
},
"usage": {
"$ref": "#/components/schemas/OpenAIChatCompletionUsage",
"description": "Token usage information for the completion"
},
"input_messages": {
"type": "array",
"items": {
@ -9096,6 +9161,10 @@
"type": "string",
"description": "(Optional) Truncation strategy applied to the response"
},
"usage": {
"$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response"
},
"input": {
"type": "array",
"items": {
@ -9541,6 +9610,53 @@
"title": "OpenAIResponseText",
"description": "Text response configuration for OpenAI responses."
},
"OpenAIResponseUsage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer",
"description": "Number of tokens in the input"
},
"output_tokens": {
"type": "integer",
"description": "Number of tokens in the output"
},
"total_tokens": {
"type": "integer",
"description": "Total tokens used (input + output)"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer",
"description": "Number of tokens retrieved from cache"
}
},
"additionalProperties": false,
"description": "Detailed breakdown of input token usage"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer",
"description": "Number of tokens used for reasoning (o1/o3 models)"
}
},
"additionalProperties": false,
"description": "Detailed breakdown of output token usage"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"output_tokens",
"total_tokens"
],
"title": "OpenAIResponseUsage",
"description": "Usage information for OpenAI response."
},
"ResponseShieldSpec": {
"type": "object",
"properties": {
@ -9983,6 +10099,10 @@
"truncation": {
"type": "string",
"description": "(Optional) Truncation strategy applied to the response"
},
"usage": {
"$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response"
}
},
"additionalProperties": false,

View file

@ -4999,6 +4999,10 @@ components:
type: string
description: >-
The model that was used to generate the chat completion
usage:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information for the completion
input_messages:
type: array
items:
@ -5165,6 +5169,49 @@ components:
title: OpenAIChatCompletionToolCallFunction
description: >-
Function call details for OpenAI-compatible tool calls.
OpenAIChatCompletionUsage:
type: object
properties:
prompt_tokens:
type: integer
description: Number of tokens in the prompt
completion_tokens:
type: integer
description: Number of tokens in the completion
total_tokens:
type: integer
description: Total tokens used (prompt + completion)
prompt_tokens_details:
type: object
properties:
cached_tokens:
type: integer
description: Number of tokens retrieved from cache
additionalProperties: false
title: >-
OpenAIChatCompletionUsagePromptTokensDetails
description: >-
Token details for prompt tokens in OpenAI chat completion usage.
completion_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
description: >-
Number of tokens used for reasoning (o1/o3 models)
additionalProperties: false
title: >-
OpenAIChatCompletionUsageCompletionTokensDetails
description: >-
Token details for output tokens in OpenAI chat completion usage.
additionalProperties: false
required:
- prompt_tokens
- completion_tokens
- total_tokens
title: OpenAIChatCompletionUsage
description: >-
Usage information for OpenAI chat completion.
OpenAIChoice:
type: object
properties:
@ -5696,6 +5743,10 @@ components:
type: string
description: >-
The model that was used to generate the chat completion
usage:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information for the completion
additionalProperties: false
required:
- id
@ -5731,6 +5782,10 @@ components:
type: string
description: >-
The model that was used to generate the chat completion
usage:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information (typically included in final chunk with stream_options)
additionalProperties: false
required:
- id
@ -5811,6 +5866,10 @@ components:
type: string
description: >-
The model that was used to generate the chat completion
usage:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information for the completion
input_messages:
type: array
items:
@ -6747,6 +6806,10 @@ components:
type: string
description: >-
(Optional) Truncation strategy applied to the response
usage:
$ref: '#/components/schemas/OpenAIResponseUsage'
description: >-
(Optional) Token usage information for the response
input:
type: array
items:
@ -7095,6 +7158,42 @@ components:
title: OpenAIResponseText
description: >-
Text response configuration for OpenAI responses.
OpenAIResponseUsage:
type: object
properties:
input_tokens:
type: integer
description: Number of tokens in the input
output_tokens:
type: integer
description: Number of tokens in the output
total_tokens:
type: integer
description: Total tokens used (input + output)
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
description: Number of tokens retrieved from cache
additionalProperties: false
description: Detailed breakdown of input token usage
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
description: >-
Number of tokens used for reasoning (o1/o3 models)
additionalProperties: false
description: Detailed breakdown of output token usage
additionalProperties: false
required:
- input_tokens
- output_tokens
- total_tokens
title: OpenAIResponseUsage
description: Usage information for OpenAI response.
ResponseShieldSpec:
type: object
properties:
@ -7421,6 +7520,10 @@ components:
type: string
description: >-
(Optional) Truncation strategy applied to the response
usage:
$ref: '#/components/schemas/OpenAIResponseUsage'
description: >-
(Optional) Token usage information for the response
additionalProperties: false
required:
- created_at

View file

@ -4277,6 +4277,10 @@
"type": "string",
"description": "The model that was used to generate the chat completion"
},
"usage": {
"$ref": "#/components/schemas/OpenAIChatCompletionUsage",
"description": "Token usage information for the completion"
},
"input_messages": {
"type": "array",
"items": {
@ -4479,6 +4483,55 @@
"title": "OpenAIChatCompletionToolCallFunction",
"description": "Function call details for OpenAI-compatible tool calls."
},
"OpenAIChatCompletionUsage": {
"type": "object",
"properties": {
"prompt_tokens": {
"type": "integer",
"description": "Number of tokens in the prompt"
},
"completion_tokens": {
"type": "integer",
"description": "Number of tokens in the completion"
},
"total_tokens": {
"type": "integer",
"description": "Total tokens used (prompt + completion)"
},
"prompt_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer",
"description": "Number of tokens retrieved from cache"
}
},
"additionalProperties": false,
"title": "OpenAIChatCompletionUsagePromptTokensDetails",
"description": "Token details for prompt tokens in OpenAI chat completion usage."
},
"completion_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer",
"description": "Number of tokens used for reasoning (o1/o3 models)"
}
},
"additionalProperties": false,
"title": "OpenAIChatCompletionUsageCompletionTokensDetails",
"description": "Token details for output tokens in OpenAI chat completion usage."
}
},
"additionalProperties": false,
"required": [
"prompt_tokens",
"completion_tokens",
"total_tokens"
],
"title": "OpenAIChatCompletionUsage",
"description": "Usage information for OpenAI chat completion."
},
"OpenAIChoice": {
"type": "object",
"properties": {
@ -5241,6 +5294,10 @@
"model": {
"type": "string",
"description": "The model that was used to generate the chat completion"
},
"usage": {
"$ref": "#/components/schemas/OpenAIChatCompletionUsage",
"description": "Token usage information for the completion"
}
},
"additionalProperties": false,
@ -5281,6 +5338,10 @@
"model": {
"type": "string",
"description": "The model that was used to generate the chat completion"
},
"usage": {
"$ref": "#/components/schemas/OpenAIChatCompletionUsage",
"description": "Token usage information (typically included in final chunk with stream_options)"
}
},
"additionalProperties": false,
@ -5378,6 +5439,10 @@
"type": "string",
"description": "The model that was used to generate the chat completion"
},
"usage": {
"$ref": "#/components/schemas/OpenAIChatCompletionUsage",
"description": "Token usage information for the completion"
},
"input_messages": {
"type": "array",
"items": {
@ -7503,6 +7568,10 @@
"type": "string",
"description": "(Optional) Truncation strategy applied to the response"
},
"usage": {
"$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response"
},
"input": {
"type": "array",
"items": {
@ -7636,6 +7705,53 @@
"title": "OpenAIResponseText",
"description": "Text response configuration for OpenAI responses."
},
"OpenAIResponseUsage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer",
"description": "Number of tokens in the input"
},
"output_tokens": {
"type": "integer",
"description": "Number of tokens in the output"
},
"total_tokens": {
"type": "integer",
"description": "Total tokens used (input + output)"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer",
"description": "Number of tokens retrieved from cache"
}
},
"additionalProperties": false,
"description": "Detailed breakdown of input token usage"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer",
"description": "Number of tokens used for reasoning (o1/o3 models)"
}
},
"additionalProperties": false,
"description": "Detailed breakdown of output token usage"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"output_tokens",
"total_tokens"
],
"title": "OpenAIResponseUsage",
"description": "Usage information for OpenAI response."
},
"ResponseShieldSpec": {
"type": "object",
"properties": {
@ -8078,6 +8194,10 @@
"truncation": {
"type": "string",
"description": "(Optional) Truncation strategy applied to the response"
},
"usage": {
"$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response"
}
},
"additionalProperties": false,

View file

@ -3248,6 +3248,10 @@ components:
type: string
description: >-
The model that was used to generate the chat completion
usage:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information for the completion
input_messages:
type: array
items:
@ -3414,6 +3418,49 @@ components:
title: OpenAIChatCompletionToolCallFunction
description: >-
Function call details for OpenAI-compatible tool calls.
OpenAIChatCompletionUsage:
type: object
properties:
prompt_tokens:
type: integer
description: Number of tokens in the prompt
completion_tokens:
type: integer
description: Number of tokens in the completion
total_tokens:
type: integer
description: Total tokens used (prompt + completion)
prompt_tokens_details:
type: object
properties:
cached_tokens:
type: integer
description: Number of tokens retrieved from cache
additionalProperties: false
title: >-
OpenAIChatCompletionUsagePromptTokensDetails
description: >-
Token details for prompt tokens in OpenAI chat completion usage.
completion_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
description: >-
Number of tokens used for reasoning (o1/o3 models)
additionalProperties: false
title: >-
OpenAIChatCompletionUsageCompletionTokensDetails
description: >-
Token details for output tokens in OpenAI chat completion usage.
additionalProperties: false
required:
- prompt_tokens
- completion_tokens
- total_tokens
title: OpenAIChatCompletionUsage
description: >-
Usage information for OpenAI chat completion.
OpenAIChoice:
type: object
properties:
@ -3945,6 +3992,10 @@ components:
type: string
description: >-
The model that was used to generate the chat completion
usage:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information for the completion
additionalProperties: false
required:
- id
@ -3980,6 +4031,10 @@ components:
type: string
description: >-
The model that was used to generate the chat completion
usage:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information (typically included in final chunk with stream_options)
additionalProperties: false
required:
- id
@ -4060,6 +4115,10 @@ components:
type: string
description: >-
The model that was used to generate the chat completion
usage:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information for the completion
input_messages:
type: array
items:
@ -5700,6 +5759,10 @@ components:
type: string
description: >-
(Optional) Truncation strategy applied to the response
usage:
$ref: '#/components/schemas/OpenAIResponseUsage'
description: >-
(Optional) Token usage information for the response
input:
type: array
items:
@ -5791,6 +5854,42 @@ components:
title: OpenAIResponseText
description: >-
Text response configuration for OpenAI responses.
OpenAIResponseUsage:
type: object
properties:
input_tokens:
type: integer
description: Number of tokens in the input
output_tokens:
type: integer
description: Number of tokens in the output
total_tokens:
type: integer
description: Total tokens used (input + output)
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
description: Number of tokens retrieved from cache
additionalProperties: false
description: Detailed breakdown of input token usage
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
description: >-
Number of tokens used for reasoning (o1/o3 models)
additionalProperties: false
description: Detailed breakdown of output token usage
additionalProperties: false
required:
- input_tokens
- output_tokens
- total_tokens
title: OpenAIResponseUsage
description: Usage information for OpenAI response.
ResponseShieldSpec:
type: object
properties:
@ -6117,6 +6216,10 @@ components:
type: string
description: >-
(Optional) Truncation strategy applied to the response
usage:
$ref: '#/components/schemas/OpenAIResponseUsage'
description: >-
(Optional) Token usage information for the response
additionalProperties: false
required:
- created_at

View file

@ -6286,6 +6286,10 @@
"type": "string",
"description": "The model that was used to generate the chat completion"
},
"usage": {
"$ref": "#/components/schemas/OpenAIChatCompletionUsage",
"description": "Token usage information for the completion"
},
"input_messages": {
"type": "array",
"items": {
@ -6488,6 +6492,55 @@
"title": "OpenAIChatCompletionToolCallFunction",
"description": "Function call details for OpenAI-compatible tool calls."
},
"OpenAIChatCompletionUsage": {
"type": "object",
"properties": {
"prompt_tokens": {
"type": "integer",
"description": "Number of tokens in the prompt"
},
"completion_tokens": {
"type": "integer",
"description": "Number of tokens in the completion"
},
"total_tokens": {
"type": "integer",
"description": "Total tokens used (prompt + completion)"
},
"prompt_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer",
"description": "Number of tokens retrieved from cache"
}
},
"additionalProperties": false,
"title": "OpenAIChatCompletionUsagePromptTokensDetails",
"description": "Token details for prompt tokens in OpenAI chat completion usage."
},
"completion_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer",
"description": "Number of tokens used for reasoning (o1/o3 models)"
}
},
"additionalProperties": false,
"title": "OpenAIChatCompletionUsageCompletionTokensDetails",
"description": "Token details for output tokens in OpenAI chat completion usage."
}
},
"additionalProperties": false,
"required": [
"prompt_tokens",
"completion_tokens",
"total_tokens"
],
"title": "OpenAIChatCompletionUsage",
"description": "Usage information for OpenAI chat completion."
},
"OpenAIChoice": {
"type": "object",
"properties": {
@ -7250,6 +7303,10 @@
"model": {
"type": "string",
"description": "The model that was used to generate the chat completion"
},
"usage": {
"$ref": "#/components/schemas/OpenAIChatCompletionUsage",
"description": "Token usage information for the completion"
}
},
"additionalProperties": false,
@ -7290,6 +7347,10 @@
"model": {
"type": "string",
"description": "The model that was used to generate the chat completion"
},
"usage": {
"$ref": "#/components/schemas/OpenAIChatCompletionUsage",
"description": "Token usage information (typically included in final chunk with stream_options)"
}
},
"additionalProperties": false,
@ -7387,6 +7448,10 @@
"type": "string",
"description": "The model that was used to generate the chat completion"
},
"usage": {
"$ref": "#/components/schemas/OpenAIChatCompletionUsage",
"description": "Token usage information for the completion"
},
"input_messages": {
"type": "array",
"items": {
@ -9512,6 +9577,10 @@
"type": "string",
"description": "(Optional) Truncation strategy applied to the response"
},
"usage": {
"$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response"
},
"input": {
"type": "array",
"items": {
@ -9645,6 +9714,53 @@
"title": "OpenAIResponseText",
"description": "Text response configuration for OpenAI responses."
},
"OpenAIResponseUsage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer",
"description": "Number of tokens in the input"
},
"output_tokens": {
"type": "integer",
"description": "Number of tokens in the output"
},
"total_tokens": {
"type": "integer",
"description": "Total tokens used (input + output)"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer",
"description": "Number of tokens retrieved from cache"
}
},
"additionalProperties": false,
"description": "Detailed breakdown of input token usage"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer",
"description": "Number of tokens used for reasoning (o1/o3 models)"
}
},
"additionalProperties": false,
"description": "Detailed breakdown of output token usage"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"output_tokens",
"total_tokens"
],
"title": "OpenAIResponseUsage",
"description": "Usage information for OpenAI response."
},
"ResponseShieldSpec": {
"type": "object",
"properties": {
@ -10087,6 +10203,10 @@
"truncation": {
"type": "string",
"description": "(Optional) Truncation strategy applied to the response"
},
"usage": {
"$ref": "#/components/schemas/OpenAIResponseUsage",
"description": "(Optional) Token usage information for the response"
}
},
"additionalProperties": false,

View file

@ -4693,6 +4693,10 @@ components:
type: string
description: >-
The model that was used to generate the chat completion
usage:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information for the completion
input_messages:
type: array
items:
@ -4859,6 +4863,49 @@ components:
title: OpenAIChatCompletionToolCallFunction
description: >-
Function call details for OpenAI-compatible tool calls.
OpenAIChatCompletionUsage:
type: object
properties:
prompt_tokens:
type: integer
description: Number of tokens in the prompt
completion_tokens:
type: integer
description: Number of tokens in the completion
total_tokens:
type: integer
description: Total tokens used (prompt + completion)
prompt_tokens_details:
type: object
properties:
cached_tokens:
type: integer
description: Number of tokens retrieved from cache
additionalProperties: false
title: >-
OpenAIChatCompletionUsagePromptTokensDetails
description: >-
Token details for prompt tokens in OpenAI chat completion usage.
completion_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
description: >-
Number of tokens used for reasoning (o1/o3 models)
additionalProperties: false
title: >-
OpenAIChatCompletionUsageCompletionTokensDetails
description: >-
Token details for output tokens in OpenAI chat completion usage.
additionalProperties: false
required:
- prompt_tokens
- completion_tokens
- total_tokens
title: OpenAIChatCompletionUsage
description: >-
Usage information for OpenAI chat completion.
OpenAIChoice:
type: object
properties:
@ -5390,6 +5437,10 @@ components:
type: string
description: >-
The model that was used to generate the chat completion
usage:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information for the completion
additionalProperties: false
required:
- id
@ -5425,6 +5476,10 @@ components:
type: string
description: >-
The model that was used to generate the chat completion
usage:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information (typically included in final chunk with stream_options)
additionalProperties: false
required:
- id
@ -5505,6 +5560,10 @@ components:
type: string
description: >-
The model that was used to generate the chat completion
usage:
$ref: '#/components/schemas/OpenAIChatCompletionUsage'
description: >-
Token usage information for the completion
input_messages:
type: array
items:
@ -7145,6 +7204,10 @@ components:
type: string
description: >-
(Optional) Truncation strategy applied to the response
usage:
$ref: '#/components/schemas/OpenAIResponseUsage'
description: >-
(Optional) Token usage information for the response
input:
type: array
items:
@ -7236,6 +7299,42 @@ components:
title: OpenAIResponseText
description: >-
Text response configuration for OpenAI responses.
OpenAIResponseUsage:
type: object
properties:
input_tokens:
type: integer
description: Number of tokens in the input
output_tokens:
type: integer
description: Number of tokens in the output
total_tokens:
type: integer
description: Total tokens used (input + output)
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
description: Number of tokens retrieved from cache
additionalProperties: false
description: Detailed breakdown of input token usage
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
description: >-
Number of tokens used for reasoning (o1/o3 models)
additionalProperties: false
description: Detailed breakdown of output token usage
additionalProperties: false
required:
- input_tokens
- output_tokens
- total_tokens
title: OpenAIResponseUsage
description: Usage information for OpenAI response.
ResponseShieldSpec:
type: object
properties:
@ -7562,6 +7661,10 @@ components:
type: string
description: >-
(Optional) Truncation strategy applied to the response
usage:
$ref: '#/components/schemas/OpenAIResponseUsage'
description: >-
(Optional) Token usage information for the response
additionalProperties: false
required:
- created_at