Inference

This commit is contained in:
Sai Soundararaj 2025-07-01 15:33:38 -07:00
parent c5fd9886ae
commit 8fc72e4669
3 changed files with 297 additions and 53 deletions

View file

@ -5144,14 +5144,16 @@
"type": { "type": {
"type": "string", "type": "string",
"const": "greedy", "const": "greedy",
"default": "greedy" "default": "greedy",
"description": "Must be \"greedy\" to identify this sampling strategy"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type"
], ],
"title": "GreedySamplingStrategy" "title": "GreedySamplingStrategy",
"description": "Greedy sampling strategy that selects the highest probability token at each step."
}, },
"ImageContentItem": { "ImageContentItem": {
"type": "object", "type": "object",
@ -5671,10 +5673,12 @@
"type": { "type": {
"type": "string", "type": "string",
"const": "top_k", "const": "top_k",
"default": "top_k" "default": "top_k",
"description": "Must be \"top_k\" to identify this sampling strategy"
}, },
"top_k": { "top_k": {
"type": "integer" "type": "integer",
"description": "Number of top tokens to consider for sampling. Must be at least 1"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
@ -5682,7 +5686,8 @@
"type", "type",
"top_k" "top_k"
], ],
"title": "TopKSamplingStrategy" "title": "TopKSamplingStrategy",
"description": "Top-k sampling strategy that restricts sampling to the k most likely tokens."
}, },
"TopPSamplingStrategy": { "TopPSamplingStrategy": {
"type": "object", "type": "object",
@ -5690,21 +5695,25 @@
"type": { "type": {
"type": "string", "type": "string",
"const": "top_p", "const": "top_p",
"default": "top_p" "default": "top_p",
"description": "Must be \"top_p\" to identify this sampling strategy"
}, },
"temperature": { "temperature": {
"type": "number" "type": "number",
"description": "Controls randomness in sampling. Higher values increase randomness"
}, },
"top_p": { "top_p": {
"type": "number", "type": "number",
"default": 0.95 "default": 0.95,
"description": "Cumulative probability threshold for nucleus sampling. Defaults to 0.95"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type"
], ],
"title": "TopPSamplingStrategy" "title": "TopPSamplingStrategy",
"description": "Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p."
}, },
"URL": { "URL": {
"type": "object", "type": "object",
@ -5808,14 +5817,16 @@
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/ChatCompletionResponse" "$ref": "#/components/schemas/ChatCompletionResponse"
} },
"description": "List of chat completion responses, one for each conversation in the batch"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"batch" "batch"
], ],
"title": "BatchChatCompletionResponse" "title": "BatchChatCompletionResponse",
"description": "Response from a batch chat completion request."
}, },
"ChatCompletionResponse": { "ChatCompletionResponse": {
"type": "object", "type": "object",
@ -5939,14 +5950,16 @@
"type": "array", "type": "array",
"items": { "items": {
"$ref": "#/components/schemas/CompletionResponse" "$ref": "#/components/schemas/CompletionResponse"
} },
"description": "List of completion responses, one for each input in the batch"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"batch" "batch"
], ],
"title": "BatchCompletionResponse" "title": "BatchCompletionResponse",
"description": "Response from a batch completion request."
}, },
"CompletionResponse": { "CompletionResponse": {
"type": "object", "type": "object",
@ -6938,7 +6951,8 @@
"type": "object", "type": "object",
"properties": { "properties": {
"call_id": { "call_id": {
"type": "string" "type": "string",
"description": "Unique identifier for the tool call this response is for"
}, },
"tool_name": { "tool_name": {
"oneOf": [ "oneOf": [
@ -6955,10 +6969,12 @@
{ {
"type": "string" "type": "string"
} }
] ],
"description": "Name of the tool that was invoked"
}, },
"content": { "content": {
"$ref": "#/components/schemas/InterleavedContent" "$ref": "#/components/schemas/InterleavedContent",
"description": "The response content from the tool"
}, },
"metadata": { "metadata": {
"type": "object", "type": "object",
@ -6983,7 +6999,8 @@
"type": "object" "type": "object"
} }
] ]
} },
"description": "(Optional) Additional metadata about the tool response"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
@ -6992,7 +7009,8 @@
"tool_name", "tool_name",
"content" "content"
], ],
"title": "ToolResponse" "title": "ToolResponse",
"description": "Response from a tool invocation."
}, },
"Turn": { "Turn": {
"type": "object", "type": "object",
@ -9838,10 +9856,12 @@
"type": { "type": {
"type": "string", "type": "string",
"const": "image_url", "const": "image_url",
"default": "image_url" "default": "image_url",
"description": "Must be \"image_url\" to identify this as image content"
}, },
"image_url": { "image_url": {
"$ref": "#/components/schemas/OpenAIImageURL" "$ref": "#/components/schemas/OpenAIImageURL",
"description": "Image URL specification and processing details"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
@ -9849,7 +9869,8 @@
"type", "type",
"image_url" "image_url"
], ],
"title": "OpenAIChatCompletionContentPartImageParam" "title": "OpenAIChatCompletionContentPartImageParam",
"description": "Image content part for OpenAI-compatible chat completion messages."
}, },
"OpenAIChatCompletionContentPartParam": { "OpenAIChatCompletionContentPartParam": {
"oneOf": [ "oneOf": [
@ -9874,10 +9895,12 @@
"type": { "type": {
"type": "string", "type": "string",
"const": "text", "const": "text",
"default": "text" "default": "text",
"description": "Must be \"text\" to identify this as text content"
}, },
"text": { "text": {
"type": "string" "type": "string",
"description": "The text content of the message"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
@ -9885,44 +9908,53 @@
"type", "type",
"text" "text"
], ],
"title": "OpenAIChatCompletionContentPartTextParam" "title": "OpenAIChatCompletionContentPartTextParam",
"description": "Text content part for OpenAI-compatible chat completion messages."
}, },
"OpenAIChatCompletionToolCall": { "OpenAIChatCompletionToolCall": {
"type": "object", "type": "object",
"properties": { "properties": {
"index": { "index": {
"type": "integer" "type": "integer",
"description": "(Optional) Index of the tool call in the list"
}, },
"id": { "id": {
"type": "string" "type": "string",
"description": "(Optional) Unique identifier for the tool call"
}, },
"type": { "type": {
"type": "string", "type": "string",
"const": "function", "const": "function",
"default": "function" "default": "function",
"description": "Must be \"function\" to identify this as a function call"
}, },
"function": { "function": {
"$ref": "#/components/schemas/OpenAIChatCompletionToolCallFunction" "$ref": "#/components/schemas/OpenAIChatCompletionToolCallFunction",
"description": "(Optional) Function call details"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type"
], ],
"title": "OpenAIChatCompletionToolCall" "title": "OpenAIChatCompletionToolCall",
"description": "Tool call specification for OpenAI-compatible chat completion responses."
}, },
"OpenAIChatCompletionToolCallFunction": { "OpenAIChatCompletionToolCallFunction": {
"type": "object", "type": "object",
"properties": { "properties": {
"name": { "name": {
"type": "string" "type": "string",
"description": "(Optional) Name of the function to call"
}, },
"arguments": { "arguments": {
"type": "string" "type": "string",
"description": "(Optional) Arguments to pass to the function as a JSON string"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"title": "OpenAIChatCompletionToolCallFunction" "title": "OpenAIChatCompletionToolCallFunction",
"description": "Function call details for OpenAI-compatible tool calls."
}, },
"OpenAIChoice": { "OpenAIChoice": {
"type": "object", "type": "object",
@ -10015,17 +10047,20 @@
"type": "object", "type": "object",
"properties": { "properties": {
"url": { "url": {
"type": "string" "type": "string",
"description": "URL of the image to include in the message"
}, },
"detail": { "detail": {
"type": "string" "type": "string",
"description": "(Optional) Level of detail for image processing. Can be \"low\", \"high\", or \"auto\""
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"url" "url"
], ],
"title": "OpenAIImageURL" "title": "OpenAIImageURL",
"description": "Image URL specification for OpenAI-compatible chat completion messages."
}, },
"OpenAIMessageParam": { "OpenAIMessageParam": {
"oneOf": [ "oneOf": [
@ -11882,21 +11917,26 @@
"input_messages" "input_messages"
], ],
"title": "OpenAICompletionWithInputMessages" "title": "OpenAICompletionWithInputMessages"
} },
"description": "List of chat completion objects with their input messages"
}, },
"has_more": { "has_more": {
"type": "boolean" "type": "boolean",
"description": "Whether there are more completions available beyond this list"
}, },
"first_id": { "first_id": {
"type": "string" "type": "string",
"description": "ID of the first completion in this list"
}, },
"last_id": { "last_id": {
"type": "string" "type": "string",
"description": "ID of the last completion in this list"
}, },
"object": { "object": {
"type": "string", "type": "string",
"const": "list", "const": "list",
"default": "list" "default": "list",
"description": "Must be \"list\" to identify this as a list response"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
@ -11907,7 +11947,8 @@
"last_id", "last_id",
"object" "object"
], ],
"title": "ListOpenAIChatCompletionResponse" "title": "ListOpenAIChatCompletionResponse",
"description": "Response from listing OpenAI-compatible chat completions."
}, },
"ListDatasetsResponse": { "ListDatasetsResponse": {
"type": "object", "type": "object",
@ -12762,13 +12803,16 @@
"type": "object", "type": "object",
"properties": { "properties": {
"name": { "name": {
"type": "string" "type": "string",
"description": "Name of the schema"
}, },
"description": { "description": {
"type": "string" "type": "string",
"description": "(Optional) Description of the schema"
}, },
"strict": { "strict": {
"type": "boolean" "type": "boolean",
"description": "(Optional) Whether to enforce strict adherence to the schema"
}, },
"schema": { "schema": {
"type": "object", "type": "object",
@ -12793,14 +12837,16 @@
"type": "object" "type": "object"
} }
] ]
} },
"description": "(Optional) The JSON schema definition"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"name" "name"
], ],
"title": "OpenAIJSONSchema" "title": "OpenAIJSONSchema",
"description": "JSON schema specification for OpenAI-compatible structured response format."
}, },
"OpenAIResponseFormatJSONObject": { "OpenAIResponseFormatJSONObject": {
"type": "object", "type": "object",
@ -12808,14 +12854,16 @@
"type": { "type": {
"type": "string", "type": "string",
"const": "json_object", "const": "json_object",
"default": "json_object" "default": "json_object",
"description": "Must be \"json_object\" to indicate generic JSON object response format"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type"
], ],
"title": "OpenAIResponseFormatJSONObject" "title": "OpenAIResponseFormatJSONObject",
"description": "JSON object response format for OpenAI-compatible chat completion requests."
}, },
"OpenAIResponseFormatJSONSchema": { "OpenAIResponseFormatJSONSchema": {
"type": "object", "type": "object",
@ -12823,10 +12871,12 @@
"type": { "type": {
"type": "string", "type": "string",
"const": "json_schema", "const": "json_schema",
"default": "json_schema" "default": "json_schema",
"description": "Must be \"json_schema\" to indicate structured JSON response format"
}, },
"json_schema": { "json_schema": {
"$ref": "#/components/schemas/OpenAIJSONSchema" "$ref": "#/components/schemas/OpenAIJSONSchema",
"description": "The JSON schema specification for the response"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
@ -12834,7 +12884,8 @@
"type", "type",
"json_schema" "json_schema"
], ],
"title": "OpenAIResponseFormatJSONSchema" "title": "OpenAIResponseFormatJSONSchema",
"description": "JSON schema response format for OpenAI-compatible chat completion requests."
}, },
"OpenAIResponseFormatParam": { "OpenAIResponseFormatParam": {
"oneOf": [ "oneOf": [
@ -12863,14 +12914,16 @@
"type": { "type": {
"type": "string", "type": "string",
"const": "text", "const": "text",
"default": "text" "default": "text",
"description": "Must be \"text\" to indicate plain text response format"
} }
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"type" "type"
], ],
"title": "OpenAIResponseFormatText" "title": "OpenAIResponseFormatText",
"description": "Text response format for OpenAI-compatible chat completion requests."
}, },
"OpenaiChatCompletionRequest": { "OpenaiChatCompletionRequest": {
"type": "object", "type": "object",

View file

@ -3636,10 +3636,15 @@ components:
type: string type: string
const: greedy const: greedy
default: greedy default: greedy
description: >-
Must be "greedy" to identify this sampling strategy
additionalProperties: false additionalProperties: false
required: required:
- type - type
title: GreedySamplingStrategy title: GreedySamplingStrategy
description: >-
Greedy sampling strategy that selects the highest probability token at each
step.
ImageContentItem: ImageContentItem:
type: object type: object
properties: properties:
@ -3997,13 +4002,19 @@ components:
type: string type: string
const: top_k const: top_k
default: top_k default: top_k
description: >-
Must be "top_k" to identify this sampling strategy
top_k: top_k:
type: integer type: integer
description: >-
Number of top tokens to consider for sampling. Must be at least 1
additionalProperties: false additionalProperties: false
required: required:
- type - type
- top_k - top_k
title: TopKSamplingStrategy title: TopKSamplingStrategy
description: >-
Top-k sampling strategy that restricts sampling to the k most likely tokens.
TopPSamplingStrategy: TopPSamplingStrategy:
type: object type: object
properties: properties:
@ -4011,15 +4022,24 @@ components:
type: string type: string
const: top_p const: top_p
default: top_p default: top_p
description: >-
Must be "top_p" to identify this sampling strategy
temperature: temperature:
type: number type: number
description: >-
Controls randomness in sampling. Higher values increase randomness
top_p: top_p:
type: number type: number
default: 0.95 default: 0.95
description: >-
Cumulative probability threshold for nucleus sampling. Defaults to 0.95
additionalProperties: false additionalProperties: false
required: required:
- type - type
title: TopPSamplingStrategy title: TopPSamplingStrategy
description: >-
Top-p (nucleus) sampling strategy that samples from the smallest set of tokens
with cumulative probability >= p.
URL: URL:
type: object type: object
properties: properties:
@ -4111,10 +4131,14 @@ components:
type: array type: array
items: items:
$ref: '#/components/schemas/ChatCompletionResponse' $ref: '#/components/schemas/ChatCompletionResponse'
description: >-
List of chat completion responses, one for each conversation in the batch
additionalProperties: false additionalProperties: false
required: required:
- batch - batch
title: BatchChatCompletionResponse title: BatchChatCompletionResponse
description: >-
Response from a batch chat completion request.
ChatCompletionResponse: ChatCompletionResponse:
type: object type: object
properties: properties:
@ -4211,10 +4235,14 @@ components:
type: array type: array
items: items:
$ref: '#/components/schemas/CompletionResponse' $ref: '#/components/schemas/CompletionResponse'
description: >-
List of completion responses, one for each input in the batch
additionalProperties: false additionalProperties: false
required: required:
- batch - batch
title: BatchCompletionResponse title: BatchCompletionResponse
description: >-
Response from a batch completion request.
CompletionResponse: CompletionResponse:
type: object type: object
properties: properties:
@ -4967,6 +4995,8 @@ components:
properties: properties:
call_id: call_id:
type: string type: string
description: >-
Unique identifier for the tool call this response is for
tool_name: tool_name:
oneOf: oneOf:
- type: string - type: string
@ -4977,8 +5007,10 @@ components:
- code_interpreter - code_interpreter
title: BuiltinTool title: BuiltinTool
- type: string - type: string
description: Name of the tool that was invoked
content: content:
$ref: '#/components/schemas/InterleavedContent' $ref: '#/components/schemas/InterleavedContent'
description: The response content from the tool
metadata: metadata:
type: object type: object
additionalProperties: additionalProperties:
@ -4989,12 +5021,15 @@ components:
- type: string - type: string
- type: array - type: array
- type: object - type: object
description: >-
(Optional) Additional metadata about the tool response
additionalProperties: false additionalProperties: false
required: required:
- call_id - call_id
- tool_name - tool_name
- content - content
title: ToolResponse title: ToolResponse
description: Response from a tool invocation.
Turn: Turn:
type: object type: object
properties: properties:
@ -6991,14 +7026,20 @@ components:
type: string type: string
const: image_url const: image_url
default: image_url default: image_url
description: >-
Must be "image_url" to identify this as image content
image_url: image_url:
$ref: '#/components/schemas/OpenAIImageURL' $ref: '#/components/schemas/OpenAIImageURL'
description: >-
Image URL specification and processing details
additionalProperties: false additionalProperties: false
required: required:
- type - type
- image_url - image_url
title: >- title: >-
OpenAIChatCompletionContentPartImageParam OpenAIChatCompletionContentPartImageParam
description: >-
Image content part for OpenAI-compatible chat completion messages.
OpenAIChatCompletionContentPartParam: OpenAIChatCompletionContentPartParam:
oneOf: oneOf:
- $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam' - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
@ -7015,39 +7056,58 @@ components:
type: string type: string
const: text const: text
default: text default: text
description: >-
Must be "text" to identify this as text content
text: text:
type: string type: string
description: The text content of the message
additionalProperties: false additionalProperties: false
required: required:
- type - type
- text - text
title: OpenAIChatCompletionContentPartTextParam title: OpenAIChatCompletionContentPartTextParam
description: >-
Text content part for OpenAI-compatible chat completion messages.
OpenAIChatCompletionToolCall: OpenAIChatCompletionToolCall:
type: object type: object
properties: properties:
index: index:
type: integer type: integer
description: >-
(Optional) Index of the tool call in the list
id: id:
type: string type: string
description: >-
(Optional) Unique identifier for the tool call
type: type:
type: string type: string
const: function const: function
default: function default: function
description: >-
Must be "function" to identify this as a function call
function: function:
$ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction' $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction'
description: (Optional) Function call details
additionalProperties: false additionalProperties: false
required: required:
- type - type
title: OpenAIChatCompletionToolCall title: OpenAIChatCompletionToolCall
description: >-
Tool call specification for OpenAI-compatible chat completion responses.
OpenAIChatCompletionToolCallFunction: OpenAIChatCompletionToolCallFunction:
type: object type: object
properties: properties:
name: name:
type: string type: string
description: (Optional) Name of the function to call
arguments: arguments:
type: string type: string
description: >-
(Optional) Arguments to pass to the function as a JSON string
additionalProperties: false additionalProperties: false
title: OpenAIChatCompletionToolCallFunction title: OpenAIChatCompletionToolCallFunction
description: >-
Function call details for OpenAI-compatible tool calls.
OpenAIChoice: OpenAIChoice:
type: object type: object
properties: properties:
@ -7124,12 +7184,19 @@ components:
properties: properties:
url: url:
type: string type: string
description: >-
URL of the image to include in the message
detail: detail:
type: string type: string
description: >-
(Optional) Level of detail for image processing. Can be "low", "high",
or "auto"
additionalProperties: false additionalProperties: false
required: required:
- url - url
title: OpenAIImageURL title: OpenAIImageURL
description: >-
Image URL specification for OpenAI-compatible chat completion messages.
OpenAIMessageParam: OpenAIMessageParam:
oneOf: oneOf:
- $ref: '#/components/schemas/OpenAIUserMessageParam' - $ref: '#/components/schemas/OpenAIUserMessageParam'
@ -8405,16 +8472,24 @@ components:
- model - model
- input_messages - input_messages
title: OpenAICompletionWithInputMessages title: OpenAICompletionWithInputMessages
description: >-
List of chat completion objects with their input messages
has_more: has_more:
type: boolean type: boolean
description: >-
Whether there are more completions available beyond this list
first_id: first_id:
type: string type: string
description: ID of the first completion in this list
last_id: last_id:
type: string type: string
description: ID of the last completion in this list
object: object:
type: string type: string
const: list const: list
default: list default: list
description: >-
Must be "list" to identify this as a list response
additionalProperties: false additionalProperties: false
required: required:
- data - data
@ -8423,6 +8498,8 @@ components:
- last_id - last_id
- object - object
title: ListOpenAIChatCompletionResponse title: ListOpenAIChatCompletionResponse
description: >-
Response from listing OpenAI-compatible chat completions.
ListDatasetsResponse: ListDatasetsResponse:
type: object type: object
properties: properties:
@ -8989,10 +9066,14 @@ components:
properties: properties:
name: name:
type: string type: string
description: Name of the schema
description: description:
type: string type: string
description: (Optional) Description of the schema
strict: strict:
type: boolean type: boolean
description: >-
(Optional) Whether to enforce strict adherence to the schema
schema: schema:
type: object type: object
additionalProperties: additionalProperties:
@ -9003,10 +9084,13 @@ components:
- type: string - type: string
- type: array - type: array
- type: object - type: object
description: (Optional) The JSON schema definition
additionalProperties: false additionalProperties: false
required: required:
- name - name
title: OpenAIJSONSchema title: OpenAIJSONSchema
description: >-
JSON schema specification for OpenAI-compatible structured response format.
OpenAIResponseFormatJSONObject: OpenAIResponseFormatJSONObject:
type: object type: object
properties: properties:
@ -9014,10 +9098,14 @@ components:
type: string type: string
const: json_object const: json_object
default: json_object default: json_object
description: >-
Must be "json_object" to indicate generic JSON object response format
additionalProperties: false additionalProperties: false
required: required:
- type - type
title: OpenAIResponseFormatJSONObject title: OpenAIResponseFormatJSONObject
description: >-
JSON object response format for OpenAI-compatible chat completion requests.
OpenAIResponseFormatJSONSchema: OpenAIResponseFormatJSONSchema:
type: object type: object
properties: properties:
@ -9025,13 +9113,19 @@ components:
type: string type: string
const: json_schema const: json_schema
default: json_schema default: json_schema
description: >-
Must be "json_schema" to indicate structured JSON response format
json_schema: json_schema:
$ref: '#/components/schemas/OpenAIJSONSchema' $ref: '#/components/schemas/OpenAIJSONSchema'
description: >-
The JSON schema specification for the response
additionalProperties: false additionalProperties: false
required: required:
- type - type
- json_schema - json_schema
title: OpenAIResponseFormatJSONSchema title: OpenAIResponseFormatJSONSchema
description: >-
JSON schema response format for OpenAI-compatible chat completion requests.
OpenAIResponseFormatParam: OpenAIResponseFormatParam:
oneOf: oneOf:
- $ref: '#/components/schemas/OpenAIResponseFormatText' - $ref: '#/components/schemas/OpenAIResponseFormatText'
@ -9050,10 +9144,14 @@ components:
type: string type: string
const: text const: text
default: text default: text
description: >-
Must be "text" to indicate plain text response format
additionalProperties: false additionalProperties: false
required: required:
- type - type
title: OpenAIResponseFormatText title: OpenAIResponseFormatText
description: >-
Text response format for OpenAI-compatible chat completion requests.
OpenaiChatCompletionRequest: OpenaiChatCompletionRequest:
type: object type: object
properties: properties:

View file

@ -41,11 +41,21 @@ from enum import StrEnum
@json_schema_type @json_schema_type
class GreedySamplingStrategy(BaseModel): class GreedySamplingStrategy(BaseModel):
"""Greedy sampling strategy that selects the highest probability token at each step.
:param type: Must be "greedy" to identify this sampling strategy
"""
type: Literal["greedy"] = "greedy" type: Literal["greedy"] = "greedy"
@json_schema_type @json_schema_type
class TopPSamplingStrategy(BaseModel): class TopPSamplingStrategy(BaseModel):
"""Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p.
:param type: Must be "top_p" to identify this sampling strategy
:param temperature: Controls randomness in sampling. Higher values increase randomness. Must be greater than 0
:param top_p: Cumulative probability threshold for nucleus sampling. Defaults to 0.95
"""
type: Literal["top_p"] = "top_p" type: Literal["top_p"] = "top_p"
temperature: float | None = Field(..., gt=0.0) temperature: float | None = Field(..., gt=0.0)
top_p: float | None = 0.95 top_p: float | None = 0.95
@ -53,6 +63,11 @@ class TopPSamplingStrategy(BaseModel):
@json_schema_type @json_schema_type
class TopKSamplingStrategy(BaseModel): class TopKSamplingStrategy(BaseModel):
"""Top-k sampling strategy that restricts sampling to the k most likely tokens.
:param type: Must be "top_k" to identify this sampling strategy
:param top_k: Number of top tokens to consider for sampling. Must be at least 1
"""
type: Literal["top_k"] = "top_k" type: Literal["top_k"] = "top_k"
top_k: int = Field(..., ge=1) top_k: int = Field(..., ge=1)
@ -108,11 +123,19 @@ class QuantizationType(Enum):
@json_schema_type @json_schema_type
class Fp8QuantizationConfig(BaseModel): class Fp8QuantizationConfig(BaseModel):
"""Configuration for 8-bit floating point quantization.
:param type: Must be "fp8_mixed" to identify this quantization type
"""
type: Literal["fp8_mixed"] = "fp8_mixed" type: Literal["fp8_mixed"] = "fp8_mixed"
@json_schema_type @json_schema_type
class Bf16QuantizationConfig(BaseModel): class Bf16QuantizationConfig(BaseModel):
"""Configuration for BFloat16 precision (typically no quantization).
:param type: Must be "bf16" to identify this quantization type
"""
type: Literal["bf16"] = "bf16" type: Literal["bf16"] = "bf16"
@ -202,6 +225,13 @@ register_schema(Message, name="Message")
@json_schema_type @json_schema_type
class ToolResponse(BaseModel): class ToolResponse(BaseModel):
"""Response from a tool invocation.
:param call_id: Unique identifier for the tool call this response is for
:param tool_name: Name of the tool that was invoked
:param content: The response content from the tool
:param metadata: (Optional) Additional metadata about the tool response
"""
call_id: str call_id: str
tool_name: BuiltinTool | str tool_name: BuiltinTool | str
content: InterleavedContent content: InterleavedContent
@ -439,18 +469,33 @@ class EmbeddingsResponse(BaseModel):
@json_schema_type @json_schema_type
class OpenAIChatCompletionContentPartTextParam(BaseModel): class OpenAIChatCompletionContentPartTextParam(BaseModel):
"""Text content part for OpenAI-compatible chat completion messages.
:param type: Must be "text" to identify this as text content
:param text: The text content of the message
"""
type: Literal["text"] = "text" type: Literal["text"] = "text"
text: str text: str
@json_schema_type @json_schema_type
class OpenAIImageURL(BaseModel): class OpenAIImageURL(BaseModel):
"""Image URL specification for OpenAI-compatible chat completion messages.
:param url: URL of the image to include in the message
:param detail: (Optional) Level of detail for image processing. Can be "low", "high", or "auto"
"""
url: str url: str
detail: str | None = None detail: str | None = None
@json_schema_type @json_schema_type
class OpenAIChatCompletionContentPartImageParam(BaseModel): class OpenAIChatCompletionContentPartImageParam(BaseModel):
"""Image content part for OpenAI-compatible chat completion messages.
:param type: Must be "image_url" to identify this as image content
:param image_url: Image URL specification and processing details
"""
type: Literal["image_url"] = "image_url" type: Literal["image_url"] = "image_url"
image_url: OpenAIImageURL image_url: OpenAIImageURL
@ -495,12 +540,24 @@ class OpenAISystemMessageParam(BaseModel):
@json_schema_type @json_schema_type
class OpenAIChatCompletionToolCallFunction(BaseModel): class OpenAIChatCompletionToolCallFunction(BaseModel):
"""Function call details for OpenAI-compatible tool calls.
:param name: (Optional) Name of the function to call
:param arguments: (Optional) Arguments to pass to the function as a JSON string
"""
name: str | None = None name: str | None = None
arguments: str | None = None arguments: str | None = None
@json_schema_type @json_schema_type
class OpenAIChatCompletionToolCall(BaseModel): class OpenAIChatCompletionToolCall(BaseModel):
"""Tool call specification for OpenAI-compatible chat completion responses.
:param index: (Optional) Index of the tool call in the list
:param id: (Optional) Unique identifier for the tool call
:param type: Must be "function" to identify this as a function call
:param function: (Optional) Function call details
"""
index: int | None = None index: int | None = None
id: str | None = None id: str | None = None
type: Literal["function"] = "function" type: Literal["function"] = "function"
@ -564,11 +621,22 @@ register_schema(OpenAIMessageParam, name="OpenAIMessageParam")
@json_schema_type @json_schema_type
class OpenAIResponseFormatText(BaseModel): class OpenAIResponseFormatText(BaseModel):
"""Text response format for OpenAI-compatible chat completion requests.
:param type: Must be "text" to indicate plain text response format
"""
type: Literal["text"] = "text" type: Literal["text"] = "text"
@json_schema_type @json_schema_type
class OpenAIJSONSchema(TypedDict, total=False): class OpenAIJSONSchema(TypedDict, total=False):
"""JSON schema specification for OpenAI-compatible structured response format.
:param name: Name of the schema
:param description: (Optional) Description of the schema
:param strict: (Optional) Whether to enforce strict adherence to the schema
:param schema: (Optional) The JSON schema definition
"""
name: str name: str
description: str | None description: str | None
strict: bool | None strict: bool | None
@ -582,12 +650,21 @@ class OpenAIJSONSchema(TypedDict, total=False):
@json_schema_type @json_schema_type
class OpenAIResponseFormatJSONSchema(BaseModel): class OpenAIResponseFormatJSONSchema(BaseModel):
"""JSON schema response format for OpenAI-compatible chat completion requests.
:param type: Must be "json_schema" to indicate structured JSON response format
:param json_schema: The JSON schema specification for the response
"""
type: Literal["json_schema"] = "json_schema" type: Literal["json_schema"] = "json_schema"
json_schema: OpenAIJSONSchema json_schema: OpenAIJSONSchema
@json_schema_type @json_schema_type
class OpenAIResponseFormatJSONObject(BaseModel): class OpenAIResponseFormatJSONObject(BaseModel):
"""JSON object response format for OpenAI-compatible chat completion requests.
:param type: Must be "json_object" to indicate generic JSON object response format
"""
type: Literal["json_object"] = "json_object" type: Literal["json_object"] = "json_object"
@ -846,11 +923,19 @@ class EmbeddingTaskType(Enum):
@json_schema_type @json_schema_type
class BatchCompletionResponse(BaseModel): class BatchCompletionResponse(BaseModel):
"""Response from a batch completion request.
:param batch: List of completion responses, one for each input in the batch
"""
batch: list[CompletionResponse] batch: list[CompletionResponse]
@json_schema_type @json_schema_type
class BatchChatCompletionResponse(BaseModel): class BatchChatCompletionResponse(BaseModel):
"""Response from a batch chat completion request.
:param batch: List of chat completion responses, one for each conversation in the batch
"""
batch: list[ChatCompletionResponse] batch: list[ChatCompletionResponse]
@ -860,6 +945,14 @@ class OpenAICompletionWithInputMessages(OpenAIChatCompletion):
@json_schema_type @json_schema_type
class ListOpenAIChatCompletionResponse(BaseModel): class ListOpenAIChatCompletionResponse(BaseModel):
"""Response from listing OpenAI-compatible chat completions.
:param data: List of chat completion objects with their input messages
:param has_more: Whether there are more completions available beyond this list
:param first_id: ID of the first completion in this list
:param last_id: ID of the last completion in this list
:param object: Must be "list" to identify this as a list response
"""
data: list[OpenAICompletionWithInputMessages] data: list[OpenAICompletionWithInputMessages]
has_more: bool has_more: bool
first_id: str first_id: str