Inference

2025-12-24 09:20:02 +00:00 · 2025-07-01 15:33:38 -07:00 · 2025-07-01 15:33:38 -07:00 · 8fc72e4669
commit 8fc72e4669
parent c5fd9886ae
3 changed files with 297 additions and 53 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -5144,14 +5144,16 @@
                    "type": {
                        "type": "string",
                        "const": "greedy",
-                        "default": "greedy"
+                        "default": "greedy",
+                        "description": "Must be \"greedy\" to identify this sampling strategy"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type"
                ],
-                "title": "GreedySamplingStrategy"
+                "title": "GreedySamplingStrategy",
+                "description": "Greedy sampling strategy that selects the highest probability token at each step."
            },
            "ImageContentItem": {
                "type": "object",
@@ -5671,10 +5673,12 @@
                    "type": {
                        "type": "string",
                        "const": "top_k",
-                        "default": "top_k"
+                        "default": "top_k",
+                        "description": "Must be \"top_k\" to identify this sampling strategy"
                    },
                    "top_k": {
-                        "type": "integer"
+                        "type": "integer",
+                        "description": "Number of top tokens to consider for sampling. Must be at least 1"
                    }
                },
                "additionalProperties": false,
@@ -5682,7 +5686,8 @@
                    "type",
                    "top_k"
                ],
-                "title": "TopKSamplingStrategy"
+                "title": "TopKSamplingStrategy",
+                "description": "Top-k sampling strategy that restricts sampling to the k most likely tokens."
            },
            "TopPSamplingStrategy": {
                "type": "object",
@@ -5690,21 +5695,25 @@
                    "type": {
                        "type": "string",
                        "const": "top_p",
-                        "default": "top_p"
+                        "default": "top_p",
+                        "description": "Must be \"top_p\" to identify this sampling strategy"
                    },
                    "temperature": {
-                        "type": "number"
+                        "type": "number",
+                        "description": "Controls randomness in sampling. Higher values increase randomness"
                    },
                    "top_p": {
                        "type": "number",
-                        "default": 0.95
+                        "default": 0.95,
+                        "description": "Cumulative probability threshold for nucleus sampling. Defaults to 0.95"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type"
                ],
-                "title": "TopPSamplingStrategy"
+                "title": "TopPSamplingStrategy",
+                "description": "Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p."
            },
            "URL": {
                "type": "object",
@@ -5808,14 +5817,16 @@
                        "type": "array",
                        "items": {
                            "$ref": "#/components/schemas/ChatCompletionResponse"
-                        }
+                        },
+                        "description": "List of chat completion responses, one for each conversation in the batch"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "batch"
                ],
-                "title": "BatchChatCompletionResponse"
+                "title": "BatchChatCompletionResponse",
+                "description": "Response from a batch chat completion request."
            },
            "ChatCompletionResponse": {
                "type": "object",
@@ -5939,14 +5950,16 @@
                        "type": "array",
                        "items": {
                            "$ref": "#/components/schemas/CompletionResponse"
-                        }
+                        },
+                        "description": "List of completion responses, one for each input in the batch"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "batch"
                ],
-                "title": "BatchCompletionResponse"
+                "title": "BatchCompletionResponse",
+                "description": "Response from a batch completion request."
            },
            "CompletionResponse": {
                "type": "object",
@@ -6938,7 +6951,8 @@
                "type": "object",
                "properties": {
                    "call_id": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "Unique identifier for the tool call this response is for"
                    },
                    "tool_name": {
                        "oneOf": [
@@ -6955,10 +6969,12 @@
                            {
                                "type": "string"
                            }
-                        ]
+                        ],
+                        "description": "Name of the tool that was invoked"
                    },
                    "content": {
-                        "$ref": "#/components/schemas/InterleavedContent"
+                        "$ref": "#/components/schemas/InterleavedContent",
+                        "description": "The response content from the tool"
                    },
                    "metadata": {
                        "type": "object",
@@ -6983,7 +6999,8 @@
                                    "type": "object"
                                }
                            ]
-                        }
+                        },
+                        "description": "(Optional) Additional metadata about the tool response"
                    }
                },
                "additionalProperties": false,
@@ -6992,7 +7009,8 @@
                    "tool_name",
                    "content"
                ],
-                "title": "ToolResponse"
+                "title": "ToolResponse",
+                "description": "Response from a tool invocation."
            },
            "Turn": {
                "type": "object",
@@ -9838,10 +9856,12 @@
                    "type": {
                        "type": "string",
                        "const": "image_url",
-                        "default": "image_url"
+                        "default": "image_url",
+                        "description": "Must be \"image_url\" to identify this as image content"
                    },
                    "image_url": {
-                        "$ref": "#/components/schemas/OpenAIImageURL"
+                        "$ref": "#/components/schemas/OpenAIImageURL",
+                        "description": "Image URL specification and processing details"
                    }
                },
                "additionalProperties": false,
@@ -9849,7 +9869,8 @@
                    "type",
                    "image_url"
                ],
-                "title": "OpenAIChatCompletionContentPartImageParam"
+                "title": "OpenAIChatCompletionContentPartImageParam",
+                "description": "Image content part for OpenAI-compatible chat completion messages."
            },
            "OpenAIChatCompletionContentPartParam": {
                "oneOf": [
@@ -9874,10 +9895,12 @@
                    "type": {
                        "type": "string",
                        "const": "text",
-                        "default": "text"
+                        "default": "text",
+                        "description": "Must be \"text\" to identify this as text content"
                    },
                    "text": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "The text content of the message"
                    }
                },
                "additionalProperties": false,
@@ -9885,44 +9908,53 @@
                    "type",
                    "text"
                ],
-                "title": "OpenAIChatCompletionContentPartTextParam"
+                "title": "OpenAIChatCompletionContentPartTextParam",
+                "description": "Text content part for OpenAI-compatible chat completion messages."
            },
            "OpenAIChatCompletionToolCall": {
                "type": "object",
                "properties": {
                    "index": {
-                        "type": "integer"
+                        "type": "integer",
+                        "description": "(Optional) Index of the tool call in the list"
                    },
                    "id": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "(Optional) Unique identifier for the tool call"
                    },
                    "type": {
                        "type": "string",
                        "const": "function",
-                        "default": "function"
+                        "default": "function",
+                        "description": "Must be \"function\" to identify this as a function call"
                    },
                    "function": {
-                        "$ref": "#/components/schemas/OpenAIChatCompletionToolCallFunction"
+                        "$ref": "#/components/schemas/OpenAIChatCompletionToolCallFunction",
+                        "description": "(Optional) Function call details"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type"
                ],
-                "title": "OpenAIChatCompletionToolCall"
+                "title": "OpenAIChatCompletionToolCall",
+                "description": "Tool call specification for OpenAI-compatible chat completion responses."
            },
            "OpenAIChatCompletionToolCallFunction": {
                "type": "object",
                "properties": {
                    "name": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "(Optional) Name of the function to call"
                    },
                    "arguments": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "(Optional) Arguments to pass to the function as a JSON string"
                    }
                },
                "additionalProperties": false,
-                "title": "OpenAIChatCompletionToolCallFunction"
+                "title": "OpenAIChatCompletionToolCallFunction",
+                "description": "Function call details for OpenAI-compatible tool calls."
            },
            "OpenAIChoice": {
                "type": "object",
@@ -10015,17 +10047,20 @@
                "type": "object",
                "properties": {
                    "url": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "URL of the image to include in the message"
                    },
                    "detail": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "(Optional) Level of detail for image processing. Can be \"low\", \"high\", or \"auto\""
                    }
                },
                "additionalProperties": false,
                "required": [
                    "url"
                ],
-                "title": "OpenAIImageURL"
+                "title": "OpenAIImageURL",
+                "description": "Image URL specification for OpenAI-compatible chat completion messages."
            },
            "OpenAIMessageParam": {
                "oneOf": [
@@ -11882,21 +11917,26 @@
                                "input_messages"
                            ],
                            "title": "OpenAICompletionWithInputMessages"
-                        }
+                        },
+                        "description": "List of chat completion objects with their input messages"
                    },
                    "has_more": {
-                        "type": "boolean"
+                        "type": "boolean",
+                        "description": "Whether there are more completions available beyond this list"
                    },
                    "first_id": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "ID of the first completion in this list"
                    },
                    "last_id": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "ID of the last completion in this list"
                    },
                    "object": {
                        "type": "string",
                        "const": "list",
-                        "default": "list"
+                        "default": "list",
+                        "description": "Must be \"list\" to identify this as a list response"
                    }
                },
                "additionalProperties": false,
@@ -11907,7 +11947,8 @@
                    "last_id",
                    "object"
                ],
-                "title": "ListOpenAIChatCompletionResponse"
+                "title": "ListOpenAIChatCompletionResponse",
+                "description": "Response from listing OpenAI-compatible chat completions."
            },
            "ListDatasetsResponse": {
                "type": "object",
@@ -12762,13 +12803,16 @@
                "type": "object",
                "properties": {
                    "name": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "Name of the schema"
                    },
                    "description": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "(Optional) Description of the schema"
                    },
                    "strict": {
-                        "type": "boolean"
+                        "type": "boolean",
+                        "description": "(Optional) Whether to enforce strict adherence to the schema"
                    },
                    "schema": {
                        "type": "object",
@@ -12793,14 +12837,16 @@
                                    "type": "object"
                                }
                            ]
-                        }
+                        },
+                        "description": "(Optional) The JSON schema definition"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "name"
                ],
-                "title": "OpenAIJSONSchema"
+                "title": "OpenAIJSONSchema",
+                "description": "JSON schema specification for OpenAI-compatible structured response format."
            },
            "OpenAIResponseFormatJSONObject": {
                "type": "object",
@@ -12808,14 +12854,16 @@
                    "type": {
                        "type": "string",
                        "const": "json_object",
-                        "default": "json_object"
+                        "default": "json_object",
+                        "description": "Must be \"json_object\" to indicate generic JSON object response format"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type"
                ],
-                "title": "OpenAIResponseFormatJSONObject"
+                "title": "OpenAIResponseFormatJSONObject",
+                "description": "JSON object response format for OpenAI-compatible chat completion requests."
            },
            "OpenAIResponseFormatJSONSchema": {
                "type": "object",
@@ -12823,10 +12871,12 @@
                    "type": {
                        "type": "string",
                        "const": "json_schema",
-                        "default": "json_schema"
+                        "default": "json_schema",
+                        "description": "Must be \"json_schema\" to indicate structured JSON response format"
                    },
                    "json_schema": {
-                        "$ref": "#/components/schemas/OpenAIJSONSchema"
+                        "$ref": "#/components/schemas/OpenAIJSONSchema",
+                        "description": "The JSON schema specification for the response"
                    }
                },
                "additionalProperties": false,
@@ -12834,7 +12884,8 @@
                    "type",
                    "json_schema"
                ],
-                "title": "OpenAIResponseFormatJSONSchema"
+                "title": "OpenAIResponseFormatJSONSchema",
+                "description": "JSON schema response format for OpenAI-compatible chat completion requests."
            },
            "OpenAIResponseFormatParam": {
                "oneOf": [
@@ -12863,14 +12914,16 @@
                    "type": {
                        "type": "string",
                        "const": "text",
-                        "default": "text"
+                        "default": "text",
+                        "description": "Must be \"text\" to indicate plain text response format"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type"
                ],
-                "title": "OpenAIResponseFormatText"
+                "title": "OpenAIResponseFormatText",
+                "description": "Text response format for OpenAI-compatible chat completion requests."
            },
            "OpenaiChatCompletionRequest": {
                "type": "object",
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -3636,10 +3636,15 @@ components:
          type: string
          const: greedy
          default: greedy
+          description: >-
+            Must be "greedy" to identify this sampling strategy
      additionalProperties: false
      required:
        - type
      title: GreedySamplingStrategy
+      description: >-
+        Greedy sampling strategy that selects the highest probability token at each
+        step.
    ImageContentItem:
      type: object
      properties:
@@ -3997,13 +4002,19 @@ components:
          type: string
          const: top_k
          default: top_k
+          description: >-
+            Must be "top_k" to identify this sampling strategy
        top_k:
          type: integer
+          description: >-
+            Number of top tokens to consider for sampling. Must be at least 1
      additionalProperties: false
      required:
        - type
        - top_k
      title: TopKSamplingStrategy
+      description: >-
+        Top-k sampling strategy that restricts sampling to the k most likely tokens.
    TopPSamplingStrategy:
      type: object
      properties:
@@ -4011,15 +4022,24 @@ components:
          type: string
          const: top_p
          default: top_p
+          description: >-
+            Must be "top_p" to identify this sampling strategy
        temperature:
          type: number
+          description: >-
+            Controls randomness in sampling. Higher values increase randomness
        top_p:
          type: number
          default: 0.95
+          description: >-
+            Cumulative probability threshold for nucleus sampling. Defaults to 0.95
      additionalProperties: false
      required:
        - type
      title: TopPSamplingStrategy
+      description: >-
+        Top-p (nucleus) sampling strategy that samples from the smallest set of tokens
+        with cumulative probability >= p.
    URL:
      type: object
      properties:
@@ -4111,10 +4131,14 @@ components:
          type: array
          items:
            $ref: '#/components/schemas/ChatCompletionResponse'
+          description: >-
+            List of chat completion responses, one for each conversation in the batch
      additionalProperties: false
      required:
        - batch
      title: BatchChatCompletionResponse
+      description: >-
+        Response from a batch chat completion request.
    ChatCompletionResponse:
      type: object
      properties:
@@ -4211,10 +4235,14 @@ components:
          type: array
          items:
            $ref: '#/components/schemas/CompletionResponse'
+          description: >-
+            List of completion responses, one for each input in the batch
      additionalProperties: false
      required:
        - batch
      title: BatchCompletionResponse
+      description: >-
+        Response from a batch completion request.
    CompletionResponse:
      type: object
      properties:
@@ -4967,6 +4995,8 @@ components:
      properties:
        call_id:
          type: string
+          description: >-
+            Unique identifier for the tool call this response is for
        tool_name:
          oneOf:
            - type: string
@@ -4977,8 +5007,10 @@ components:
                - code_interpreter
              title: BuiltinTool
            - type: string
+          description: Name of the tool that was invoked
        content:
          $ref: '#/components/schemas/InterleavedContent'
+          description: The response content from the tool
        metadata:
          type: object
          additionalProperties:
@@ -4989,12 +5021,15 @@ components:
              - type: string
              - type: array
              - type: object
+          description: >-
+            (Optional) Additional metadata about the tool response
      additionalProperties: false
      required:
        - call_id
        - tool_name
        - content
      title: ToolResponse
+      description: Response from a tool invocation.
    Turn:
      type: object
      properties:
@@ -6991,14 +7026,20 @@ components:
          type: string
          const: image_url
          default: image_url
+          description: >-
+            Must be "image_url" to identify this as image content
        image_url:
          $ref: '#/components/schemas/OpenAIImageURL'
+          description: >-
+            Image URL specification and processing details
      additionalProperties: false
      required:
        - type
        - image_url
      title: >-
        OpenAIChatCompletionContentPartImageParam
+      description: >-
+        Image content part for OpenAI-compatible chat completion messages.
    OpenAIChatCompletionContentPartParam:
      oneOf:
        - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
@@ -7015,39 +7056,58 @@ components:
          type: string
          const: text
          default: text
+          description: >-
+            Must be "text" to identify this as text content
        text:
          type: string
+          description: The text content of the message
      additionalProperties: false
      required:
        - type
        - text
      title: OpenAIChatCompletionContentPartTextParam
+      description: >-
+        Text content part for OpenAI-compatible chat completion messages.
    OpenAIChatCompletionToolCall:
      type: object
      properties:
        index:
          type: integer
+          description: >-
+            (Optional) Index of the tool call in the list
        id:
          type: string
+          description: >-
+            (Optional) Unique identifier for the tool call
        type:
          type: string
          const: function
          default: function
+          description: >-
+            Must be "function" to identify this as a function call
        function:
          $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction'
+          description: (Optional) Function call details
      additionalProperties: false
      required:
        - type
      title: OpenAIChatCompletionToolCall
+      description: >-
+        Tool call specification for OpenAI-compatible chat completion responses.
    OpenAIChatCompletionToolCallFunction:
      type: object
      properties:
        name:
          type: string
+          description: (Optional) Name of the function to call
        arguments:
          type: string
+          description: >-
+            (Optional) Arguments to pass to the function as a JSON string
      additionalProperties: false
      title: OpenAIChatCompletionToolCallFunction
+      description: >-
+        Function call details for OpenAI-compatible tool calls.
    OpenAIChoice:
      type: object
      properties:
@@ -7124,12 +7184,19 @@ components:
      properties:
        url:
          type: string
+          description: >-
+            URL of the image to include in the message
        detail:
          type: string
+          description: >-
+            (Optional) Level of detail for image processing. Can be "low", "high",
+            or "auto"
      additionalProperties: false
      required:
        - url
      title: OpenAIImageURL
+      description: >-
+        Image URL specification for OpenAI-compatible chat completion messages.
    OpenAIMessageParam:
      oneOf:
        - $ref: '#/components/schemas/OpenAIUserMessageParam'
@@ -8405,16 +8472,24 @@ components:
              - model
              - input_messages
            title: OpenAICompletionWithInputMessages
+          description: >-
+            List of chat completion objects with their input messages
        has_more:
          type: boolean
+          description: >-
+            Whether there are more completions available beyond this list
        first_id:
          type: string
+          description: ID of the first completion in this list
        last_id:
          type: string
+          description: ID of the last completion in this list
        object:
          type: string
          const: list
          default: list
+          description: >-
+            Must be "list" to identify this as a list response
      additionalProperties: false
      required:
        - data
@@ -8423,6 +8498,8 @@ components:
        - last_id
        - object
      title: ListOpenAIChatCompletionResponse
+      description: >-
+        Response from listing OpenAI-compatible chat completions.
    ListDatasetsResponse:
      type: object
      properties:
@@ -8989,10 +9066,14 @@ components:
      properties:
        name:
          type: string
+          description: Name of the schema
        description:
          type: string
+          description: (Optional) Description of the schema
        strict:
          type: boolean
+          description: >-
+            (Optional) Whether to enforce strict adherence to the schema
        schema:
          type: object
          additionalProperties:
@@ -9003,10 +9084,13 @@ components:
              - type: string
              - type: array
              - type: object
+          description: (Optional) The JSON schema definition
      additionalProperties: false
      required:
        - name
      title: OpenAIJSONSchema
+      description: >-
+        JSON schema specification for OpenAI-compatible structured response format.
    OpenAIResponseFormatJSONObject:
      type: object
      properties:
@@ -9014,10 +9098,14 @@ components:
          type: string
          const: json_object
          default: json_object
+          description: >-
+            Must be "json_object" to indicate generic JSON object response format
      additionalProperties: false
      required:
        - type
      title: OpenAIResponseFormatJSONObject
+      description: >-
+        JSON object response format for OpenAI-compatible chat completion requests.
    OpenAIResponseFormatJSONSchema:
      type: object
      properties:
@@ -9025,13 +9113,19 @@ components:
          type: string
          const: json_schema
          default: json_schema
+          description: >-
+            Must be "json_schema" to indicate structured JSON response format
        json_schema:
          $ref: '#/components/schemas/OpenAIJSONSchema'
+          description: >-
+            The JSON schema specification for the response
      additionalProperties: false
      required:
        - type
        - json_schema
      title: OpenAIResponseFormatJSONSchema
+      description: >-
+        JSON schema response format for OpenAI-compatible chat completion requests.
    OpenAIResponseFormatParam:
      oneOf:
        - $ref: '#/components/schemas/OpenAIResponseFormatText'
@@ -9050,10 +9144,14 @@ components:
          type: string
          const: text
          default: text
+          description: >-
+            Must be "text" to indicate plain text response format
      additionalProperties: false
      required:
        - type
      title: OpenAIResponseFormatText
+      description: >-
+        Text response format for OpenAI-compatible chat completion requests.
    OpenaiChatCompletionRequest:
      type: object
      properties:
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@@ -41,11 +41,21 @@ from enum import StrEnum

@json_schema_type
 class GreedySamplingStrategy(BaseModel):
+    """Greedy sampling strategy that selects the highest probability token at each step.
+
+    :param type: Must be "greedy" to identify this sampling strategy
+    """
    type: Literal["greedy"] = "greedy"


@json_schema_type
 class TopPSamplingStrategy(BaseModel):
+    """Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p.
+
+    :param type: Must be "top_p" to identify this sampling strategy
+    :param temperature: Controls randomness in sampling. Higher values increase randomness
+    :param top_p: Cumulative probability threshold for nucleus sampling. Defaults to 0.95
+    """
    type: Literal["top_p"] = "top_p"
    temperature: float | None = Field(..., gt=0.0)
    top_p: float | None = 0.95
@@ -53,6 +63,11 @@ class TopPSamplingStrategy(BaseModel):

@json_schema_type
 class TopKSamplingStrategy(BaseModel):
+    """Top-k sampling strategy that restricts sampling to the k most likely tokens.
+
+    :param type: Must be "top_k" to identify this sampling strategy
+    :param top_k: Number of top tokens to consider for sampling. Must be at least 1
+    """
    type: Literal["top_k"] = "top_k"
    top_k: int = Field(..., ge=1)

@@ -108,11 +123,19 @@ class QuantizationType(Enum):

@json_schema_type
 class Fp8QuantizationConfig(BaseModel):
+    """Configuration for 8-bit floating point quantization.
+
+    :param type: Must be "fp8_mixed" to identify this quantization type
+    """
    type: Literal["fp8_mixed"] = "fp8_mixed"


@json_schema_type
 class Bf16QuantizationConfig(BaseModel):
+    """Configuration for BFloat16 precision (typically no quantization).
+
+    :param type: Must be "bf16" to identify this quantization type
+    """
    type: Literal["bf16"] = "bf16"


@@ -202,6 +225,13 @@ register_schema(Message, name="Message")

@json_schema_type
 class ToolResponse(BaseModel):
+    """Response from a tool invocation.
+
+    :param call_id: Unique identifier for the tool call this response is for
+    :param tool_name: Name of the tool that was invoked
+    :param content: The response content from the tool
+    :param metadata: (Optional) Additional metadata about the tool response
+    """
    call_id: str
    tool_name: BuiltinTool | str
    content: InterleavedContent
@ -439,18 +469,33 @@ class EmbeddingsResponse(BaseModel):

@json_schema_type
 class OpenAIChatCompletionContentPartTextParam(BaseModel):
+    """Text content part for OpenAI-compatible chat completion messages.
+
+    :param type: Must be "text" to identify this as text content
+    :param text: The text content of the message
+    """
    type: Literal["text"] = "text"
    text: str


@json_schema_type
 class OpenAIImageURL(BaseModel):
+    """Image URL specification for OpenAI-compatible chat completion messages.
+
+    :param url: URL of the image to include in the message
+    :param detail: (Optional) Level of detail for image processing. Can be "low", "high", or "auto"
+    """
    url: str
    detail: str | None = None


@json_schema_type
 class OpenAIChatCompletionContentPartImageParam(BaseModel):
+    """Image content part for OpenAI-compatible chat completion messages.
+
+    :param type: Must be "image_url" to identify this as image content
+    :param image_url: Image URL specification and processing details
+    """
    type: Literal["image_url"] = "image_url"
    image_url: OpenAIImageURL

@ -495,12 +540,24 @@ class OpenAISystemMessageParam(BaseModel):

@json_schema_type
 class OpenAIChatCompletionToolCallFunction(BaseModel):
+    """Function call details for OpenAI-compatible tool calls.
+
+    :param name: (Optional) Name of the function to call
+    :param arguments: (Optional) Arguments to pass to the function, encoded as a JSON string
+    """
    name: str | None = None
    arguments: str | None = None


@json_schema_type
 class OpenAIChatCompletionToolCall(BaseModel):
+    """Tool call specification for OpenAI-compatible chat completion responses.
+
+    :param index: (Optional) Index of the tool call in the list
+    :param id: (Optional) Unique identifier for the tool call
+    :param type: Must be "function" to identify this as a function call
+    :param function: (Optional) Function call details
+    """
    index: int | None = None
    id: str | None = None
    type: Literal["function"] = "function"
@ -564,11 +621,22 @@ register_schema(OpenAIMessageParam, name="OpenAIMessageParam")

@json_schema_type
 class OpenAIResponseFormatText(BaseModel):
+    """Text response format for OpenAI-compatible chat completion requests.
+
+    :param type: Must be "text" to indicate plain text response format
+    """
    type: Literal["text"] = "text"


@json_schema_type
 class OpenAIJSONSchema(TypedDict, total=False):
+    """JSON schema specification for OpenAI-compatible structured response format.
+
+    :param name: Name of the schema
+    :param description: (Optional) Description of the schema
+    :param strict: (Optional) Whether to enforce strict adherence to the schema
+    :param schema: (Optional) The JSON schema definition
+    """
    name: str
    description: str | None
    strict: bool | None
@ -582,12 +650,21 @@ class OpenAIJSONSchema(TypedDict, total=False):

@json_schema_type
 class OpenAIResponseFormatJSONSchema(BaseModel):
+    """JSON schema response format for OpenAI-compatible chat completion requests.
+
+    :param type: Must be "json_schema" to indicate structured JSON response format
+    :param json_schema: The JSON schema specification for the response
+    """
    type: Literal["json_schema"] = "json_schema"
    json_schema: OpenAIJSONSchema


@json_schema_type
 class OpenAIResponseFormatJSONObject(BaseModel):
+    """JSON object response format for OpenAI-compatible chat completion requests.
+
+    :param type: Must be "json_object" to indicate generic JSON object response format
+    """
    type: Literal["json_object"] = "json_object"


@ -846,11 +923,19 @@ class EmbeddingTaskType(Enum):

@json_schema_type
 class BatchCompletionResponse(BaseModel):
+    """Response from a batch completion request.
+
+    :param batch: List of completion responses, one for each input in the batch
+    """
    batch: list[CompletionResponse]


@json_schema_type
 class BatchChatCompletionResponse(BaseModel):
+    """Response from a batch chat completion request.
+
+    :param batch: List of chat completion responses, one for each conversation in the batch
+    """
    batch: list[ChatCompletionResponse]


@ -860,6 +945,14 @@ class OpenAICompletionWithInputMessages(OpenAIChatCompletion):

@json_schema_type
 class ListOpenAIChatCompletionResponse(BaseModel):
+    """Response from listing OpenAI-compatible chat completions.
+
+    :param data: List of chat completion objects with their input messages
+    :param has_more: Whether there are more completions available beyond this list
+    :param first_id: ID of the first completion in this list
+    :param last_id: ID of the last completion in this list
+    :param object: Must be "list" to identify this as a list response
+    """
    data: list[OpenAICompletionWithInputMessages]
    has_more: bool
    first_id: str