Inference

2025-12-23 20:51:53 +00:00 · 2025-07-01 15:33:38 -07:00 · 2025-07-01 15:33:38 -07:00 · 8fc72e4669
commit 8fc72e4669
parent c5fd9886ae
3 changed files with 297 additions and 53 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -5144,14 +5144,16 @@
                    "type": {
                        "type": "string",
                        "const": "greedy",
-                        "default": "greedy"
+                        "default": "greedy",
+                        "description": "Must be \"greedy\" to identify this sampling strategy"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type"
                ],
-                "title": "GreedySamplingStrategy"
+                "title": "GreedySamplingStrategy",
+                "description": "Greedy sampling strategy that selects the highest probability token at each step."
            },
            "ImageContentItem": {
                "type": "object",
@@ -5671,10 +5673,12 @@
                    "type": {
                        "type": "string",
                        "const": "top_k",
-                        "default": "top_k"
+                        "default": "top_k",
+                        "description": "Must be \"top_k\" to identify this sampling strategy"
                    },
                    "top_k": {
-                        "type": "integer"
+                        "type": "integer",
+                        "description": "Number of top tokens to consider for sampling. Must be at least 1"
                    }
                },
                "additionalProperties": false,
@@ -5682,7 +5686,8 @@
                    "type",
                    "top_k"
                ],
-                "title": "TopKSamplingStrategy"
+                "title": "TopKSamplingStrategy",
+                "description": "Top-k sampling strategy that restricts sampling to the k most likely tokens."
            },
            "TopPSamplingStrategy": {
                "type": "object",
@@ -5690,21 +5695,25 @@
                    "type": {
                        "type": "string",
                        "const": "top_p",
-                        "default": "top_p"
+                        "default": "top_p",
+                        "description": "Must be \"top_p\" to identify this sampling strategy"
                    },
                    "temperature": {
-                        "type": "number"
+                        "type": "number",
+                        "description": "Controls randomness in sampling. Higher values increase randomness"
                    },
                    "top_p": {
                        "type": "number",
-                        "default": 0.95
+                        "default": 0.95,
+                        "description": "Cumulative probability threshold for nucleus sampling. Defaults to 0.95"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type"
                ],
-                "title": "TopPSamplingStrategy"
+                "title": "TopPSamplingStrategy",
+                "description": "Top-p (nucleus) sampling strategy that samples from the smallest set of tokens with cumulative probability >= p."
            },
            "URL": {
                "type": "object",
@@ -5808,14 +5817,16 @@
                        "type": "array",
                        "items": {
                            "$ref": "#/components/schemas/ChatCompletionResponse"
-                        }
+                        },
+                        "description": "List of chat completion responses, one for each conversation in the batch"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "batch"
                ],
-                "title": "BatchChatCompletionResponse"
+                "title": "BatchChatCompletionResponse",
+                "description": "Response from a batch chat completion request."
            },
            "ChatCompletionResponse": {
                "type": "object",
@@ -5939,14 +5950,16 @@
                        "type": "array",
                        "items": {
                            "$ref": "#/components/schemas/CompletionResponse"
-                        }
+                        },
+                        "description": "List of completion responses, one for each input in the batch"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "batch"
                ],
-                "title": "BatchCompletionResponse"
+                "title": "BatchCompletionResponse",
+                "description": "Response from a batch completion request."
            },
            "CompletionResponse": {
                "type": "object",
@@ -6938,7 +6951,8 @@
                "type": "object",
                "properties": {
                    "call_id": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "Unique identifier for the tool call this response is for"
                    },
                    "tool_name": {
                        "oneOf": [
@@ -6955,10 +6969,12 @@
                            {
                                "type": "string"
                            }
-                        ]
+                        ],
+                        "description": "Name of the tool that was invoked"
                    },
                    "content": {
-                        "$ref": "#/components/schemas/InterleavedContent"
+                        "$ref": "#/components/schemas/InterleavedContent",
+                        "description": "The response content from the tool"
                    },
                    "metadata": {
                        "type": "object",
@@ -6983,7 +6999,8 @@
                                    "type": "object"
                                }
                            ]
-                        }
+                        },
+                        "description": "(Optional) Additional metadata about the tool response"
                    }
                },
                "additionalProperties": false,
@@ -6992,7 +7009,8 @@
                    "tool_name",
                    "content"
                ],
-                "title": "ToolResponse"
+                "title": "ToolResponse",
+                "description": "Response from a tool invocation."
            },
            "Turn": {
                "type": "object",
@@ -9838,10 +9856,12 @@
                    "type": {
                        "type": "string",
                        "const": "image_url",
-                        "default": "image_url"
+                        "default": "image_url",
+                        "description": "Must be \"image_url\" to identify this as image content"
                    },
                    "image_url": {
-                        "$ref": "#/components/schemas/OpenAIImageURL"
+                        "$ref": "#/components/schemas/OpenAIImageURL",
+                        "description": "Image URL specification and processing details"
                    }
                },
                "additionalProperties": false,
@@ -9849,7 +9869,8 @@
                    "type",
                    "image_url"
                ],
-                "title": "OpenAIChatCompletionContentPartImageParam"
+                "title": "OpenAIChatCompletionContentPartImageParam",
+                "description": "Image content part for OpenAI-compatible chat completion messages."
            },
            "OpenAIChatCompletionContentPartParam": {
                "oneOf": [
@@ -9874,10 +9895,12 @@
                    "type": {
                        "type": "string",
                        "const": "text",
-                        "default": "text"
+                        "default": "text",
+                        "description": "Must be \"text\" to identify this as text content"
                    },
                    "text": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "The text content of the message"
                    }
                },
                "additionalProperties": false,
@@ -9885,44 +9908,53 @@
                    "type",
                    "text"
                ],
-                "title": "OpenAIChatCompletionContentPartTextParam"
+                "title": "OpenAIChatCompletionContentPartTextParam",
+                "description": "Text content part for OpenAI-compatible chat completion messages."
            },
            "OpenAIChatCompletionToolCall": {
                "type": "object",
                "properties": {
                    "index": {
-                        "type": "integer"
+                        "type": "integer",
+                        "description": "(Optional) Index of the tool call in the list"
                    },
                    "id": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "(Optional) Unique identifier for the tool call"
                    },
                    "type": {
                        "type": "string",
                        "const": "function",
-                        "default": "function"
+                        "default": "function",
+                        "description": "Must be \"function\" to identify this as a function call"
                    },
                    "function": {
-                        "$ref": "#/components/schemas/OpenAIChatCompletionToolCallFunction"
+                        "$ref": "#/components/schemas/OpenAIChatCompletionToolCallFunction",
+                        "description": "(Optional) Function call details"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type"
                ],
-                "title": "OpenAIChatCompletionToolCall"
+                "title": "OpenAIChatCompletionToolCall",
+                "description": "Tool call specification for OpenAI-compatible chat completion responses."
            },
            "OpenAIChatCompletionToolCallFunction": {
                "type": "object",
                "properties": {
                    "name": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "(Optional) Name of the function to call"
                    },
                    "arguments": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "(Optional) Arguments to pass to the function as a JSON string"
                    }
                },
                "additionalProperties": false,
-                "title": "OpenAIChatCompletionToolCallFunction"
+                "title": "OpenAIChatCompletionToolCallFunction",
+                "description": "Function call details for OpenAI-compatible tool calls."
            },
            "OpenAIChoice": {
                "type": "object",
@@ -10015,17 +10047,20 @@
                "type": "object",
                "properties": {
                    "url": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "URL of the image to include in the message"
                    },
                    "detail": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "(Optional) Level of detail for image processing. Can be \"low\", \"high\", or \"auto\""
                    }
                },
                "additionalProperties": false,
                "required": [
                    "url"
                ],
-                "title": "OpenAIImageURL"
+                "title": "OpenAIImageURL",
+                "description": "Image URL specification for OpenAI-compatible chat completion messages."
            },
            "OpenAIMessageParam": {
                "oneOf": [
@@ -11882,21 +11917,26 @@
                                "input_messages"
                            ],
                            "title": "OpenAICompletionWithInputMessages"
-                        }
+                        },
+                        "description": "List of chat completion objects with their input messages"
                    },
                    "has_more": {
-                        "type": "boolean"
+                        "type": "boolean",
+                        "description": "Whether there are more completions available beyond this list"
                    },
                    "first_id": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "ID of the first completion in this list"
                    },
                    "last_id": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "ID of the last completion in this list"
                    },
                    "object": {
                        "type": "string",
                        "const": "list",
-                        "default": "list"
+                        "default": "list",
+                        "description": "Must be \"list\" to identify this as a list response"
                    }
                },
                "additionalProperties": false,
@@ -11907,7 +11947,8 @@
                    "last_id",
                    "object"
                ],
-                "title": "ListOpenAIChatCompletionResponse"
+                "title": "ListOpenAIChatCompletionResponse",
+                "description": "Response from listing OpenAI-compatible chat completions."
            },
            "ListDatasetsResponse": {
                "type": "object",
@@ -12762,13 +12803,16 @@
                "type": "object",
                "properties": {
                    "name": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "Name of the schema"
                    },
                    "description": {
-                        "type": "string"
+                        "type": "string",
+                        "description": "(Optional) Description of the schema"
                    },
                    "strict": {
-                        "type": "boolean"
+                        "type": "boolean",
+                        "description": "(Optional) Whether to enforce strict adherence to the schema"
                    },
                    "schema": {
                        "type": "object",
@@ -12793,14 +12837,16 @@
                                    "type": "object"
                                }
                            ]
-                        }
+                        },
+                        "description": "(Optional) The JSON schema definition"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "name"
                ],
-                "title": "OpenAIJSONSchema"
+                "title": "OpenAIJSONSchema",
+                "description": "JSON schema specification for OpenAI-compatible structured response format."
            },
            "OpenAIResponseFormatJSONObject": {
                "type": "object",
@@ -12808,14 +12854,16 @@
                    "type": {
                        "type": "string",
                        "const": "json_object",
-                        "default": "json_object"
+                        "default": "json_object",
+                        "description": "Must be \"json_object\" to indicate generic JSON object response format"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type"
                ],
-                "title": "OpenAIResponseFormatJSONObject"
+                "title": "OpenAIResponseFormatJSONObject",
+                "description": "JSON object response format for OpenAI-compatible chat completion requests."
            },
            "OpenAIResponseFormatJSONSchema": {
                "type": "object",
@@ -12823,10 +12871,12 @@
                    "type": {
                        "type": "string",
                        "const": "json_schema",
-                        "default": "json_schema"
+                        "default": "json_schema",
+                        "description": "Must be \"json_schema\" to indicate structured JSON response format"
                    },
                    "json_schema": {
-                        "$ref": "#/components/schemas/OpenAIJSONSchema"
+                        "$ref": "#/components/schemas/OpenAIJSONSchema",
+                        "description": "The JSON schema specification for the response"
                    }
                },
                "additionalProperties": false,
@@ -12834,7 +12884,8 @@
                    "type",
                    "json_schema"
                ],
-                "title": "OpenAIResponseFormatJSONSchema"
+                "title": "OpenAIResponseFormatJSONSchema",
+                "description": "JSON schema response format for OpenAI-compatible chat completion requests."
            },
            "OpenAIResponseFormatParam": {
                "oneOf": [
@@ -12863,14 +12914,16 @@
                    "type": {
                        "type": "string",
                        "const": "text",
-                        "default": "text"
+                        "default": "text",
+                        "description": "Must be \"text\" to indicate plain text response format"
                    }
                },
                "additionalProperties": false,
                "required": [
                    "type"
                ],
-                "title": "OpenAIResponseFormatText"
+                "title": "OpenAIResponseFormatText",
+                "description": "Text response format for OpenAI-compatible chat completion requests."
            },
            "OpenaiChatCompletionRequest": {
                "type": "object",