# What does this PR do?

Groups the request parameters of the OpenAI-compatible chat completion and completion endpoints into dedicated schemas, `OpenAIChatCompletionRequestParams` and `OpenAICompletionRequestParams`. The top-level `OpenaiChatCompletionRequest` and `OpenaiCompletionRequest` bodies now consist of a single required `params` field instead of a flat list of properties. Because the params models use `extra="allow"`, provider-specific parameters (e.g., `chat_template_kwargs` for vLLM, or vLLM's `guided_choice`) are captured and passed through to the provider as `extra_body` via `params.get_extra_body()`. The diffs shown here are the regenerated OpenAPI specs (JSON and YAML).

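Since the diffs only show the regenerated schemas, here is a minimal sketch of the pattern they describe, assuming Pydantic v2. The field names and `get_extra_body()` come from the spec text; the class body and the `model_extra`-based implementation are illustrative, not the actual Llama Stack code.

```python
# Minimal sketch of the new params-wrapper pattern (assumes Pydantic v2).
# The field list is abbreviated; get_extra_body() is named in the spec, but
# this implementation is illustrative, not the actual Llama Stack code.
from pydantic import BaseModel, ConfigDict


class OpenAIChatCompletionRequestParams(BaseModel):
    # extra="allow" keeps undeclared, provider-specific keys on the instance
    model_config = ConfigDict(extra="allow")

    model: str
    messages: list[dict]  # OpenAIMessageParam objects in the real spec
    temperature: float | None = None
    max_tokens: int | None = None

    def get_extra_body(self) -> dict:
        # model_extra holds everything captured via extra="allow"; these are
        # the keys a provider adapter forwards as extra_body.
        return dict(self.model_extra or {})


# A vLLM-specific key rides along without being declared on the model:
params = OpenAIChatCompletionRequestParams(
    model="meta-llama/Llama-3.1-8B-Instruct",
    messages=[{"role": "user", "content": "Hello"}],
    chat_template_kwargs={"thinking": True},  # not a declared field
)
assert params.get_extra_body() == {"chat_template_kwargs": {"thinking": True}}
```
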
## Test Plan
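
A minimal unit-style check of the pass-through behavior might look like the sketch below (same Pydantic assumptions as above; `custom_knob` is a made-up provider key):

```python
# Completion-side counterpart of the sketch above (Pydantic v2 assumed;
# get_extra_body() is illustrative). "custom_knob" is a made-up provider key.
from pydantic import BaseModel, ConfigDict


class OpenAICompletionRequestParams(BaseModel):
    model_config = ConfigDict(extra="allow")

    model: str
    prompt: str  # the generated spec also allows token arrays here

    def get_extra_body(self) -> dict:
        return dict(self.model_extra or {})


def test_extra_body_passthrough():
    params = OpenAICompletionRequestParams(
        model="meta-llama/Llama-3.1-8B-Instruct",
        prompt="The capital of France is",
        custom_knob=3,  # undeclared, so captured by extra="allow"
    )
    assert params.get_extra_body() == {"custom_knob": 3}
```
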
Eric Huang 2025-10-09 17:28:44 -07:00
parent f50ce11a3b
commit 972f2395a1
29 changed files with 1726 additions and 2149 deletions
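
At the wire level, the request body goes from a flat object to one nested under the single required `params` key; hypothetical minimal payloads (values illustrative):

```python
# Hypothetical minimal chat-completion bodies, before and after this change.
body_before = {
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "messages": [{"role": "user", "content": "Hello"}],
}

body_after = {
    "params": {
        "model": "meta-llama/Llama-3.1-8B-Instruct",
        "messages": [{"role": "user", "content": "Hello"}],
        "chat_template_kwargs": {"thinking": True},  # provider-specific extra
    }
}
```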


@@ -7343,6 +7343,233 @@
"title": "OpenAIUserMessageParam",
"description": "A message from the user in an OpenAI-compatible chat completion request."
},
"OpenAIChatCompletionRequestParams": {
"type": "object",
"properties": {
"model": {
"type": "string"
},
"messages": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIMessageParam"
}
},
"frequency_penalty": {
"type": "number"
},
"function_call": {
"oneOf": [
{
"type": "string"
},
{
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
]
},
"functions": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"logit_bias": {
"type": "object",
"additionalProperties": {
"type": "number"
}
},
"logprobs": {
"type": "boolean"
},
"max_completion_tokens": {
"type": "integer"
},
"max_tokens": {
"type": "integer"
},
"n": {
"type": "integer"
},
"parallel_tool_calls": {
"type": "boolean"
},
"presence_penalty": {
"type": "number"
},
"response_format": {
"$ref": "#/components/schemas/OpenAIResponseFormatParam"
},
"seed": {
"type": "integer"
},
"stop": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
]
},
"stream": {
"type": "boolean"
},
"stream_options": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"temperature": {
"type": "number"
},
"tool_choice": {
"oneOf": [
{
"type": "string"
},
{
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
]
},
"tools": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"top_logprobs": {
"type": "integer"
},
"top_p": {
"type": "number"
},
"user": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"model",
"messages"
],
"title": "OpenAIChatCompletionRequestParams",
"description": "Request parameters for OpenAI-compatible chat completion endpoint.\nThis model uses extra=\"allow\" to capture provider-specific parameters\nwhich are passed through as extra_body."
},
"OpenAIJSONSchema": {
"type": "object",
"properties": {
@@ -7472,249 +7699,14 @@
"OpenaiChatCompletionRequest": {
"type": "object",
"properties": {
"model": {
"type": "string",
"description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint."
},
"messages": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIMessageParam"
},
"description": "List of messages in the conversation."
},
"frequency_penalty": {
"type": "number",
"description": "(Optional) The penalty for repeated tokens."
},
"function_call": {
"oneOf": [
{
"type": "string"
},
{
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
],
"description": "(Optional) The function call to use."
},
"functions": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"description": "(Optional) List of functions to use."
},
"logit_bias": {
"type": "object",
"additionalProperties": {
"type": "number"
},
"description": "(Optional) The logit bias to use."
},
"logprobs": {
"type": "boolean",
"description": "(Optional) The log probabilities to use."
},
"max_completion_tokens": {
"type": "integer",
"description": "(Optional) The maximum number of tokens to generate."
},
"max_tokens": {
"type": "integer",
"description": "(Optional) The maximum number of tokens to generate."
},
"n": {
"type": "integer",
"description": "(Optional) The number of completions to generate."
},
"parallel_tool_calls": {
"type": "boolean",
"description": "(Optional) Whether to parallelize tool calls."
},
"presence_penalty": {
"type": "number",
"description": "(Optional) The penalty for repeated tokens."
},
"response_format": {
"$ref": "#/components/schemas/OpenAIResponseFormatParam",
"description": "(Optional) The response format to use."
},
"seed": {
"type": "integer",
"description": "(Optional) The seed to use."
},
"stop": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
],
"description": "(Optional) The stop tokens to use."
},
"stream": {
"type": "boolean",
"description": "(Optional) Whether to stream the response."
},
"stream_options": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "(Optional) The stream options to use."
},
"temperature": {
"type": "number",
"description": "(Optional) The temperature to use."
},
"tool_choice": {
"oneOf": [
{
"type": "string"
},
{
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
],
"description": "(Optional) The tool choice to use."
},
"tools": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"description": "(Optional) The tools to use."
},
"top_logprobs": {
"type": "integer",
"description": "(Optional) The top log probabilities to use."
},
"top_p": {
"type": "number",
"description": "(Optional) The top p to use."
},
"user": {
"type": "string",
"description": "(Optional) The user to use."
"params": {
"$ref": "#/components/schemas/OpenAIChatCompletionRequestParams",
"description": "Request parameters including model, messages, and optional parameters. Use params.get_extra_body() to extract provider-specific parameters (e.g., chat_template_kwargs for vLLM)."
}
},
"additionalProperties": false,
"required": [
"model",
"messages"
"params"
],
"title": "OpenaiChatCompletionRequest"
},
@@ -7900,12 +7892,11 @@
],
"title": "OpenAICompletionWithInputMessages"
},
"OpenaiCompletionRequest": {
"OpenAICompletionRequestParams": {
"type": "object",
"properties": {
"model": {
"type": "string",
"description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint."
"type": "string"
},
"prompt": {
"oneOf": [
@@ -7933,47 +7924,37 @@
}
}
}
],
"description": "The prompt to generate a completion for."
]
},
"best_of": {
"type": "integer",
"description": "(Optional) The number of completions to generate."
"type": "integer"
},
"echo": {
"type": "boolean",
"description": "(Optional) Whether to echo the prompt."
"type": "boolean"
},
"frequency_penalty": {
"type": "number",
"description": "(Optional) The penalty for repeated tokens."
"type": "number"
},
"logit_bias": {
"type": "object",
"additionalProperties": {
"type": "number"
},
"description": "(Optional) The logit bias to use."
}
},
"logprobs": {
"type": "boolean",
"description": "(Optional) The log probabilities to use."
"type": "boolean"
},
"max_tokens": {
"type": "integer",
"description": "(Optional) The maximum number of tokens to generate."
"type": "integer"
},
"n": {
"type": "integer",
"description": "(Optional) The number of completions to generate."
"type": "integer"
},
"presence_penalty": {
"type": "number",
"description": "(Optional) The penalty for repeated tokens."
"type": "number"
},
"seed": {
"type": "integer",
"description": "(Optional) The seed to use."
"type": "integer"
},
"stop": {
"oneOf": [
@@ -7986,12 +7967,10 @@
"type": "string"
}
}
],
"description": "(Optional) The stop tokens to use."
]
},
"stream": {
"type": "boolean",
"description": "(Optional) Whether to stream the response."
"type": "boolean"
},
"stream_options": {
"type": "object",
@@ -8016,20 +7995,19 @@
"type": "object"
}
]
},
"description": "(Optional) The stream options to use."
}
},
"temperature": {
"type": "number",
"description": "(Optional) The temperature to use."
"type": "number"
},
"top_p": {
"type": "number",
"description": "(Optional) The top p to use."
"type": "number"
},
"user": {
"type": "string",
"description": "(Optional) The user to use."
"type": "string"
},
"suffix": {
"type": "string"
},
"guided_choice": {
"type": "array",
@@ -8039,10 +8017,6 @@
},
"prompt_logprobs": {
"type": "integer"
},
"suffix": {
"type": "string",
"description": "(Optional) The suffix that should be appended to the completion."
}
},
"additionalProperties": false,
@@ -8050,6 +8024,21 @@
"model",
"prompt"
],
"title": "OpenAICompletionRequestParams",
"description": "Request parameters for OpenAI-compatible completion endpoint.\nThis model uses extra=\"allow\" to capture provider-specific parameters\n(like vLLM's guided_choice) which are passed through as extra_body."
},
"OpenaiCompletionRequest": {
"type": "object",
"properties": {
"params": {
"$ref": "#/components/schemas/OpenAICompletionRequestParams",
"description": "Request parameters including model, prompt, and optional parameters. Use params.get_extra_body() to extract provider-specific parameters."
}
},
"additionalProperties": false,
"required": [
"params"
],
"title": "OpenaiCompletionRequest"
},
"OpenAICompletion": {


@@ -5437,6 +5437,122 @@ components:
title: OpenAIUserMessageParam
description: >-
A message from the user in an OpenAI-compatible chat completion request.
OpenAIChatCompletionRequestParams:
type: object
properties:
model:
type: string
messages:
type: array
items:
$ref: '#/components/schemas/OpenAIMessageParam'
frequency_penalty:
type: number
function_call:
oneOf:
- type: string
- type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
functions:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
logit_bias:
type: object
additionalProperties:
type: number
logprobs:
type: boolean
max_completion_tokens:
type: integer
max_tokens:
type: integer
n:
type: integer
parallel_tool_calls:
type: boolean
presence_penalty:
type: number
response_format:
$ref: '#/components/schemas/OpenAIResponseFormatParam'
seed:
type: integer
stop:
oneOf:
- type: string
- type: array
items:
type: string
stream:
type: boolean
stream_options:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
temperature:
type: number
tool_choice:
oneOf:
- type: string
- type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
tools:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
top_logprobs:
type: integer
top_p:
type: number
user:
type: string
additionalProperties: false
required:
- model
- messages
title: OpenAIChatCompletionRequestParams
description: >-
Request parameters for OpenAI-compatible chat completion endpoint.
This model uses extra="allow" to capture provider-specific parameters
which are passed through as extra_body.
OpenAIJSONSchema:
type: object
properties:
@@ -5531,145 +5647,15 @@ components:
OpenaiChatCompletionRequest:
type: object
properties:
model:
type: string
params:
$ref: '#/components/schemas/OpenAIChatCompletionRequestParams'
description: >-
The identifier of the model to use. The model must be registered with
Llama Stack and available via the /models endpoint.
messages:
type: array
items:
$ref: '#/components/schemas/OpenAIMessageParam'
description: List of messages in the conversation.
frequency_penalty:
type: number
description: >-
(Optional) The penalty for repeated tokens.
function_call:
oneOf:
- type: string
- type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: (Optional) The function call to use.
functions:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: (Optional) List of functions to use.
logit_bias:
type: object
additionalProperties:
type: number
description: (Optional) The logit bias to use.
logprobs:
type: boolean
description: (Optional) The log probabilities to use.
max_completion_tokens:
type: integer
description: >-
(Optional) The maximum number of tokens to generate.
max_tokens:
type: integer
description: >-
(Optional) The maximum number of tokens to generate.
n:
type: integer
description: >-
(Optional) The number of completions to generate.
parallel_tool_calls:
type: boolean
description: >-
(Optional) Whether to parallelize tool calls.
presence_penalty:
type: number
description: >-
(Optional) The penalty for repeated tokens.
response_format:
$ref: '#/components/schemas/OpenAIResponseFormatParam'
description: (Optional) The response format to use.
seed:
type: integer
description: (Optional) The seed to use.
stop:
oneOf:
- type: string
- type: array
items:
type: string
description: (Optional) The stop tokens to use.
stream:
type: boolean
description: >-
(Optional) Whether to stream the response.
stream_options:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: (Optional) The stream options to use.
temperature:
type: number
description: (Optional) The temperature to use.
tool_choice:
oneOf:
- type: string
- type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: (Optional) The tool choice to use.
tools:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: (Optional) The tools to use.
top_logprobs:
type: integer
description: >-
(Optional) The top log probabilities to use.
top_p:
type: number
description: (Optional) The top p to use.
user:
type: string
description: (Optional) The user to use.
Request parameters including model, messages, and optional parameters.
Use params.get_extra_body() to extract provider-specific parameters (e.g.,
chat_template_kwargs for vLLM).
additionalProperties: false
required:
- model
- messages
- params
title: OpenaiChatCompletionRequest
OpenAIChatCompletion:
type: object
@@ -5824,14 +5810,11 @@ components:
- model
- input_messages
title: OpenAICompletionWithInputMessages
OpenaiCompletionRequest:
OpenAICompletionRequestParams:
type: object
properties:
model:
type: string
description: >-
The identifier of the model to use. The model must be registered with
Llama Stack and available via the /models endpoint.
prompt:
oneOf:
- type: string
@@ -5846,52 +5829,34 @@ components:
type: array
items:
type: integer
description: The prompt to generate a completion for.
best_of:
type: integer
description: >-
(Optional) The number of completions to generate.
echo:
type: boolean
description: (Optional) Whether to echo the prompt.
frequency_penalty:
type: number
description: >-
(Optional) The penalty for repeated tokens.
logit_bias:
type: object
additionalProperties:
type: number
description: (Optional) The logit bias to use.
logprobs:
type: boolean
description: (Optional) The log probabilities to use.
max_tokens:
type: integer
description: >-
(Optional) The maximum number of tokens to generate.
n:
type: integer
description: >-
(Optional) The number of completions to generate.
presence_penalty:
type: number
description: >-
(Optional) The penalty for repeated tokens.
seed:
type: integer
description: (Optional) The seed to use.
stop:
oneOf:
- type: string
- type: array
items:
type: string
description: (Optional) The stop tokens to use.
stream:
type: boolean
description: >-
(Optional) Whether to stream the response.
stream_options:
type: object
additionalProperties:
@@ -5902,30 +5867,42 @@ components:
- type: string
- type: array
- type: object
description: (Optional) The stream options to use.
temperature:
type: number
description: (Optional) The temperature to use.
top_p:
type: number
description: (Optional) The top p to use.
user:
type: string
description: (Optional) The user to use.
suffix:
type: string
guided_choice:
type: array
items:
type: string
prompt_logprobs:
type: integer
suffix:
type: string
description: >-
(Optional) The suffix that should be appended to the completion.
additionalProperties: false
required:
- model
- prompt
title: OpenAICompletionRequestParams
description: >-
Request parameters for OpenAI-compatible completion endpoint.
This model uses extra="allow" to capture provider-specific parameters
(like vLLM's guided_choice) which are passed through as extra_body.
OpenaiCompletionRequest:
type: object
properties:
params:
$ref: '#/components/schemas/OpenAICompletionRequestParams'
description: >-
Request parameters including model, prompt, and optional parameters. Use
params.get_extra_body() to extract provider-specific parameters.
additionalProperties: false
required:
- params
title: OpenaiCompletionRequest
OpenAICompletion:
type: object


@@ -4839,6 +4839,233 @@
"title": "OpenAIUserMessageParam",
"description": "A message from the user in an OpenAI-compatible chat completion request."
},
"OpenAIChatCompletionRequestParams": {
"type": "object",
"properties": {
"model": {
"type": "string"
},
"messages": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIMessageParam"
}
},
"frequency_penalty": {
"type": "number"
},
"function_call": {
"oneOf": [
{
"type": "string"
},
{
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
]
},
"functions": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"logit_bias": {
"type": "object",
"additionalProperties": {
"type": "number"
}
},
"logprobs": {
"type": "boolean"
},
"max_completion_tokens": {
"type": "integer"
},
"max_tokens": {
"type": "integer"
},
"n": {
"type": "integer"
},
"parallel_tool_calls": {
"type": "boolean"
},
"presence_penalty": {
"type": "number"
},
"response_format": {
"$ref": "#/components/schemas/OpenAIResponseFormatParam"
},
"seed": {
"type": "integer"
},
"stop": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
]
},
"stream": {
"type": "boolean"
},
"stream_options": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"temperature": {
"type": "number"
},
"tool_choice": {
"oneOf": [
{
"type": "string"
},
{
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
]
},
"tools": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"top_logprobs": {
"type": "integer"
},
"top_p": {
"type": "number"
},
"user": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"model",
"messages"
],
"title": "OpenAIChatCompletionRequestParams",
"description": "Request parameters for OpenAI-compatible chat completion endpoint.\nThis model uses extra=\"allow\" to capture provider-specific parameters\nwhich are passed through as extra_body."
},
"OpenAIJSONSchema": {
"type": "object",
"properties": {
@@ -4968,249 +5195,14 @@
"OpenaiChatCompletionRequest": {
"type": "object",
"properties": {
"model": {
"type": "string",
"description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint."
},
"messages": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIMessageParam"
},
"description": "List of messages in the conversation."
},
"frequency_penalty": {
"type": "number",
"description": "(Optional) The penalty for repeated tokens."
},
"function_call": {
"oneOf": [
{
"type": "string"
},
{
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
],
"description": "(Optional) The function call to use."
},
"functions": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"description": "(Optional) List of functions to use."
},
"logit_bias": {
"type": "object",
"additionalProperties": {
"type": "number"
},
"description": "(Optional) The logit bias to use."
},
"logprobs": {
"type": "boolean",
"description": "(Optional) The log probabilities to use."
},
"max_completion_tokens": {
"type": "integer",
"description": "(Optional) The maximum number of tokens to generate."
},
"max_tokens": {
"type": "integer",
"description": "(Optional) The maximum number of tokens to generate."
},
"n": {
"type": "integer",
"description": "(Optional) The number of completions to generate."
},
"parallel_tool_calls": {
"type": "boolean",
"description": "(Optional) Whether to parallelize tool calls."
},
"presence_penalty": {
"type": "number",
"description": "(Optional) The penalty for repeated tokens."
},
"response_format": {
"$ref": "#/components/schemas/OpenAIResponseFormatParam",
"description": "(Optional) The response format to use."
},
"seed": {
"type": "integer",
"description": "(Optional) The seed to use."
},
"stop": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
],
"description": "(Optional) The stop tokens to use."
},
"stream": {
"type": "boolean",
"description": "(Optional) Whether to stream the response."
},
"stream_options": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "(Optional) The stream options to use."
},
"temperature": {
"type": "number",
"description": "(Optional) The temperature to use."
},
"tool_choice": {
"oneOf": [
{
"type": "string"
},
{
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
],
"description": "(Optional) The tool choice to use."
},
"tools": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"description": "(Optional) The tools to use."
},
"top_logprobs": {
"type": "integer",
"description": "(Optional) The top log probabilities to use."
},
"top_p": {
"type": "number",
"description": "(Optional) The top p to use."
},
"user": {
"type": "string",
"description": "(Optional) The user to use."
"params": {
"$ref": "#/components/schemas/OpenAIChatCompletionRequestParams",
"description": "Request parameters including model, messages, and optional parameters. Use params.get_extra_body() to extract provider-specific parameters (e.g., chat_template_kwargs for vLLM)."
}
},
"additionalProperties": false,
"required": [
"model",
"messages"
"params"
],
"title": "OpenaiChatCompletionRequest"
},
@@ -5396,12 +5388,11 @@
],
"title": "OpenAICompletionWithInputMessages"
},
"OpenaiCompletionRequest": {
"OpenAICompletionRequestParams": {
"type": "object",
"properties": {
"model": {
"type": "string",
"description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint."
"type": "string"
},
"prompt": {
"oneOf": [
@@ -5429,47 +5420,37 @@
}
}
}
],
"description": "The prompt to generate a completion for."
]
},
"best_of": {
"type": "integer",
"description": "(Optional) The number of completions to generate."
"type": "integer"
},
"echo": {
"type": "boolean",
"description": "(Optional) Whether to echo the prompt."
"type": "boolean"
},
"frequency_penalty": {
"type": "number",
"description": "(Optional) The penalty for repeated tokens."
"type": "number"
},
"logit_bias": {
"type": "object",
"additionalProperties": {
"type": "number"
},
"description": "(Optional) The logit bias to use."
}
},
"logprobs": {
"type": "boolean",
"description": "(Optional) The log probabilities to use."
"type": "boolean"
},
"max_tokens": {
"type": "integer",
"description": "(Optional) The maximum number of tokens to generate."
"type": "integer"
},
"n": {
"type": "integer",
"description": "(Optional) The number of completions to generate."
"type": "integer"
},
"presence_penalty": {
"type": "number",
"description": "(Optional) The penalty for repeated tokens."
"type": "number"
},
"seed": {
"type": "integer",
"description": "(Optional) The seed to use."
"type": "integer"
},
"stop": {
"oneOf": [
@@ -5482,12 +5463,10 @@
"type": "string"
}
}
],
"description": "(Optional) The stop tokens to use."
]
},
"stream": {
"type": "boolean",
"description": "(Optional) Whether to stream the response."
"type": "boolean"
},
"stream_options": {
"type": "object",
@@ -5512,20 +5491,19 @@
"type": "object"
}
]
},
"description": "(Optional) The stream options to use."
}
},
"temperature": {
"type": "number",
"description": "(Optional) The temperature to use."
"type": "number"
},
"top_p": {
"type": "number",
"description": "(Optional) The top p to use."
"type": "number"
},
"user": {
"type": "string",
"description": "(Optional) The user to use."
"type": "string"
},
"suffix": {
"type": "string"
},
"guided_choice": {
"type": "array",
@@ -5535,10 +5513,6 @@
},
"prompt_logprobs": {
"type": "integer"
},
"suffix": {
"type": "string",
"description": "(Optional) The suffix that should be appended to the completion."
}
},
"additionalProperties": false,
@@ -5546,6 +5520,21 @@
"model",
"prompt"
],
"title": "OpenAICompletionRequestParams",
"description": "Request parameters for OpenAI-compatible completion endpoint.\nThis model uses extra=\"allow\" to capture provider-specific parameters\n(like vLLM's guided_choice) which are passed through as extra_body."
},
"OpenaiCompletionRequest": {
"type": "object",
"properties": {
"params": {
"$ref": "#/components/schemas/OpenAICompletionRequestParams",
"description": "Request parameters including model, prompt, and optional parameters. Use params.get_extra_body() to extract provider-specific parameters."
}
},
"additionalProperties": false,
"required": [
"params"
],
"title": "OpenaiCompletionRequest"
},
"OpenAICompletion": {


@@ -3686,6 +3686,122 @@ components:
title: OpenAIUserMessageParam
description: >-
A message from the user in an OpenAI-compatible chat completion request.
OpenAIChatCompletionRequestParams:
type: object
properties:
model:
type: string
messages:
type: array
items:
$ref: '#/components/schemas/OpenAIMessageParam'
frequency_penalty:
type: number
function_call:
oneOf:
- type: string
- type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
functions:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
logit_bias:
type: object
additionalProperties:
type: number
logprobs:
type: boolean
max_completion_tokens:
type: integer
max_tokens:
type: integer
n:
type: integer
parallel_tool_calls:
type: boolean
presence_penalty:
type: number
response_format:
$ref: '#/components/schemas/OpenAIResponseFormatParam'
seed:
type: integer
stop:
oneOf:
- type: string
- type: array
items:
type: string
stream:
type: boolean
stream_options:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
temperature:
type: number
tool_choice:
oneOf:
- type: string
- type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
tools:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
top_logprobs:
type: integer
top_p:
type: number
user:
type: string
additionalProperties: false
required:
- model
- messages
title: OpenAIChatCompletionRequestParams
description: >-
Request parameters for OpenAI-compatible chat completion endpoint.
This model uses extra="allow" to capture provider-specific parameters
which are passed through as extra_body.
OpenAIJSONSchema:
type: object
properties:
@@ -3780,145 +3896,15 @@ components:
OpenaiChatCompletionRequest:
type: object
properties:
model:
type: string
params:
$ref: '#/components/schemas/OpenAIChatCompletionRequestParams'
description: >-
The identifier of the model to use. The model must be registered with
Llama Stack and available via the /models endpoint.
messages:
type: array
items:
$ref: '#/components/schemas/OpenAIMessageParam'
description: List of messages in the conversation.
frequency_penalty:
type: number
description: >-
(Optional) The penalty for repeated tokens.
function_call:
oneOf:
- type: string
- type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: (Optional) The function call to use.
functions:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: (Optional) List of functions to use.
logit_bias:
type: object
additionalProperties:
type: number
description: (Optional) The logit bias to use.
logprobs:
type: boolean
description: (Optional) The log probabilities to use.
max_completion_tokens:
type: integer
description: >-
(Optional) The maximum number of tokens to generate.
max_tokens:
type: integer
description: >-
(Optional) The maximum number of tokens to generate.
n:
type: integer
description: >-
(Optional) The number of completions to generate.
parallel_tool_calls:
type: boolean
description: >-
(Optional) Whether to parallelize tool calls.
presence_penalty:
type: number
description: >-
(Optional) The penalty for repeated tokens.
response_format:
$ref: '#/components/schemas/OpenAIResponseFormatParam'
description: (Optional) The response format to use.
seed:
type: integer
description: (Optional) The seed to use.
stop:
oneOf:
- type: string
- type: array
items:
type: string
description: (Optional) The stop tokens to use.
stream:
type: boolean
description: >-
(Optional) Whether to stream the response.
stream_options:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: (Optional) The stream options to use.
temperature:
type: number
description: (Optional) The temperature to use.
tool_choice:
oneOf:
- type: string
- type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: (Optional) The tool choice to use.
tools:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: (Optional) The tools to use.
top_logprobs:
type: integer
description: >-
(Optional) The top log probabilities to use.
top_p:
type: number
description: (Optional) The top p to use.
user:
type: string
description: (Optional) The user to use.
Request parameters including model, messages, and optional parameters.
Use params.get_extra_body() to extract provider-specific parameters (e.g.,
chat_template_kwargs for vLLM).
additionalProperties: false
required:
- model
- messages
- params
title: OpenaiChatCompletionRequest
OpenAIChatCompletion:
type: object
@@ -4073,14 +4059,11 @@ components:
- model
- input_messages
title: OpenAICompletionWithInputMessages
OpenaiCompletionRequest:
OpenAICompletionRequestParams:
type: object
properties:
model:
type: string
description: >-
The identifier of the model to use. The model must be registered with
Llama Stack and available via the /models endpoint.
prompt:
oneOf:
- type: string
@@ -4095,52 +4078,34 @@ components:
type: array
items:
type: integer
description: The prompt to generate a completion for.
best_of:
type: integer
description: >-
(Optional) The number of completions to generate.
echo:
type: boolean
description: (Optional) Whether to echo the prompt.
frequency_penalty:
type: number
description: >-
(Optional) The penalty for repeated tokens.
logit_bias:
type: object
additionalProperties:
type: number
description: (Optional) The logit bias to use.
logprobs:
type: boolean
description: (Optional) The log probabilities to use.
max_tokens:
type: integer
description: >-
(Optional) The maximum number of tokens to generate.
n:
type: integer
description: >-
(Optional) The number of completions to generate.
presence_penalty:
type: number
description: >-
(Optional) The penalty for repeated tokens.
seed:
type: integer
description: (Optional) The seed to use.
stop:
oneOf:
- type: string
- type: array
items:
type: string
description: (Optional) The stop tokens to use.
stream:
type: boolean
description: >-
(Optional) Whether to stream the response.
stream_options:
type: object
additionalProperties:
@@ -4151,30 +4116,42 @@ components:
- type: string
- type: array
- type: object
description: (Optional) The stream options to use.
temperature:
type: number
description: (Optional) The temperature to use.
top_p:
type: number
description: (Optional) The top p to use.
user:
type: string
description: (Optional) The user to use.
suffix:
type: string
guided_choice:
type: array
items:
type: string
prompt_logprobs:
type: integer
suffix:
type: string
description: >-
(Optional) The suffix that should be appended to the completion.
additionalProperties: false
required:
- model
- prompt
title: OpenAICompletionRequestParams
description: >-
Request parameters for OpenAI-compatible completion endpoint.
This model uses extra="allow" to capture provider-specific parameters
(like vLLM's guided_choice) which are passed through as extra_body.
OpenaiCompletionRequest:
type: object
properties:
params:
$ref: '#/components/schemas/OpenAICompletionRequestParams'
description: >-
Request parameters including model, prompt, and optional parameters. Use
params.get_extra_body() to extract provider-specific parameters.
additionalProperties: false
required:
- params
title: OpenaiCompletionRequest
OpenAICompletion:
type: object


@@ -6848,6 +6848,233 @@
"title": "OpenAIUserMessageParam",
"description": "A message from the user in an OpenAI-compatible chat completion request."
},
"OpenAIChatCompletionRequestParams": {
"type": "object",
"properties": {
"model": {
"type": "string"
},
"messages": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIMessageParam"
}
},
"frequency_penalty": {
"type": "number"
},
"function_call": {
"oneOf": [
{
"type": "string"
},
{
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
]
},
"functions": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"logit_bias": {
"type": "object",
"additionalProperties": {
"type": "number"
}
},
"logprobs": {
"type": "boolean"
},
"max_completion_tokens": {
"type": "integer"
},
"max_tokens": {
"type": "integer"
},
"n": {
"type": "integer"
},
"parallel_tool_calls": {
"type": "boolean"
},
"presence_penalty": {
"type": "number"
},
"response_format": {
"$ref": "#/components/schemas/OpenAIResponseFormatParam"
},
"seed": {
"type": "integer"
},
"stop": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
]
},
"stream": {
"type": "boolean"
},
"stream_options": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"temperature": {
"type": "number"
},
"tool_choice": {
"oneOf": [
{
"type": "string"
},
{
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
]
},
"tools": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
},
"top_logprobs": {
"type": "integer"
},
"top_p": {
"type": "number"
},
"user": {
"type": "string"
}
},
"additionalProperties": false,
"required": [
"model",
"messages"
],
"title": "OpenAIChatCompletionRequestParams",
"description": "Request parameters for OpenAI-compatible chat completion endpoint.\nThis model uses extra=\"allow\" to capture provider-specific parameters\nwhich are passed through as extra_body."
},
"OpenAIJSONSchema": {
"type": "object",
"properties": {
@@ -6977,249 +7204,14 @@
"OpenaiChatCompletionRequest": {
"type": "object",
"properties": {
"model": {
"type": "string",
"description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint."
},
"messages": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIMessageParam"
},
"description": "List of messages in the conversation."
},
"frequency_penalty": {
"type": "number",
"description": "(Optional) The penalty for repeated tokens."
},
"function_call": {
"oneOf": [
{
"type": "string"
},
{
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
],
"description": "(Optional) The function call to use."
},
"functions": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"description": "(Optional) List of functions to use."
},
"logit_bias": {
"type": "object",
"additionalProperties": {
"type": "number"
},
"description": "(Optional) The logit bias to use."
},
"logprobs": {
"type": "boolean",
"description": "(Optional) The log probabilities to use."
},
"max_completion_tokens": {
"type": "integer",
"description": "(Optional) The maximum number of tokens to generate."
},
"max_tokens": {
"type": "integer",
"description": "(Optional) The maximum number of tokens to generate."
},
"n": {
"type": "integer",
"description": "(Optional) The number of completions to generate."
},
"parallel_tool_calls": {
"type": "boolean",
"description": "(Optional) Whether to parallelize tool calls."
},
"presence_penalty": {
"type": "number",
"description": "(Optional) The penalty for repeated tokens."
},
"response_format": {
"$ref": "#/components/schemas/OpenAIResponseFormatParam",
"description": "(Optional) The response format to use."
},
"seed": {
"type": "integer",
"description": "(Optional) The seed to use."
},
"stop": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
],
"description": "(Optional) The stop tokens to use."
},
"stream": {
"type": "boolean",
"description": "(Optional) Whether to stream the response."
},
"stream_options": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
},
"description": "(Optional) The stream options to use."
},
"temperature": {
"type": "number",
"description": "(Optional) The temperature to use."
},
"tool_choice": {
"oneOf": [
{
"type": "string"
},
{
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
}
],
"description": "(Optional) The tool choice to use."
},
"tools": {
"type": "array",
"items": {
"type": "object",
"additionalProperties": {
"oneOf": [
{
"type": "null"
},
{
"type": "boolean"
},
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array"
},
{
"type": "object"
}
]
}
},
"description": "(Optional) The tools to use."
},
"top_logprobs": {
"type": "integer",
"description": "(Optional) The top log probabilities to use."
},
"top_p": {
"type": "number",
"description": "(Optional) The top p to use."
},
"user": {
"type": "string",
"description": "(Optional) The user to use."
"params": {
"$ref": "#/components/schemas/OpenAIChatCompletionRequestParams",
"description": "Request parameters including model, messages, and optional parameters. Use params.get_extra_body() to extract provider-specific parameters (e.g., chat_template_kwargs for vLLM)."
}
},
"additionalProperties": false,
"required": [
"model",
"messages"
"params"
],
"title": "OpenaiChatCompletionRequest"
},
@@ -7405,12 +7397,11 @@
],
"title": "OpenAICompletionWithInputMessages"
},
"OpenaiCompletionRequest": {
"OpenAICompletionRequestParams": {
"type": "object",
"properties": {
"model": {
"type": "string",
"description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint."
"type": "string"
},
"prompt": {
"oneOf": [
@@ -7438,47 +7429,37 @@
}
}
}
],
"description": "The prompt to generate a completion for."
]
},
"best_of": {
"type": "integer",
"description": "(Optional) The number of completions to generate."
"type": "integer"
},
"echo": {
"type": "boolean",
"description": "(Optional) Whether to echo the prompt."
"type": "boolean"
},
"frequency_penalty": {
"type": "number",
"description": "(Optional) The penalty for repeated tokens."
"type": "number"
},
"logit_bias": {
"type": "object",
"additionalProperties": {
"type": "number"
},
"description": "(Optional) The logit bias to use."
}
},
"logprobs": {
"type": "boolean",
"description": "(Optional) The log probabilities to use."
"type": "boolean"
},
"max_tokens": {
"type": "integer",
"description": "(Optional) The maximum number of tokens to generate."
"type": "integer"
},
"n": {
"type": "integer",
"description": "(Optional) The number of completions to generate."
"type": "integer"
},
"presence_penalty": {
"type": "number",
"description": "(Optional) The penalty for repeated tokens."
"type": "number"
},
"seed": {
"type": "integer",
"description": "(Optional) The seed to use."
"type": "integer"
},
"stop": {
"oneOf": [
@@ -7491,12 +7472,10 @@
"type": "string"
}
}
],
"description": "(Optional) The stop tokens to use."
]
},
"stream": {
"type": "boolean",
"description": "(Optional) Whether to stream the response."
"type": "boolean"
},
"stream_options": {
"type": "object",
@@ -7521,20 +7500,19 @@
"type": "object"
}
]
},
"description": "(Optional) The stream options to use."
}
},
"temperature": {
"type": "number",
"description": "(Optional) The temperature to use."
"type": "number"
},
"top_p": {
"type": "number",
"description": "(Optional) The top p to use."
"type": "number"
},
"user": {
"type": "string",
"description": "(Optional) The user to use."
"type": "string"
},
"suffix": {
"type": "string"
},
"guided_choice": {
"type": "array",
@@ -7544,10 +7522,6 @@
},
"prompt_logprobs": {
"type": "integer"
},
"suffix": {
"type": "string",
"description": "(Optional) The suffix that should be appended to the completion."
}
},
"additionalProperties": false,
@@ -7555,6 +7529,21 @@
"model",
"prompt"
],
"title": "OpenAICompletionRequestParams",
"description": "Request parameters for OpenAI-compatible completion endpoint.\nThis model uses extra=\"allow\" to capture provider-specific parameters\n(like vLLM's guided_choice) which are passed through as extra_body."
},
"OpenaiCompletionRequest": {
"type": "object",
"properties": {
"params": {
"$ref": "#/components/schemas/OpenAICompletionRequestParams",
"description": "Request parameters including model, prompt, and optional parameters. Use params.get_extra_body() to extract provider-specific parameters."
}
},
"additionalProperties": false,
"required": [
"params"
],
"title": "OpenaiCompletionRequest"
},
"OpenAICompletion": {


@@ -5131,6 +5131,122 @@ components:
title: OpenAIUserMessageParam
description: >-
A message from the user in an OpenAI-compatible chat completion request.
OpenAIChatCompletionRequestParams:
type: object
properties:
model:
type: string
messages:
type: array
items:
$ref: '#/components/schemas/OpenAIMessageParam'
frequency_penalty:
type: number
function_call:
oneOf:
- type: string
- type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
functions:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
logit_bias:
type: object
additionalProperties:
type: number
logprobs:
type: boolean
max_completion_tokens:
type: integer
max_tokens:
type: integer
n:
type: integer
parallel_tool_calls:
type: boolean
presence_penalty:
type: number
response_format:
$ref: '#/components/schemas/OpenAIResponseFormatParam'
seed:
type: integer
stop:
oneOf:
- type: string
- type: array
items:
type: string
stream:
type: boolean
stream_options:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
temperature:
type: number
tool_choice:
oneOf:
- type: string
- type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
tools:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
top_logprobs:
type: integer
top_p:
type: number
user:
type: string
additionalProperties: false
required:
- model
- messages
title: OpenAIChatCompletionRequestParams
description: >-
Request parameters for OpenAI-compatible chat completion endpoint.
This model uses extra="allow" to capture provider-specific parameters
which are passed through as extra_body.
OpenAIJSONSchema:
type: object
properties:
@@ -5225,145 +5341,15 @@ components:
OpenaiChatCompletionRequest:
type: object
properties:
model:
type: string
params:
$ref: '#/components/schemas/OpenAIChatCompletionRequestParams'
description: >-
The identifier of the model to use. The model must be registered with
Llama Stack and available via the /models endpoint.
messages:
type: array
items:
$ref: '#/components/schemas/OpenAIMessageParam'
description: List of messages in the conversation.
frequency_penalty:
type: number
description: >-
(Optional) The penalty for repeated tokens.
function_call:
oneOf:
- type: string
- type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: (Optional) The function call to use.
functions:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: (Optional) List of functions to use.
logit_bias:
type: object
additionalProperties:
type: number
description: (Optional) The logit bias to use.
logprobs:
type: boolean
description: (Optional) The log probabilities to use.
max_completion_tokens:
type: integer
description: >-
(Optional) The maximum number of tokens to generate.
max_tokens:
type: integer
description: >-
(Optional) The maximum number of tokens to generate.
n:
type: integer
description: >-
(Optional) The number of completions to generate.
parallel_tool_calls:
type: boolean
description: >-
(Optional) Whether to parallelize tool calls.
presence_penalty:
type: number
description: >-
(Optional) The penalty for repeated tokens.
response_format:
$ref: '#/components/schemas/OpenAIResponseFormatParam'
description: (Optional) The response format to use.
seed:
type: integer
description: (Optional) The seed to use.
stop:
oneOf:
- type: string
- type: array
items:
type: string
description: (Optional) The stop tokens to use.
stream:
type: boolean
description: >-
(Optional) Whether to stream the response.
stream_options:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: (Optional) The stream options to use.
temperature:
type: number
description: (Optional) The temperature to use.
tool_choice:
oneOf:
- type: string
- type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: (Optional) The tool choice to use.
tools:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: (Optional) The tools to use.
top_logprobs:
type: integer
description: >-
(Optional) The top log probabilities to use.
top_p:
type: number
description: (Optional) The top p to use.
user:
type: string
description: (Optional) The user to use.
Request parameters including model, messages, and optional parameters.
Use params.get_extra_body() to extract provider-specific parameters (e.g.,
chat_template_kwargs for vLLM).
additionalProperties: false
required:
- model
- messages
- params
title: OpenaiChatCompletionRequest
OpenAIChatCompletion:
type: object
@@ -5518,14 +5504,11 @@ components:
- model
- input_messages
title: OpenAICompletionWithInputMessages
OpenaiCompletionRequest:
OpenAICompletionRequestParams:
type: object
properties:
model:
type: string
description: >-
The identifier of the model to use. The model must be registered with
Llama Stack and available via the /models endpoint.
prompt:
oneOf:
- type: string
@@ -5540,52 +5523,34 @@ components:
type: array
items:
type: integer
description: The prompt to generate a completion for.
best_of:
type: integer
description: >-
(Optional) The number of completions to generate.
echo:
type: boolean
description: (Optional) Whether to echo the prompt.
frequency_penalty:
type: number
description: >-
(Optional) The penalty for repeated tokens.
logit_bias:
type: object
additionalProperties:
type: number
description: (Optional) The logit bias to use.
logprobs:
type: boolean
description: (Optional) The log probabilities to use.
max_tokens:
type: integer
description: >-
(Optional) The maximum number of tokens to generate.
n:
type: integer
description: >-
(Optional) The number of completions to generate.
presence_penalty:
type: number
description: >-
(Optional) The penalty for repeated tokens.
seed:
type: integer
description: (Optional) The seed to use.
stop:
oneOf:
- type: string
- type: array
items:
type: string
description: (Optional) The stop tokens to use.
stream:
type: boolean
description: >-
(Optional) Whether to stream the response.
stream_options:
type: object
additionalProperties:
@@ -5596,30 +5561,42 @@ components:
- type: string
- type: array
- type: object
description: (Optional) The stream options to use.
temperature:
type: number
description: (Optional) The temperature to use.
top_p:
type: number
description: (Optional) The top p to use.
user:
type: string
description: (Optional) The user to use.
suffix:
type: string
guided_choice:
type: array
items:
type: string
prompt_logprobs:
type: integer
suffix:
type: string
description: >-
(Optional) The suffix that should be appended to the completion.
additionalProperties: false
required:
- model
- prompt
title: OpenAICompletionRequestParams
description: >-
Request parameters for OpenAI-compatible completion endpoint.
This model uses extra="allow" to capture provider-specific parameters
(like vLLM's guided_choice) which are passed through as extra_body.
OpenaiCompletionRequest:
type: object
properties:
params:
$ref: '#/components/schemas/OpenAICompletionRequestParams'
description: >-
Request parameters including model, prompt, and optional parameters. Use
params.get_extra_body() to extract provider-specific parameters.
additionalProperties: false
required:
- params
title: OpenaiCompletionRequest
OpenAICompletion:
type: object