test

# What does this PR do?

## Test Plan
2025-10-11 21:48:36 +00:00 · 2025-10-09 17:28:44 -07:00 · 2025-10-09 17:28:44 -07:00 · 972f2395a1
commit 972f2395a1
parent f50ce11a3b
29 changed files with 1726 additions and 2149 deletions
--- a/docs/static/deprecated-llama-stack-spec.yaml
+++ b/docs/static/deprecated-llama-stack-spec.yaml
@@ -5437,6 +5437,122 @@ components:
      title: OpenAIUserMessageParam
      description: >-
        A message from the user in an OpenAI-compatible chat completion request.
+    OpenAIChatCompletionRequestParams:
+      type: object
+      properties:
+        model:
+          type: string
+        messages:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAIMessageParam'
+        frequency_penalty:
+          type: number
+        function_call:
+          oneOf:
+            - type: string
+            - type: object
+              additionalProperties:
+                oneOf:
+                  - type: 'null'
+                  - type: boolean
+                  - type: number
+                  - type: string
+                  - type: array
+                  - type: object
+        functions:
+          type: array
+          items:
+            type: object
+            additionalProperties:
+              oneOf:
+                - type: 'null'
+                - type: boolean
+                - type: number
+                - type: string
+                - type: array
+                - type: object
+        logit_bias:
+          type: object
+          additionalProperties:
+            type: number
+        logprobs:
+          type: boolean
+        max_completion_tokens:
+          type: integer
+        max_tokens:
+          type: integer
+        n:
+          type: integer
+        parallel_tool_calls:
+          type: boolean
+        presence_penalty:
+          type: number
+        response_format:
+          $ref: '#/components/schemas/OpenAIResponseFormatParam'
+        seed:
+          type: integer
+        stop:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                type: string
+        stream:
+          type: boolean
+        stream_options:
+          type: object
+          additionalProperties:
+            oneOf:
+              - type: 'null'
+              - type: boolean
+              - type: number
+              - type: string
+              - type: array
+              - type: object
+        temperature:
+          type: number
+        tool_choice:
+          oneOf:
+            - type: string
+            - type: object
+              additionalProperties:
+                oneOf:
+                  - type: 'null'
+                  - type: boolean
+                  - type: number
+                  - type: string
+                  - type: array
+                  - type: object
+        tools:
+          type: array
+          items:
+            type: object
+            additionalProperties:
+              oneOf:
+                - type: 'null'
+                - type: boolean
+                - type: number
+                - type: string
+                - type: array
+                - type: object
+        top_logprobs:
+          type: integer
+        top_p:
+          type: number
+        user:
+          type: string
+      additionalProperties: false
+      required:
+        - model
+        - messages
+      title: OpenAIChatCompletionRequestParams
+      description: >-
+        Request parameters for OpenAI-compatible chat completion endpoint.
+
+        This model uses extra="allow" to capture provider-specific parameters
+
+        which are passed through as extra_body.
    OpenAIJSONSchema:
      type: object
      properties:
@@ -5531,145 +5647,15 @@ components:
    OpenaiChatCompletionRequest:
      type: object
      properties:
-        model:
-          type: string
+        params:
+          $ref: '#/components/schemas/OpenAIChatCompletionRequestParams'
          description: >-
-            The identifier of the model to use. The model must be registered with
-            Llama Stack and available via the /models endpoint.
-        messages:
-          type: array
-          items:
-            $ref: '#/components/schemas/OpenAIMessageParam'
-          description: List of messages in the conversation.
-        frequency_penalty:
-          type: number
-          description: >-
-            (Optional) The penalty for repeated tokens.
-        function_call:
-          oneOf:
-            - type: string
-            - type: object
-              additionalProperties:
-                oneOf:
-                  - type: 'null'
-                  - type: boolean
-                  - type: number
-                  - type: string
-                  - type: array
-                  - type: object
-          description: (Optional) The function call to use.
-        functions:
-          type: array
-          items:
-            type: object
-            additionalProperties:
-              oneOf:
-                - type: 'null'
-                - type: boolean
-                - type: number
-                - type: string
-                - type: array
-                - type: object
-          description: (Optional) List of functions to use.
-        logit_bias:
-          type: object
-          additionalProperties:
-            type: number
-          description: (Optional) The logit bias to use.
-        logprobs:
-          type: boolean
-          description: (Optional) The log probabilities to use.
-        max_completion_tokens:
-          type: integer
-          description: >-
-            (Optional) The maximum number of tokens to generate.
-        max_tokens:
-          type: integer
-          description: >-
-            (Optional) The maximum number of tokens to generate.
-        n:
-          type: integer
-          description: >-
-            (Optional) The number of completions to generate.
-        parallel_tool_calls:
-          type: boolean
-          description: >-
-            (Optional) Whether to parallelize tool calls.
-        presence_penalty:
-          type: number
-          description: >-
-            (Optional) The penalty for repeated tokens.
-        response_format:
-          $ref: '#/components/schemas/OpenAIResponseFormatParam'
-          description: (Optional) The response format to use.
-        seed:
-          type: integer
-          description: (Optional) The seed to use.
-        stop:
-          oneOf:
-            - type: string
-            - type: array
-              items:
-                type: string
-          description: (Optional) The stop tokens to use.
-        stream:
-          type: boolean
-          description: >-
-            (Optional) Whether to stream the response.
-        stream_options:
-          type: object
-          additionalProperties:
-            oneOf:
-              - type: 'null'
-              - type: boolean
-              - type: number
-              - type: string
-              - type: array
-              - type: object
-          description: (Optional) The stream options to use.
-        temperature:
-          type: number
-          description: (Optional) The temperature to use.
-        tool_choice:
-          oneOf:
-            - type: string
-            - type: object
-              additionalProperties:
-                oneOf:
-                  - type: 'null'
-                  - type: boolean
-                  - type: number
-                  - type: string
-                  - type: array
-                  - type: object
-          description: (Optional) The tool choice to use.
-        tools:
-          type: array
-          items:
-            type: object
-            additionalProperties:
-              oneOf:
-                - type: 'null'
-                - type: boolean
-                - type: number
-                - type: string
-                - type: array
-                - type: object
-          description: (Optional) The tools to use.
-        top_logprobs:
-          type: integer
-          description: >-
-            (Optional) The top log probabilities to use.
-        top_p:
-          type: number
-          description: (Optional) The top p to use.
-        user:
-          type: string
-          description: (Optional) The user to use.
+            Request parameters including model, messages, and optional parameters.
+            Use params.get_extra_body() to extract provider-specific parameters (e.g.,
+            chat_template_kwargs for vLLM).
      additionalProperties: false
      required:
-        - model
-        - messages
+        - params
      title: OpenaiChatCompletionRequest
    OpenAIChatCompletion:
      type: object
@@ -5824,14 +5810,11 @@ components:
        - model
        - input_messages
      title: OpenAICompletionWithInputMessages
-    OpenaiCompletionRequest:
+    OpenAICompletionRequestParams:
      type: object
      properties:
        model:
          type: string
-          description: >-
-            The identifier of the model to use. The model must be registered with
-            Llama Stack and available via the /models endpoint.
        prompt:
          oneOf:
            - type: string
@@ -5846,52 +5829,34 @@ components:
                type: array
                items:
                  type: integer
-          description: The prompt to generate a completion for.
        best_of:
          type: integer
-          description: >-
-            (Optional) The number of completions to generate.
        echo:
          type: boolean
-          description: (Optional) Whether to echo the prompt.
        frequency_penalty:
          type: number
-          description: >-
-            (Optional) The penalty for repeated tokens.
        logit_bias:
          type: object
          additionalProperties:
            type: number
-          description: (Optional) The logit bias to use.
        logprobs:
          type: boolean
-          description: (Optional) The log probabilities to use.
        max_tokens:
          type: integer
-          description: >-
-            (Optional) The maximum number of tokens to generate.
        n:
          type: integer
-          description: >-
-            (Optional) The number of completions to generate.
        presence_penalty:
          type: number
-          description: >-
-            (Optional) The penalty for repeated tokens.
        seed:
          type: integer
-          description: (Optional) The seed to use.
        stop:
          oneOf:
            - type: string
            - type: array
              items:
                type: string
-          description: (Optional) The stop tokens to use.
        stream:
          type: boolean
-          description: >-
-            (Optional) Whether to stream the response.
        stream_options:
          type: object
          additionalProperties:
@@ -5902,30 +5867,42 @@ components:
              - type: string
              - type: array
              - type: object
-          description: (Optional) The stream options to use.
        temperature:
          type: number
-          description: (Optional) The temperature to use.
        top_p:
          type: number
-          description: (Optional) The top p to use.
        user:
          type: string
-          description: (Optional) The user to use.
+        suffix:
+          type: string
        guided_choice:
          type: array
          items:
            type: string
        prompt_logprobs:
          type: integer
-        suffix:
-          type: string
-          description: >-
-            (Optional) The suffix that should be appended to the completion.
      additionalProperties: false
      required:
        - model
        - prompt
+      title: OpenAICompletionRequestParams
+      description: >-
+        Request parameters for OpenAI-compatible completion endpoint.
+
+        This model uses extra="allow" to capture provider-specific parameters
+
+        (like vLLM's guided_choice) which are passed through as extra_body.
+    OpenaiCompletionRequest:
+      type: object
+      properties:
+        params:
+          $ref: '#/components/schemas/OpenAICompletionRequestParams'
+          description: >-
+            Request parameters including model, prompt, and optional parameters. Use
+            params.get_extra_body() to extract provider-specific parameters.
+      additionalProperties: false
+      required:
+        - params
      title: OpenaiCompletionRequest
    OpenAICompletion:
      type: object