# What does this PR do?


## Test Plan
This commit is contained in:
Eric Huang 2025-10-09 17:28:44 -07:00
parent f50ce11a3b
commit 972f2395a1
29 changed files with 1726 additions and 2149 deletions

View file

@ -5437,6 +5437,122 @@ components:
title: OpenAIUserMessageParam
description: >-
A message from the user in an OpenAI-compatible chat completion request.
# Request parameters accepted by the OpenAI-compatible chat completion
# endpoint. Only `model` and `messages` are required; every other listed
# property is optional. Keys that are not listed under `properties` are
# accepted as well (see `additionalProperties` below).
OpenAIChatCompletionRequestParams:
  type: object
  properties:
    model:
      type: string
      description: >-
        The identifier of the model to use. The model must be registered with
        Llama Stack and available via the /models endpoint.
    messages:
      type: array
      items:
        $ref: '#/components/schemas/OpenAIMessageParam'
      description: List of messages in the conversation.
    frequency_penalty:
      type: number
      description: >-
        (Optional) The penalty for repeated tokens.
    function_call:
      oneOf:
        - type: string
        - type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
      description: (Optional) The function call to use.
    functions:
      type: array
      items:
        type: object
        additionalProperties:
          oneOf:
            - type: 'null'
            - type: boolean
            - type: number
            - type: string
            - type: array
            - type: object
      description: (Optional) List of functions to use.
    logit_bias:
      type: object
      additionalProperties:
        type: number
      description: (Optional) The logit bias to use.
    logprobs:
      type: boolean
      description: (Optional) The log probabilities to use.
    max_completion_tokens:
      type: integer
      description: >-
        (Optional) The maximum number of tokens to generate.
    max_tokens:
      type: integer
      description: >-
        (Optional) The maximum number of tokens to generate.
    n:
      type: integer
      description: >-
        (Optional) The number of completions to generate.
    parallel_tool_calls:
      type: boolean
      description: >-
        (Optional) Whether to parallelize tool calls.
    presence_penalty:
      type: number
      description: >-
        (Optional) The penalty for repeated tokens.
    response_format:
      $ref: '#/components/schemas/OpenAIResponseFormatParam'
      description: (Optional) The response format to use.
    seed:
      type: integer
      description: (Optional) The seed to use.
    stop:
      oneOf:
        - type: string
        - type: array
          items:
            type: string
      description: (Optional) The stop tokens to use.
    stream:
      type: boolean
      description: >-
        (Optional) Whether to stream the response.
    stream_options:
      type: object
      additionalProperties:
        oneOf:
          - type: 'null'
          - type: boolean
          - type: number
          - type: string
          - type: array
          - type: object
      description: (Optional) The stream options to use.
    temperature:
      type: number
      description: (Optional) The temperature to use.
    tool_choice:
      oneOf:
        - type: string
        - type: object
          additionalProperties:
            oneOf:
              - type: 'null'
              - type: boolean
              - type: number
              - type: string
              - type: array
              - type: object
      description: (Optional) The tool choice to use.
    tools:
      type: array
      items:
        type: object
        additionalProperties:
          oneOf:
            - type: 'null'
            - type: boolean
            - type: number
            - type: string
            - type: array
            - type: object
      description: (Optional) The tools to use.
    top_logprobs:
      type: integer
      description: >-
        (Optional) The top log probabilities to use.
    top_p:
      type: number
      description: (Optional) The top p to use.
    user:
      type: string
      description: (Optional) The user to use.
  # Must be true: the description states the model uses extra="allow" so that
  # provider-specific parameters can be passed through as extra_body. With
  # `false`, a strict validator or generated client would reject those keys.
  additionalProperties: true
  required:
    - model
    - messages
  title: OpenAIChatCompletionRequestParams
  description: >-
    Request parameters for OpenAI-compatible chat completion endpoint.
    This model uses extra="allow" to capture provider-specific parameters
    which are passed through as extra_body.
OpenAIJSONSchema:
type: object
properties:
@ -5531,145 +5647,15 @@ components:
# Request body schema for the chat completion operation.
# NOTE(review): the hunk header just before this schema reports 145 old lines
# replaced by 15 new ones, and the +/- markers are not visible here. The flat
# per-field properties (model, messages, frequency_penalty, ... user) look
# like the removed side and the `params` wrapper like the added side --
# confirm against the actual spec file before editing.
OpenaiChatCompletionRequest:
type: object
properties:
model:
type: string
params:
$ref: '#/components/schemas/OpenAIChatCompletionRequestParams'
# NOTE(review): two description bodies follow this key in the view: the
# model-identifier text directly below and the "Request parameters
# including model, messages, ..." text near the end of the properties.
# Presumably only the latter belongs to `params` -- verify.
description: >-
The identifier of the model to use. The model must be registered with
Llama Stack and available via the /models endpoint.
messages:
type: array
items:
$ref: '#/components/schemas/OpenAIMessageParam'
description: List of messages in the conversation.
frequency_penalty:
type: number
description: >-
(Optional) The penalty for repeated tokens.
function_call:
oneOf:
- type: string
- type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: (Optional) The function call to use.
functions:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: (Optional) List of functions to use.
logit_bias:
type: object
additionalProperties:
type: number
description: (Optional) The logit bias to use.
logprobs:
type: boolean
description: (Optional) The log probabilities to use.
max_completion_tokens:
type: integer
description: >-
(Optional) The maximum number of tokens to generate.
max_tokens:
type: integer
description: >-
(Optional) The maximum number of tokens to generate.
n:
type: integer
description: >-
(Optional) The number of completions to generate.
parallel_tool_calls:
type: boolean
description: >-
(Optional) Whether to parallelize tool calls.
presence_penalty:
type: number
description: >-
(Optional) The penalty for repeated tokens.
response_format:
$ref: '#/components/schemas/OpenAIResponseFormatParam'
description: (Optional) The response format to use.
seed:
type: integer
description: (Optional) The seed to use.
stop:
oneOf:
- type: string
- type: array
items:
type: string
description: (Optional) The stop tokens to use.
stream:
type: boolean
description: >-
(Optional) Whether to stream the response.
stream_options:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: (Optional) The stream options to use.
temperature:
type: number
description: (Optional) The temperature to use.
tool_choice:
oneOf:
- type: string
- type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: (Optional) The tool choice to use.
tools:
type: array
items:
type: object
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
description: (Optional) The tools to use.
top_logprobs:
type: integer
description: >-
(Optional) The top log probabilities to use.
top_p:
type: number
description: (Optional) The top p to use.
user:
type: string
description: (Optional) The user to use.
Request parameters including model, messages, and optional parameters.
Use params.get_extra_body() to extract provider-specific parameters (e.g.,
chat_template_kwargs for vLLM).
additionalProperties: false
# NOTE(review): `model` and `messages` are presumably the pre-change
# required entries and `params` the post-change one; if `params` is the only
# remaining property, the other two would be unsatisfiable -- verify.
required:
- model
- messages
- params
title: OpenaiChatCompletionRequest
OpenAIChatCompletion:
type: object
@ -5824,14 +5810,11 @@ components:
- model
- input_messages
title: OpenAICompletionWithInputMessages
# Parameter schema for the OpenAI-compatible completion endpoint.
# NOTE(review): two schema names appear on consecutive lines below and the
# span contains hunk headers ("@ -5846,52 +5829,34 @@" and
# "@ -5902,30 +5867,42 @@"), so lines are elided and old/new diff sides are
# presumably interleaved. Treat this view as incomplete -- confirm against
# the actual spec file.
OpenaiCompletionRequest:
OpenAICompletionRequestParams:
type: object
properties:
model:
type: string
description: >-
The identifier of the model to use. The model must be registered with
Llama Stack and available via the /models endpoint.
prompt:
oneOf:
- type: string
@ -5846,52 +5829,34 @@ components:
type: array
items:
type: integer
description: The prompt to generate a completion for.
best_of:
type: integer
description: >-
(Optional) The number of completions to generate.
echo:
type: boolean
description: (Optional) Whether to echo the prompt.
frequency_penalty:
type: number
description: >-
(Optional) The penalty for repeated tokens.
logit_bias:
type: object
additionalProperties:
type: number
description: (Optional) The logit bias to use.
logprobs:
type: boolean
description: (Optional) The log probabilities to use.
max_tokens:
type: integer
description: >-
(Optional) The maximum number of tokens to generate.
n:
type: integer
description: >-
(Optional) The number of completions to generate.
presence_penalty:
type: number
description: >-
(Optional) The penalty for repeated tokens.
seed:
type: integer
description: (Optional) The seed to use.
stop:
oneOf:
- type: string
- type: array
items:
type: string
description: (Optional) The stop tokens to use.
stream:
type: boolean
description: >-
(Optional) Whether to stream the response.
stream_options:
type: object
additionalProperties:
@ -5902,30 +5867,42 @@ components:
- type: string
- type: array
- type: object
description: (Optional) The stream options to use.
temperature:
type: number
description: (Optional) The temperature to use.
top_p:
type: number
description: (Optional) The top p to use.
user:
type: string
description: (Optional) The user to use.
# NOTE(review): `suffix` appears twice in this view (here without a
# description, and again after `prompt_logprobs` with one). Presumably one is
# the added line and the other the removed line; a real duplicate key would
# be invalid YAML -- verify.
suffix:
type: string
guided_choice:
type: array
items:
type: string
prompt_logprobs:
type: integer
suffix:
type: string
description: >-
(Optional) The suffix that should be appended to the completion.
additionalProperties: false
# NOTE(review): the `additionalProperties: false` just above appears to
# conflict with this schema's own description, which says the model uses
# extra="allow" to pass provider-specific parameters through -- confirm
# whether the spec should allow additional properties here.
required:
- model
- prompt
title: OpenAICompletionRequestParams
description: >-
Request parameters for OpenAI-compatible completion endpoint.
This model uses extra="allow" to capture provider-specific parameters
(like vLLM's guided_choice) which are passed through as extra_body.
# Envelope for a completion request: one required `params` object that
# references OpenAICompletionRequestParams. No other top-level keys are
# permitted on the envelope itself.
OpenaiCompletionRequest:
  title: OpenaiCompletionRequest
  type: object
  required: [params]
  additionalProperties: false
  properties:
    params:
      $ref: '#/components/schemas/OpenAICompletionRequestParams'
      description: >-
        Request parameters including model, prompt, and optional
        parameters. Use params.get_extra_body() to extract
        provider-specific parameters.
OpenAICompletion:
type: object