mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-29 18:34:20 +00:00
Merge branch 'main' into nvidia-eval-integration
This commit is contained in:
commit
43993cc29c
230 changed files with 220164 additions and 7988 deletions
665
docs/_static/llama-stack-spec.yaml
vendored
665
docs/_static/llama-stack-spec.yaml
vendored
|
|
@ -2131,6 +2131,91 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/LogEventRequest'
|
||||
required: true
|
||||
/v1/openai/v1/chat/completions:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/OpenAIChatCompletion'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
description: >-
|
||||
Generate an OpenAI-compatible chat completion for the given messages using
|
||||
the specified model.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/OpenaiChatCompletionRequest'
|
||||
required: true
|
||||
/v1/openai/v1/completions:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/OpenAICompletion'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
description: >-
|
||||
Generate an OpenAI-compatible completion for the given prompt using the specified
|
||||
model.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/OpenaiCompletionRequest'
|
||||
required: true
|
||||
/v1/openai/v1/models:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/OpenAIListModelsResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Models
|
||||
description: List the available models in an OpenAI-compatible format.
|
||||
parameters: []
|
||||
/v1/post-training/preference-optimize:
|
||||
post:
|
||||
responses:
|
||||
|
|
@ -5989,6 +6074,586 @@ components:
|
|||
- event
|
||||
- ttl_seconds
|
||||
title: LogEventRequest
|
||||
OpenAIAssistantMessageParam:
|
||||
type: object
|
||||
properties:
|
||||
role:
|
||||
type: string
|
||||
const: assistant
|
||||
default: assistant
|
||||
description: >-
|
||||
Must be "assistant" to identify this as the model's response
|
||||
content:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
description: The content of the model's response
|
||||
name:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) The name of the assistant message participant.
|
||||
tool_calls:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ToolCall'
|
||||
description: >-
|
||||
List of tool calls. Each tool call is a ToolCall object.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- role
|
||||
- content
|
||||
title: OpenAIAssistantMessageParam
|
||||
description: >-
|
||||
A message containing the model's (assistant) response in an OpenAI-compatible
|
||||
chat completion request.
|
||||
OpenAIDeveloperMessageParam:
|
||||
type: object
|
||||
properties:
|
||||
role:
|
||||
type: string
|
||||
const: developer
|
||||
default: developer
|
||||
description: >-
|
||||
Must be "developer" to identify this as a developer message
|
||||
content:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
description: The content of the developer message
|
||||
name:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) The name of the developer message participant.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- role
|
||||
- content
|
||||
title: OpenAIDeveloperMessageParam
|
||||
description: >-
|
||||
A message from the developer in an OpenAI-compatible chat completion request.
|
||||
OpenAIMessageParam:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/OpenAIUserMessageParam'
|
||||
- $ref: '#/components/schemas/OpenAISystemMessageParam'
|
||||
- $ref: '#/components/schemas/OpenAIAssistantMessageParam'
|
||||
- $ref: '#/components/schemas/OpenAIToolMessageParam'
|
||||
- $ref: '#/components/schemas/OpenAIDeveloperMessageParam'
|
||||
discriminator:
|
||||
propertyName: role
|
||||
mapping:
|
||||
user: '#/components/schemas/OpenAIUserMessageParam'
|
||||
system: '#/components/schemas/OpenAISystemMessageParam'
|
||||
assistant: '#/components/schemas/OpenAIAssistantMessageParam'
|
||||
tool: '#/components/schemas/OpenAIToolMessageParam'
|
||||
developer: '#/components/schemas/OpenAIDeveloperMessageParam'
|
||||
OpenAISystemMessageParam:
|
||||
type: object
|
||||
properties:
|
||||
role:
|
||||
type: string
|
||||
const: system
|
||||
default: system
|
||||
description: >-
|
||||
Must be "system" to identify this as a system message
|
||||
content:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
description: >-
|
||||
The content of the "system prompt". If multiple system messages are provided,
|
||||
they are concatenated. The underlying Llama Stack code may also add other
|
||||
system messages (for example, for formatting tool definitions).
|
||||
name:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) The name of the system message participant.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- role
|
||||
- content
|
||||
title: OpenAISystemMessageParam
|
||||
description: >-
|
||||
A system message providing instructions or context to the model.
|
||||
OpenAIToolMessageParam:
|
||||
type: object
|
||||
properties:
|
||||
role:
|
||||
type: string
|
||||
const: tool
|
||||
default: tool
|
||||
description: >-
|
||||
Must be "tool" to identify this as a tool response
|
||||
tool_call_id:
|
||||
type: string
|
||||
description: >-
|
||||
Unique identifier for the tool call this response is for
|
||||
content:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
description: The response content from the tool
|
||||
additionalProperties: false
|
||||
required:
|
||||
- role
|
||||
- tool_call_id
|
||||
- content
|
||||
title: OpenAIToolMessageParam
|
||||
description: >-
|
||||
A message representing the result of a tool invocation in an OpenAI-compatible
|
||||
chat completion request.
|
||||
OpenAIUserMessageParam:
|
||||
type: object
|
||||
properties:
|
||||
role:
|
||||
type: string
|
||||
const: user
|
||||
default: user
|
||||
description: >-
|
||||
Must be "user" to identify this as a user message
|
||||
content:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
description: >-
|
||||
The content of the message, which can include text and other media
|
||||
name:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) The name of the user message participant.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- role
|
||||
- content
|
||||
title: OpenAIUserMessageParam
|
||||
description: >-
|
||||
A message from the user in an OpenAI-compatible chat completion request.
|
||||
OpenaiChatCompletionRequest:
|
||||
type: object
|
||||
properties:
|
||||
model:
|
||||
type: string
|
||||
description: >-
|
||||
The identifier of the model to use. The model must be registered with
|
||||
Llama Stack and available via the /models endpoint.
|
||||
messages:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIMessageParam'
|
||||
description: List of messages in the conversation
|
||||
frequency_penalty:
|
||||
type: number
|
||||
description: >-
|
||||
(Optional) The penalty applied to tokens in proportion to how often they have already appeared
|
||||
function_call:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: (Optional) The function call to use
|
||||
functions:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: (Optional) List of functions to use
|
||||
logit_bias:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: number
|
||||
description: (Optional) The logit bias to use
|
||||
logprobs:
|
||||
type: boolean
|
||||
description: (Optional) Whether to return log probabilities of the output tokens
|
||||
max_completion_tokens:
|
||||
type: integer
|
||||
description: >-
|
||||
(Optional) The maximum number of tokens to generate
|
||||
max_tokens:
|
||||
type: integer
|
||||
description: >-
|
||||
(Optional) The maximum number of tokens to generate
|
||||
n:
|
||||
type: integer
|
||||
description: >-
|
||||
(Optional) The number of completions to generate
|
||||
parallel_tool_calls:
|
||||
type: boolean
|
||||
description: >-
|
||||
(Optional) Whether to parallelize tool calls
|
||||
presence_penalty:
|
||||
type: number
|
||||
description: >-
|
||||
(Optional) The penalty applied to any token that has already appeared, regardless of how often
|
||||
response_format:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: (Optional) The response format to use
|
||||
seed:
|
||||
type: integer
|
||||
description: (Optional) The seed to use
|
||||
stop:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: array
|
||||
items:
|
||||
type: string
|
||||
description: (Optional) The stop tokens to use
|
||||
stream:
|
||||
type: boolean
|
||||
description: >-
|
||||
(Optional) Whether to stream the response
|
||||
stream_options:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: (Optional) The stream options to use
|
||||
temperature:
|
||||
type: number
|
||||
description: (Optional) The temperature to use
|
||||
tool_choice:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: (Optional) The tool choice to use
|
||||
tools:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: (Optional) The tools to use
|
||||
top_logprobs:
|
||||
type: integer
|
||||
description: >-
|
||||
(Optional) The number of most likely tokens to return log probabilities for at each position
|
||||
top_p:
|
||||
type: number
|
||||
description: (Optional) The top p to use
|
||||
user:
|
||||
type: string
|
||||
description: (Optional) An identifier for the end user making the request
|
||||
additionalProperties: false
|
||||
required:
|
||||
- model
|
||||
- messages
|
||||
title: OpenaiChatCompletionRequest
|
||||
OpenAIChatCompletion:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: The ID of the chat completion
|
||||
choices:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIChoice'
|
||||
description: List of choices
|
||||
object:
|
||||
type: string
|
||||
const: chat.completion
|
||||
default: chat.completion
|
||||
description: >-
|
||||
The object type, which will be "chat.completion"
|
||||
created:
|
||||
type: integer
|
||||
description: >-
|
||||
The Unix timestamp in seconds when the chat completion was created
|
||||
model:
|
||||
type: string
|
||||
description: >-
|
||||
The model that was used to generate the chat completion
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
- choices
|
||||
- object
|
||||
- created
|
||||
- model
|
||||
title: OpenAIChatCompletion
|
||||
description: >-
|
||||
Response from an OpenAI-compatible chat completion request.
|
||||
OpenAIChoice:
|
||||
type: object
|
||||
properties:
|
||||
message:
|
||||
$ref: '#/components/schemas/OpenAIMessageParam'
|
||||
description: The message from the model
|
||||
finish_reason:
|
||||
type: string
|
||||
description: The reason the model stopped generating
|
||||
index:
|
||||
type: integer
|
||||
logprobs:
|
||||
$ref: '#/components/schemas/OpenAIChoiceLogprobs'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- message
|
||||
- finish_reason
|
||||
- index
|
||||
title: OpenAIChoice
|
||||
description: >-
|
||||
A choice from an OpenAI-compatible chat completion response.
|
||||
OpenAIChoiceLogprobs:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAITokenLogProb'
|
||||
refusal:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAITokenLogProb'
|
||||
additionalProperties: false
|
||||
title: OpenAIChoiceLogprobs
|
||||
description: >-
|
||||
The log probabilities for the tokens in the message from an OpenAI-compatible
|
||||
chat completion response.
|
||||
OpenAITokenLogProb:
|
||||
type: object
|
||||
properties:
|
||||
token:
|
||||
type: string
|
||||
bytes:
|
||||
type: array
|
||||
items:
|
||||
type: integer
|
||||
logprob:
|
||||
type: number
|
||||
top_logprobs:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAITopLogProb'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- token
|
||||
- logprob
|
||||
- top_logprobs
|
||||
title: OpenAITokenLogProb
|
||||
description: >-
|
||||
The log probability for a token from an OpenAI-compatible chat completion
|
||||
response.
|
||||
OpenAITopLogProb:
|
||||
type: object
|
||||
properties:
|
||||
token:
|
||||
type: string
|
||||
bytes:
|
||||
type: array
|
||||
items:
|
||||
type: integer
|
||||
logprob:
|
||||
type: number
|
||||
additionalProperties: false
|
||||
required:
|
||||
- token
|
||||
- logprob
|
||||
title: OpenAITopLogProb
|
||||
description: >-
|
||||
The top log probability for a token from an OpenAI-compatible chat completion
|
||||
response.
|
||||
OpenaiCompletionRequest:
|
||||
type: object
|
||||
properties:
|
||||
model:
|
||||
type: string
|
||||
description: >-
|
||||
The identifier of the model to use. The model must be registered with
|
||||
Llama Stack and available via the /models endpoint.
|
||||
prompt:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: array
|
||||
items:
|
||||
type: string
|
||||
- type: array
|
||||
items:
|
||||
type: integer
|
||||
- type: array
|
||||
items:
|
||||
type: array
|
||||
items:
|
||||
type: integer
|
||||
description: The prompt to generate a completion for
|
||||
best_of:
|
||||
type: integer
|
||||
description: >-
|
||||
(Optional) The number of candidate completions to generate, from which the best are returned
|
||||
echo:
|
||||
type: boolean
|
||||
description: (Optional) Whether to echo the prompt
|
||||
frequency_penalty:
|
||||
type: number
|
||||
description: >-
|
||||
(Optional) The penalty applied to tokens in proportion to how often they have already appeared
|
||||
logit_bias:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: number
|
||||
description: (Optional) The logit bias to use
|
||||
logprobs:
|
||||
type: boolean
|
||||
description: (Optional) Whether to return log probabilities of the output tokens
|
||||
max_tokens:
|
||||
type: integer
|
||||
description: >-
|
||||
(Optional) The maximum number of tokens to generate
|
||||
n:
|
||||
type: integer
|
||||
description: >-
|
||||
(Optional) The number of completions to generate
|
||||
presence_penalty:
|
||||
type: number
|
||||
description: >-
|
||||
(Optional) The penalty applied to any token that has already appeared, regardless of how often
|
||||
seed:
|
||||
type: integer
|
||||
description: (Optional) The seed to use
|
||||
stop:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: array
|
||||
items:
|
||||
type: string
|
||||
description: (Optional) The stop tokens to use
|
||||
stream:
|
||||
type: boolean
|
||||
description: >-
|
||||
(Optional) Whether to stream the response
|
||||
stream_options:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
description: (Optional) The stream options to use
|
||||
temperature:
|
||||
type: number
|
||||
description: (Optional) The temperature to use
|
||||
top_p:
|
||||
type: number
|
||||
description: (Optional) The top p to use
|
||||
user:
|
||||
type: string
|
||||
description: (Optional) An identifier for the end user making the request
|
||||
guided_choice:
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
prompt_logprobs:
|
||||
type: integer
|
||||
additionalProperties: false
|
||||
required:
|
||||
- model
|
||||
- prompt
|
||||
title: OpenaiCompletionRequest
|
||||
OpenAICompletion:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
choices:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAICompletionChoice'
|
||||
created:
|
||||
type: integer
|
||||
model:
|
||||
type: string
|
||||
object:
|
||||
type: string
|
||||
const: text_completion
|
||||
default: text_completion
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
- choices
|
||||
- created
|
||||
- model
|
||||
- object
|
||||
title: OpenAICompletion
|
||||
description: >-
|
||||
Response from an OpenAI-compatible completion request.
|
||||
OpenAICompletionChoice:
|
||||
type: object
|
||||
properties:
|
||||
finish_reason:
|
||||
type: string
|
||||
text:
|
||||
type: string
|
||||
index:
|
||||
type: integer
|
||||
logprobs:
|
||||
$ref: '#/components/schemas/OpenAIChoiceLogprobs'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- finish_reason
|
||||
- text
|
||||
- index
|
||||
title: OpenAICompletionChoice
|
||||
description: >-
|
||||
A choice from an OpenAI-compatible completion response.
|
||||
OpenAIModel:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
object:
|
||||
type: string
|
||||
const: model
|
||||
default: model
|
||||
created:
|
||||
type: integer
|
||||
owned_by:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
- object
|
||||
- created
|
||||
- owned_by
|
||||
title: OpenAIModel
|
||||
description: A model entry as returned in the OpenAI-compatible models list.
|
||||
OpenAIListModelsResponse:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIModel'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- data
|
||||
title: OpenAIListModelsResponse
|
||||
DPOAlignmentConfig:
|
||||
type: object
|
||||
properties:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue