Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-01 16:24:44 +00:00)

Merge branch 'main' into patch-metadata

Commit f0a142f5a8 — 21 changed files with 1405 additions and 887 deletions
docs/_static/llama-stack-spec.html (vendored, 1202 changes)
File diff suppressed because it is too large
docs/_static/llama-stack-spec.yaml (vendored, 900 changes)

@@ -827,6 +827,35 @@ paths:
          required: true
          schema:
            type: string
+  /v1/openai/v1/chat/completions/{completion_id}:
+    get:
+      responses:
+        '200':
+          description: A OpenAICompletionWithInputMessages.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/OpenAICompletionWithInputMessages'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Inference
+      description: Describe a chat completion by its ID.
+      parameters:
+        - name: completion_id
+          in: path
+          description: ID of the chat completion.
+          required: true
+          schema:
+            type: string
   /v1/datasets/{dataset_id}:
     get:
       responses:

@@ -1795,6 +1824,89 @@ paths:
             schema:
               $ref: '#/components/schemas/RegisterBenchmarkRequest'
       required: true
+  /v1/openai/v1/chat/completions:
+    get:
+      responses:
+        '200':
+          description: A ListOpenAIChatCompletionResponse.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListOpenAIChatCompletionResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Inference
+      description: List all chat completions.
+      parameters:
+        - name: after
+          in: query
+          description: >-
+            The ID of the last chat completion to return.
+          required: false
+          schema:
+            type: string
+        - name: limit
+          in: query
+          description: >-
+            The maximum number of chat completions to return.
+          required: false
+          schema:
+            type: integer
+        - name: model
+          in: query
+          description: The model to filter by.
+          required: false
+          schema:
+            type: string
+        - name: order
+          in: query
+          description: >-
+            The order to sort the chat completions by: "asc" or "desc". Defaults to
+            "desc".
+          required: false
+          schema:
+            $ref: '#/components/schemas/Order'
+    post:
+      responses:
+        '200':
+          description: An OpenAIChatCompletion.
+          content:
+            application/json:
+              schema:
+                oneOf:
+                  - $ref: '#/components/schemas/OpenAIChatCompletion'
+                  - $ref: '#/components/schemas/OpenAIChatCompletionChunk'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Inference
+      description: >-
+        Generate an OpenAI-compatible chat completion for the given messages using
+        the specified model.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/OpenaiChatCompletionRequest'
+        required: true
   /v1/datasets:
     get:
       responses:
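For a quick sense of what the two routes added above accept, a client call might look like the following sketch (assuming a stack listening on the default port 8321 that appears elsewhere in this diff; the model id is a placeholder):

    import requests

    # List recent chat completions, newest first (query parameters from the new spec above).
    page = requests.get(
        "http://localhost:8321/v1/openai/v1/chat/completions",
        params={"limit": 10, "order": "desc", "model": "placeholder-model-id"},
    ).json()

    # Describe one completion, including its input messages, by id.
    completion_id = page["data"][0]["id"]
    detail = requests.get(
        f"http://localhost:8321/v1/openai/v1/chat/completions/{completion_id}"
    ).json()
    print(detail["input_messages"])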
@@ -2261,39 +2373,6 @@ paths:
             schema:
               $ref: '#/components/schemas/LogEventRequest'
       required: true
-  /v1/openai/v1/chat/completions:
-    post:
-      responses:
-        '200':
-          description: An OpenAIChatCompletion.
-          content:
-            application/json:
-              schema:
-                oneOf:
-                  - $ref: '#/components/schemas/OpenAIChatCompletion'
-                  - $ref: '#/components/schemas/OpenAIChatCompletionChunk'
-        '400':
-          $ref: '#/components/responses/BadRequest400'
-        '429':
-          $ref: >-
-            #/components/responses/TooManyRequests429
-        '500':
-          $ref: >-
-            #/components/responses/InternalServerError500
-        default:
-          $ref: '#/components/responses/DefaultError'
-      tags:
-        - Inference
-      description: >-
-        Generate an OpenAI-compatible chat completion for the given messages using
-        the specified model.
-      parameters: []
-      requestBody:
-        content:
-          application/json:
-            schema:
-              $ref: '#/components/schemas/OpenaiChatCompletionRequest'
-        required: true
   /v1/openai/v1/completions:
     post:
       responses:

@@ -5479,6 +5558,369 @@ components:
         - scoring_functions
         - metadata
       title: Benchmark
+    OpenAIAssistantMessageParam:
+      type: object
+      properties:
+        role:
+          type: string
+          const: assistant
+          default: assistant
+          description: >-
+            Must be "assistant" to identify this as the model's response
+        content:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
+          description: The content of the model's response
+        name:
+          type: string
+          description: >-
+            (Optional) The name of the assistant message participant.
+        tool_calls:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAIChatCompletionToolCall'
+          description: >-
+            List of tool calls. Each tool call is an OpenAIChatCompletionToolCall
+            object.
+      additionalProperties: false
+      required:
+        - role
+      title: OpenAIAssistantMessageParam
+      description: >-
+        A message containing the model's (assistant) response in an OpenAI-compatible
+        chat completion request.
+    "OpenAIChatCompletionContentPartImageParam":
+      type: object
+      properties:
+        type:
+          type: string
+          const: image_url
+          default: image_url
+        image_url:
+          $ref: '#/components/schemas/OpenAIImageURL'
+      additionalProperties: false
+      required:
+        - type
+        - image_url
+      title: >-
+        OpenAIChatCompletionContentPartImageParam
+    OpenAIChatCompletionContentPartParam:
+      oneOf:
+        - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
+        - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
+      discriminator:
+        propertyName: type
+        mapping:
+          text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
+          image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
+    OpenAIChatCompletionContentPartTextParam:
+      type: object
+      properties:
+        type:
+          type: string
+          const: text
+          default: text
+        text:
+          type: string
+      additionalProperties: false
+      required:
+        - type
+        - text
+      title: OpenAIChatCompletionContentPartTextParam
+    OpenAIChatCompletionToolCall:
+      type: object
+      properties:
+        index:
+          type: integer
+        id:
+          type: string
+        type:
+          type: string
+          const: function
+          default: function
+        function:
+          $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction'
+      additionalProperties: false
+      required:
+        - type
+      title: OpenAIChatCompletionToolCall
+    OpenAIChatCompletionToolCallFunction:
+      type: object
+      properties:
+        name:
+          type: string
+        arguments:
+          type: string
+      additionalProperties: false
+      title: OpenAIChatCompletionToolCallFunction
+    OpenAIChoice:
+      type: object
+      properties:
+        message:
+          $ref: '#/components/schemas/OpenAIMessageParam'
+          description: The message from the model
+        finish_reason:
+          type: string
+          description: The reason the model stopped generating
+        index:
+          type: integer
+          description: The index of the choice
+        logprobs:
+          $ref: '#/components/schemas/OpenAIChoiceLogprobs'
+          description: >-
+            (Optional) The log probabilities for the tokens in the message
+      additionalProperties: false
+      required:
+        - message
+        - finish_reason
+        - index
+      title: OpenAIChoice
+      description: >-
+        A choice from an OpenAI-compatible chat completion response.
+    OpenAIChoiceLogprobs:
+      type: object
+      properties:
+        content:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAITokenLogProb'
+          description: >-
+            (Optional) The log probabilities for the tokens in the message
+        refusal:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAITokenLogProb'
+          description: >-
+            (Optional) The log probabilities for the tokens in the message
+      additionalProperties: false
+      title: OpenAIChoiceLogprobs
+      description: >-
+        The log probabilities for the tokens in the message from an OpenAI-compatible
+        chat completion response.
+    OpenAIDeveloperMessageParam:
+      type: object
+      properties:
+        role:
+          type: string
+          const: developer
+          default: developer
+          description: >-
+            Must be "developer" to identify this as a developer message
+        content:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
+          description: The content of the developer message
+        name:
+          type: string
+          description: >-
+            (Optional) The name of the developer message participant.
+      additionalProperties: false
+      required:
+        - role
+        - content
+      title: OpenAIDeveloperMessageParam
+      description: >-
+        A message from the developer in an OpenAI-compatible chat completion request.
+    OpenAIImageURL:
+      type: object
+      properties:
+        url:
+          type: string
+        detail:
+          type: string
+      additionalProperties: false
+      required:
+        - url
+      title: OpenAIImageURL
+    OpenAIMessageParam:
+      oneOf:
+        - $ref: '#/components/schemas/OpenAIUserMessageParam'
+        - $ref: '#/components/schemas/OpenAISystemMessageParam'
+        - $ref: '#/components/schemas/OpenAIAssistantMessageParam'
+        - $ref: '#/components/schemas/OpenAIToolMessageParam'
+        - $ref: '#/components/schemas/OpenAIDeveloperMessageParam'
+      discriminator:
+        propertyName: role
+        mapping:
+          user: '#/components/schemas/OpenAIUserMessageParam'
+          system: '#/components/schemas/OpenAISystemMessageParam'
+          assistant: '#/components/schemas/OpenAIAssistantMessageParam'
+          tool: '#/components/schemas/OpenAIToolMessageParam'
+          developer: '#/components/schemas/OpenAIDeveloperMessageParam'
+    OpenAISystemMessageParam:
+      type: object
+      properties:
+        role:
+          type: string
+          const: system
+          default: system
+          description: >-
+            Must be "system" to identify this as a system message
+        content:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
+          description: >-
+            The content of the "system prompt". If multiple system messages are provided,
+            they are concatenated. The underlying Llama Stack code may also add other
+            system messages (for example, for formatting tool definitions).
+        name:
+          type: string
+          description: >-
+            (Optional) The name of the system message participant.
+      additionalProperties: false
+      required:
+        - role
+        - content
+      title: OpenAISystemMessageParam
+      description: >-
+        A system message providing instructions or context to the model.
+    OpenAITokenLogProb:
+      type: object
+      properties:
+        token:
+          type: string
+        bytes:
+          type: array
+          items:
+            type: integer
+        logprob:
+          type: number
+        top_logprobs:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAITopLogProb'
+      additionalProperties: false
+      required:
+        - token
+        - logprob
+        - top_logprobs
+      title: OpenAITokenLogProb
+      description: >-
+        The log probability for a token from an OpenAI-compatible chat completion
+        response.
+    OpenAIToolMessageParam:
+      type: object
+      properties:
+        role:
+          type: string
+          const: tool
+          default: tool
+          description: >-
+            Must be "tool" to identify this as a tool response
+        tool_call_id:
+          type: string
+          description: >-
+            Unique identifier for the tool call this response is for
+        content:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
+          description: The response content from the tool
+      additionalProperties: false
+      required:
+        - role
+        - tool_call_id
+        - content
+      title: OpenAIToolMessageParam
+      description: >-
+        A message representing the result of a tool invocation in an OpenAI-compatible
+        chat completion request.
+    OpenAITopLogProb:
+      type: object
+      properties:
+        token:
+          type: string
+        bytes:
+          type: array
+          items:
+            type: integer
+        logprob:
+          type: number
+      additionalProperties: false
+      required:
+        - token
+        - logprob
+      title: OpenAITopLogProb
+      description: >-
+        The top log probability for a token from an OpenAI-compatible chat completion
+        response.
+    OpenAIUserMessageParam:
+      type: object
+      properties:
+        role:
+          type: string
+          const: user
+          default: user
+          description: >-
+            Must be "user" to identify this as a user message
+        content:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
+          description: >-
+            The content of the message, which can include text and other media
+        name:
+          type: string
+          description: >-
+            (Optional) The name of the user message participant.
+      additionalProperties: false
+      required:
+        - role
+        - content
+      title: OpenAIUserMessageParam
+      description: >-
+        A message from the user in an OpenAI-compatible chat completion request.
+    OpenAICompletionWithInputMessages:
+      type: object
+      properties:
+        id:
+          type: string
+          description: The ID of the chat completion
+        choices:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAIChoice'
+          description: List of choices
+        object:
+          type: string
+          const: chat.completion
+          default: chat.completion
+          description: >-
+            The object type, which will be "chat.completion"
+        created:
+          type: integer
+          description: >-
+            The Unix timestamp in seconds when the chat completion was created
+        model:
+          type: string
+          description: >-
+            The model that was used to generate the chat completion
+        input_messages:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAIMessageParam'
+      additionalProperties: false
+      required:
+        - id
+        - choices
+        - object
+        - created
+        - model
+        - input_messages
+      title: OpenAICompletionWithInputMessages
     DataSource:
       oneOf:
         - $ref: '#/components/schemas/URIDataSource'

@@ -6497,6 +6939,73 @@ components:
       required:
         - data
       title: ListBenchmarksResponse
+    Order:
+      type: string
+      enum:
+        - asc
+        - desc
+      title: Order
+    ListOpenAIChatCompletionResponse:
+      type: object
+      properties:
+        data:
+          type: array
+          items:
+            type: object
+            properties:
+              id:
+                type: string
+                description: The ID of the chat completion
+              choices:
+                type: array
+                items:
+                  $ref: '#/components/schemas/OpenAIChoice'
+                description: List of choices
+              object:
+                type: string
+                const: chat.completion
+                default: chat.completion
+                description: >-
+                  The object type, which will be "chat.completion"
+              created:
+                type: integer
+                description: >-
+                  The Unix timestamp in seconds when the chat completion was created
+              model:
+                type: string
+                description: >-
+                  The model that was used to generate the chat completion
+              input_messages:
+                type: array
+                items:
+                  $ref: '#/components/schemas/OpenAIMessageParam'
+            additionalProperties: false
+            required:
+              - id
+              - choices
+              - object
+              - created
+              - model
+              - input_messages
+            title: OpenAICompletionWithInputMessages
+        has_more:
+          type: boolean
+        first_id:
+          type: string
+        last_id:
+          type: string
+        object:
+          type: string
+          const: list
+          default: list
+      additionalProperties: false
+      required:
+        - data
+        - has_more
+        - first_id
+        - last_id
+        - object
+      title: ListOpenAIChatCompletionResponse
     ListDatasetsResponse:
       type: object
       properties:

@@ -6835,142 +7344,6 @@ components:
         - event
         - ttl_seconds
       title: LogEventRequest
-    OpenAIAssistantMessageParam:
-      type: object
-      properties:
-        role:
-          type: string
-          const: assistant
-          default: assistant
-          description: >-
-            Must be "assistant" to identify this as the model's response
-        content:
-          oneOf:
-            - type: string
-            - type: array
-              items:
-                $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
-          description: The content of the model's response
-        name:
-          type: string
-          description: >-
-            (Optional) The name of the assistant message participant.
-        tool_calls:
-          type: array
-          items:
-            $ref: '#/components/schemas/OpenAIChatCompletionToolCall'
-          description: >-
-            List of tool calls. Each tool call is an OpenAIChatCompletionToolCall
-            object.
-      additionalProperties: false
-      required:
-        - role
-      title: OpenAIAssistantMessageParam
-      description: >-
-        A message containing the model's (assistant) response in an OpenAI-compatible
-        chat completion request.
-    "OpenAIChatCompletionContentPartImageParam":
-      type: object
-      properties:
-        type:
-          type: string
-          const: image_url
-          default: image_url
-        image_url:
-          $ref: '#/components/schemas/OpenAIImageURL'
-      additionalProperties: false
-      required:
-        - type
-        - image_url
-      title: >-
-        OpenAIChatCompletionContentPartImageParam
-    OpenAIChatCompletionContentPartParam:
-      oneOf:
-        - $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
-        - $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
-      discriminator:
-        propertyName: type
-        mapping:
-          text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
-          image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
-    OpenAIChatCompletionContentPartTextParam:
-      type: object
-      properties:
-        type:
-          type: string
-          const: text
-          default: text
-        text:
-          type: string
-      additionalProperties: false
-      required:
-        - type
-        - text
-      title: OpenAIChatCompletionContentPartTextParam
-    OpenAIChatCompletionToolCall:
-      type: object
-      properties:
-        index:
-          type: integer
-        id:
-          type: string
-        type:
-          type: string
-          const: function
-          default: function
-        function:
-          $ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction'
-      additionalProperties: false
-      required:
-        - type
-      title: OpenAIChatCompletionToolCall
-    OpenAIChatCompletionToolCallFunction:
-      type: object
-      properties:
-        name:
-          type: string
-        arguments:
-          type: string
-      additionalProperties: false
-      title: OpenAIChatCompletionToolCallFunction
-    OpenAIDeveloperMessageParam:
-      type: object
-      properties:
-        role:
-          type: string
-          const: developer
-          default: developer
-          description: >-
-            Must be "developer" to identify this as a developer message
-        content:
-          oneOf:
-            - type: string
-            - type: array
-              items:
-                $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
-          description: The content of the developer message
-        name:
-          type: string
-          description: >-
-            (Optional) The name of the developer message participant.
-      additionalProperties: false
-      required:
-        - role
-        - content
-      title: OpenAIDeveloperMessageParam
-      description: >-
-        A message from the developer in an OpenAI-compatible chat completion request.
-    OpenAIImageURL:
-      type: object
-      properties:
-        url:
-          type: string
-        detail:
-          type: string
-      additionalProperties: false
-      required:
-        - url
-      title: OpenAIImageURL
     OpenAIJSONSchema:
       type: object
       properties:

@@ -6994,21 +7367,6 @@ components:
       required:
         - name
       title: OpenAIJSONSchema
-    OpenAIMessageParam:
-      oneOf:
-        - $ref: '#/components/schemas/OpenAIUserMessageParam'
-        - $ref: '#/components/schemas/OpenAISystemMessageParam'
-        - $ref: '#/components/schemas/OpenAIAssistantMessageParam'
-        - $ref: '#/components/schemas/OpenAIToolMessageParam'
-        - $ref: '#/components/schemas/OpenAIDeveloperMessageParam'
-      discriminator:
-        propertyName: role
-        mapping:
-          user: '#/components/schemas/OpenAIUserMessageParam'
-          system: '#/components/schemas/OpenAISystemMessageParam'
-          assistant: '#/components/schemas/OpenAIAssistantMessageParam'
-          tool: '#/components/schemas/OpenAIToolMessageParam'
-          developer: '#/components/schemas/OpenAIDeveloperMessageParam'
     OpenAIResponseFormatJSONObject:
       type: object
      properties:

@@ -7056,93 +7414,6 @@ components:
       required:
         - type
       title: OpenAIResponseFormatText
-    OpenAISystemMessageParam:
-      type: object
-      properties:
-        role:
-          type: string
-          const: system
-          default: system
-          description: >-
-            Must be "system" to identify this as a system message
-        content:
-          oneOf:
-            - type: string
-            - type: array
-              items:
-                $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
-          description: >-
-            The content of the "system prompt". If multiple system messages are provided,
-            they are concatenated. The underlying Llama Stack code may also add other
-            system messages (for example, for formatting tool definitions).
-        name:
-          type: string
-          description: >-
-            (Optional) The name of the system message participant.
-      additionalProperties: false
-      required:
-        - role
-        - content
-      title: OpenAISystemMessageParam
-      description: >-
-        A system message providing instructions or context to the model.
-    OpenAIToolMessageParam:
-      type: object
-      properties:
-        role:
-          type: string
-          const: tool
-          default: tool
-          description: >-
-            Must be "tool" to identify this as a tool response
-        tool_call_id:
-          type: string
-          description: >-
-            Unique identifier for the tool call this response is for
-        content:
-          oneOf:
-            - type: string
-            - type: array
-              items:
-                $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
-          description: The response content from the tool
-      additionalProperties: false
-      required:
-        - role
-        - tool_call_id
-        - content
-      title: OpenAIToolMessageParam
-      description: >-
-        A message representing the result of a tool invocation in an OpenAI-compatible
-        chat completion request.
-    OpenAIUserMessageParam:
-      type: object
-      properties:
-        role:
-          type: string
-          const: user
-          default: user
-          description: >-
-            Must be "user" to identify this as a user message
-        content:
-          oneOf:
-            - type: string
-            - type: array
-              items:
-                $ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
-          description: >-
-            The content of the message, which can include text and other media
-        name:
-          type: string
-          description: >-
-            (Optional) The name of the user message participant.
-      additionalProperties: false
-      required:
-        - role
-        - content
-      title: OpenAIUserMessageParam
-      description: >-
-        A message from the user in an OpenAI-compatible chat completion request.
     OpenaiChatCompletionRequest:
       type: object
       properties:

@@ -7356,30 +7627,6 @@ components:
       title: OpenAIChatCompletionChunk
       description: >-
         Chunk from a streaming response to an OpenAI-compatible chat completion request.
-    OpenAIChoice:
-      type: object
-      properties:
-        message:
-          $ref: '#/components/schemas/OpenAIMessageParam'
-          description: The message from the model
-        finish_reason:
-          type: string
-          description: The reason the model stopped generating
-        index:
-          type: integer
-          description: The index of the choice
-        logprobs:
-          $ref: '#/components/schemas/OpenAIChoiceLogprobs'
-          description: >-
-            (Optional) The log probabilities for the tokens in the message
-      additionalProperties: false
-      required:
-        - message
-        - finish_reason
-        - index
-      title: OpenAIChoice
-      description: >-
-        A choice from an OpenAI-compatible chat completion response.
     OpenAIChoiceDelta:
       type: object
       properties:

@@ -7401,26 +7648,6 @@ components:
       title: OpenAIChoiceDelta
       description: >-
         A delta from an OpenAI-compatible chat completion streaming response.
-    OpenAIChoiceLogprobs:
-      type: object
-      properties:
-        content:
-          type: array
-          items:
-            $ref: '#/components/schemas/OpenAITokenLogProb'
-          description: >-
-            (Optional) The log probabilities for the tokens in the message
-        refusal:
-          type: array
-          items:
-            $ref: '#/components/schemas/OpenAITokenLogProb'
-          description: >-
-            (Optional) The log probabilities for the tokens in the message
-      additionalProperties: false
-      title: OpenAIChoiceLogprobs
-      description: >-
-        The log probabilities for the tokens in the message from an OpenAI-compatible
-        chat completion response.
     OpenAIChunkChoice:
       type: object
       properties:

@@ -7445,49 +7672,6 @@ components:
       title: OpenAIChunkChoice
       description: >-
         A chunk choice from an OpenAI-compatible chat completion streaming response.
-    OpenAITokenLogProb:
-      type: object
-      properties:
-        token:
-          type: string
-        bytes:
-          type: array
-          items:
-            type: integer
-        logprob:
-          type: number
-        top_logprobs:
-          type: array
-          items:
-            $ref: '#/components/schemas/OpenAITopLogProb'
-      additionalProperties: false
-      required:
-        - token
-        - logprob
-        - top_logprobs
-      title: OpenAITokenLogProb
-      description: >-
-        The log probability for a token from an OpenAI-compatible chat completion
-        response.
-    OpenAITopLogProb:
-      type: object
-      properties:
-        token:
-          type: string
-        bytes:
-          type: array
-          items:
-            type: integer
-        logprob:
-          type: number
-      additionalProperties: false
-      required:
-        - token
-        - logprob
-      title: OpenAITopLogProb
-      description: >-
-        The top log probability for a token from an OpenAI-compatible chat completion
-        response.
     OpenaiCompletionRequest:
       type: object
       properties:
@@ -759,7 +759,7 @@ class Generator:
         )

         return Operation(
-            tags=[op.defining_class.__name__],
+            tags=[getattr(op.defining_class, "API_NAMESPACE", op.defining_class.__name__)],
             summary=None,
             # summary=doc_string.short_description,
             description=description,

@@ -805,6 +805,8 @@ class Generator:
         operation_tags: List[Tag] = []
         for cls in endpoint_classes:
            doc_string = parse_type(cls)
+            if hasattr(cls, "API_NAMESPACE") and cls.API_NAMESPACE != cls.__name__:
+                continue
            operation_tags.append(
                Tag(
                    name=cls.__name__,
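The two generator changes above work together: an operation is tagged with its defining class's API_NAMESPACE when one is set (falling back to the class name), and classes that only exist as a namespaced alias are skipped when emitting tag definitions. A toy illustration of the getattr fallback, mirroring the InferenceProvider class changed later in this diff:

    class InferenceProvider:
        API_NAMESPACE = "Inference"  # operations defined here are grouped under "Inference"

    class Datasets:
        pass  # no override, so the class name itself is the tag

    assert getattr(InferenceProvider, "API_NAMESPACE", InferenceProvider.__name__) == "Inference"
    assert getattr(Datasets, "API_NAMESPACE", Datasets.__name__) == "Datasets"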
@@ -820,15 +820,32 @@ class BatchChatCompletionResponse(BaseModel):
     batch: list[ChatCompletionResponse]


+class OpenAICompletionWithInputMessages(OpenAIChatCompletion):
+    input_messages: list[OpenAIMessageParam]
+
+
+@json_schema_type
+class ListOpenAIChatCompletionResponse(BaseModel):
+    data: list[OpenAICompletionWithInputMessages]
+    has_more: bool
+    first_id: str
+    last_id: str
+    object: Literal["list"] = "list"
+
+
+class Order(Enum):
+    asc = "asc"
+    desc = "desc"
+
+
 @runtime_checkable
 @trace_protocol
-class Inference(Protocol):
-    """Llama Stack Inference API for generating completions, chat completions, and embeddings.
-
-    This API provides the raw interface to the underlying models. Two kinds of models are supported:
-    - LLM models: these models generate "raw" and "chat" (conversational) completions.
-    - Embedding models: these models generate embeddings to be used for semantic search.
-    """
+class InferenceProvider(Protocol):
+    """
+    This protocol defines the interface that should be implemented by all inference providers.
+    """
+
+    API_NAMESPACE: str = "Inference"

     model_store: ModelStore | None = None

@@ -1062,3 +1079,39 @@ class Inference(Protocol):
         :returns: An OpenAIChatCompletion.
         """
         ...
+
+
+class Inference(InferenceProvider):
+    """Llama Stack Inference API for generating completions, chat completions, and embeddings.
+
+    This API provides the raw interface to the underlying models. Two kinds of models are supported:
+    - LLM models: these models generate "raw" and "chat" (conversational) completions.
+    - Embedding models: these models generate embeddings to be used for semantic search.
+    """
+
+    @webmethod(route="/openai/v1/chat/completions", method="GET")
+    async def list_chat_completions(
+        self,
+        after: str | None = None,
+        limit: int | None = 20,
+        model: str | None = None,
+        order: Order | None = Order.desc,
+    ) -> ListOpenAIChatCompletionResponse:
+        """List all chat completions.
+
+        :param after: The ID of the last chat completion to return.
+        :param limit: The maximum number of chat completions to return.
+        :param model: The model to filter by.
+        :param order: The order to sort the chat completions by: "asc" or "desc". Defaults to "desc".
+        :returns: A ListOpenAIChatCompletionResponse.
+        """
+        raise NotImplementedError("List chat completions is not implemented")
+
+    @webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET")
+    async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
+        """Describe a chat completion by its ID.
+
+        :param completion_id: ID of the chat completion.
+        :returns: A OpenAICompletionWithInputMessages.
+        """
+        raise NotImplementedError("Get chat completion is not implemented")
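A minimal sketch of how these two new methods are meant to be called, assuming some impl object that satisfies the full Inference protocol (only field names from the models above are used):

    async def demo(impl: Inference) -> None:
        # Page through recent completions, newest first.
        page = await impl.list_chat_completions(limit=20, order=Order.desc)
        for item in page.data:
            print(item.id, item.model, item.created)
        if page.has_more:
            # Cursor-style pagination via the last id of the previous page.
            page = await impl.list_chat_completions(after=page.last_id, limit=20)

        # Re-fetch one completion together with the messages that produced it.
        detail = await impl.get_chat_completion(page.data[0].id)
        print(detail.input_messages)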
@@ -12,6 +12,7 @@ import shutil
 import sys
 import textwrap
 from functools import lru_cache
+from importlib.abc import Traversable
 from pathlib import Path

 import yaml

@@ -250,11 +251,10 @@ def run_stack_build_command(args: argparse.Namespace) -> None:
         sys.exit(1)

     if args.run:
-        run_config = Path(run_config)
         config_dict = yaml.safe_load(run_config.read_text())
         config = parse_and_maybe_upgrade_config(config_dict)
-        if not os.path.exists(str(config.external_providers_dir)):
-            os.makedirs(str(config.external_providers_dir), exist_ok=True)
+        if not os.path.exists(config.external_providers_dir):
+            os.makedirs(config.external_providers_dir, exist_ok=True)
         run_args = formulate_run_args(args.image_type, args.image_name, config, args.template)
         run_args.extend([str(os.getenv("LLAMA_STACK_PORT", 8321)), "--config", run_config])
         run_command(run_args)

@@ -264,7 +264,7 @@ def _generate_run_config(
     build_config: BuildConfig,
     build_dir: Path,
     image_name: str,
-) -> str:
+) -> Path:
     """
     Generate a run.yaml template file for user to edit from a build.yaml file
     """

@@ -343,7 +343,7 @@ def _run_stack_build_command_from_build_config(
     image_name: str | None = None,
     template_name: str | None = None,
     config_path: str | None = None,
-) -> str:
+) -> Path | Traversable:
     image_name = image_name or build_config.image_name
     if build_config.image_type == LlamaStackImageType.CONTAINER.value:
         if template_name:
@@ -340,8 +340,17 @@ class BuildConfig(BaseModel):
         default=None,
         description="Name of the distribution to build",
     )
-    external_providers_dir: str | None = Field(
+    external_providers_dir: Path | None = Field(
         default=None,
         description="Path to directory containing external provider implementations. The providers packages will be resolved from this directory. "
         "pip_packages MUST contain the provider package name.",
     )
+
+    @field_validator("external_providers_dir")
+    @classmethod
+    def validate_external_providers_dir(cls, v):
+        if v is None:
+            return None
+        if isinstance(v, str):
+            return Path(v)
+        return v
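The new validator keeps YAML configs backward compatible: a plain string in build.yaml is coerced to a Path on load, and None passes through. Its behavior, restated as a standalone function with the same branches (the example directory is arbitrary):

    from pathlib import Path

    def coerce(v):
        # Same branches as validate_external_providers_dir above.
        if v is None:
            return None
        if isinstance(v, str):
            return Path(v)
        return v

    assert coerce(None) is None
    assert coerce("~/.llama/providers.d") == Path("~/.llama/providers.d")
    assert coerce(Path("/tmp/providers.d")) == Path("/tmp/providers.d")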
@@ -226,6 +226,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
                 distribution_spec=DistributionSpec(
                     providers=provider_types,
                 ),
+                external_providers_dir=self.config.external_providers_dir,
             )
             print_pip_install_help(build_config)
         else:

@@ -13,7 +13,7 @@ from llama_stack.apis.datasetio import DatasetIO
 from llama_stack.apis.datasets import Datasets
 from llama_stack.apis.eval import Eval
 from llama_stack.apis.files import Files
-from llama_stack.apis.inference import Inference
+from llama_stack.apis.inference import Inference, InferenceProvider
 from llama_stack.apis.inspect import Inspect
 from llama_stack.apis.models import Models
 from llama_stack.apis.post_training import PostTraining

@@ -83,6 +83,13 @@ def api_protocol_map() -> dict[Api, Any]:
     }


+def api_protocol_map_for_compliance_check() -> dict[Api, Any]:
+    return {
+        **api_protocol_map(),
+        Api.inference: InferenceProvider,
+    }
+
+
 def additional_protocols_map() -> dict[Api, Any]:
     return {
         Api.inference: (ModelsProtocolPrivate, Models, Api.models),

@@ -302,9 +309,6 @@ async def instantiate_provider(
     inner_impls: dict[str, Any],
     dist_registry: DistributionRegistry,
 ):
-    protocols = api_protocol_map()
-    additional_protocols = additional_protocols_map()
-
     provider_spec = provider.spec
     if not hasattr(provider_spec, "module"):
         raise AttributeError(f"ProviderSpec of type {type(provider_spec)} does not have a 'module' attribute")

@@ -342,6 +346,8 @@ async def instantiate_provider(
     impl.__provider_spec__ = provider_spec
     impl.__provider_config__ = config

+    protocols = api_protocol_map_for_compliance_check()
+    additional_protocols = additional_protocols_map()
     # TODO: check compliance for special tool groups
     # the impl should be for Api.tool_runtime, the name should be the special tool group, the protocol should be the special tool group protocol
     check_protocol_compliance(impl, protocols[provider_spec.api])
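The net effect of these resolver changes: at instantiation time an inference impl is validated against the narrower InferenceProvider protocol rather than the full Inference API, so providers are not forced to implement the new list/get endpoints. A toy model of that distinction (simplified method sets, not the real signatures):

    from typing import Protocol, runtime_checkable

    @runtime_checkable
    class InferenceProvider(Protocol):
        def chat_completion(self) -> str: ...

    @runtime_checkable
    class Inference(InferenceProvider, Protocol):
        def list_chat_completions(self) -> list: ...

    class MyProvider:
        # Implements only the provider surface; no list_chat_completions.
        def chat_completion(self) -> str:
            return "ok"

    impl = MyProvider()
    assert isinstance(impl, InferenceProvider)  # passes the compliance check
    assert not isinstance(impl, Inference)      # the full API would reject it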
@@ -280,7 +280,18 @@ class TracingMiddleware:
             logger.debug(f"No matching endpoint found for path: {path}, falling back to FastAPI")
             return await self.app(scope, receive, send)

-        trace_context = await start_trace(trace_path, {"__location__": "server", "raw_path": path})
+        trace_attributes = {"__location__": "server", "raw_path": path}
+
+        # Extract W3C trace context headers and store as trace attributes
+        headers = dict(scope.get("headers", []))
+        traceparent = headers.get(b"traceparent", b"").decode()
+        if traceparent:
+            trace_attributes["traceparent"] = traceparent
+        tracestate = headers.get(b"tracestate", b"").decode()
+        if tracestate:
+            trace_attributes["tracestate"] = tracestate
+
+        trace_context = await start_trace(trace_path, trace_attributes)

         async def send_with_trace_id(message):
             if message["type"] == "http.response.start":
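With this middleware change, a caller can attach server spans to an existing distributed trace simply by forwarding the standard W3C headers, for example (the traceparent value is the usual example from the W3C Trace Context spec, not a real trace):

    import requests

    headers = {
        # format: version-trace_id-parent_span_id-flags
        "traceparent": "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01",
        "tracestate": "vendor1=opaque-value",
    }
    requests.get(
        "http://localhost:8321/v1/openai/v1/chat/completions",
        headers=headers,
    )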
@@ -370,14 +381,6 @@ def main(args: argparse.Namespace | None = None):
     if args is None:
         args = parser.parse_args()

-    # Check for deprecated argument usage
-    if "--config" in sys.argv:
-        warnings.warn(
-            "The '--config' argument is deprecated and will be removed in a future version. Use '--config' instead.",
-            DeprecationWarning,
-            stacklevel=2,
-        )
-
     log_line = ""
     if args.config:
         # if the user provided a config file, use it, even if template was specified

@@ -28,7 +28,7 @@ from llama_stack.apis.inference import (
     CompletionRequest,
     CompletionResponse,
     CompletionResponseStreamChunk,
-    Inference,
+    InferenceProvider,
     InterleavedContent,
     LogProbConfig,
     Message,

@@ -86,7 +86,7 @@ class MetaReferenceInferenceImpl(
     OpenAICompletionToLlamaStackMixin,
     OpenAIChatCompletionToLlamaStackMixin,
     SentenceTransformerEmbeddingMixin,
-    Inference,
+    InferenceProvider,
     ModelsProtocolPrivate,
 ):
     def __init__(self, config: MetaReferenceInferenceConfig) -> None:

@@ -9,7 +9,7 @@ from collections.abc import AsyncGenerator

 from llama_stack.apis.inference import (
     CompletionResponse,
-    Inference,
+    InferenceProvider,
     InterleavedContent,
     LogProbConfig,
     Message,

@@ -38,7 +38,7 @@ class SentenceTransformersInferenceImpl(
     OpenAIChatCompletionToLlamaStackMixin,
     OpenAICompletionToLlamaStackMixin,
     SentenceTransformerEmbeddingMixin,
-    Inference,
+    InferenceProvider,
     ModelsProtocolPrivate,
 ):
     def __init__(self, config: SentenceTransformersInferenceConfig) -> None:

@@ -16,6 +16,7 @@ from opentelemetry.sdk.resources import Resource
 from opentelemetry.sdk.trace import TracerProvider
 from opentelemetry.sdk.trace.export import BatchSpanProcessor
 from opentelemetry.semconv.resource import ResourceAttributes
+from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator

 from llama_stack.apis.telemetry import (
     Event,

@@ -44,6 +45,7 @@ from llama_stack.providers.inline.telemetry.meta_reference.sqlite_span_processor
 )
 from llama_stack.providers.utils.telemetry.dataset_mixin import TelemetryDatasetMixin
 from llama_stack.providers.utils.telemetry.sqlite_trace_store import SQLiteTraceStore
+from llama_stack.providers.utils.telemetry.tracing import ROOT_SPAN_MARKERS

 from .config import TelemetryConfig, TelemetrySink

@@ -206,6 +208,15 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
             event.attributes = {}
         event.attributes["__ttl__"] = ttl_seconds

+        # Extract these W3C trace context attributes so they are not written to
+        # underlying storage, as we just need them to propagate the trace context.
+        traceparent = event.attributes.pop("traceparent", None)
+        tracestate = event.attributes.pop("tracestate", None)
+        if traceparent:
+            # If we have a traceparent header value, we're not the root span.
+            for root_attribute in ROOT_SPAN_MARKERS:
+                event.attributes.pop(root_attribute, None)
+
         if isinstance(event.payload, SpanStartPayload):
             # Check if span already exists to prevent duplicates
             if span_id in _GLOBAL_STORAGE["active_spans"]:

@@ -216,8 +227,12 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
                 parent_span_id = int(event.payload.parent_span_id, 16)
                 parent_span = _GLOBAL_STORAGE["active_spans"].get(parent_span_id)
                 context = trace.set_span_in_context(parent_span)
-            else:
-                event.attributes["__root_span__"] = "true"
+            elif traceparent:
+                carrier = {
+                    "traceparent": traceparent,
+                    "tracestate": tracestate,
+                }
+                context = TraceContextTextMapPropagator().extract(carrier=carrier)

             span = tracer.start_span(
                 name=event.payload.name,
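For reference, the propagator used in the telemetry hunk above consumes a plain dict carrier; a standalone sketch of the extraction step (traceparent value again taken from the W3C spec examples):

    from opentelemetry import trace
    from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator

    carrier = {"traceparent": "00-4bf92f3577b34da6a3ce929d0e0e4736-00f067aa0ba902b7-01"}
    ctx = TraceContextTextMapPropagator().extract(carrier=carrier)
    # Spans started under ctx become children of the remote span from the header.
    span_ctx = trace.get_current_span(ctx).get_span_context()
    print(format(span_ctx.trace_id, "032x"))  # 4bf92f3577b34da6a3ce929d0e0e4736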
@@ -4,12 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.inference import Inference
+from llama_stack.apis.inference import InferenceProvider

 from .config import CerebrasCompatConfig


-async def get_adapter_impl(config: CerebrasCompatConfig, _deps) -> Inference:
+async def get_adapter_impl(config: CerebrasCompatConfig, _deps) -> InferenceProvider:
     # import dynamically so the import is used only when it is needed
     from .cerebras import CerebrasCompatInferenceAdapter

@@ -4,12 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.inference import Inference
+from llama_stack.apis.inference import InferenceProvider

 from .config import FireworksCompatConfig


-async def get_adapter_impl(config: FireworksCompatConfig, _deps) -> Inference:
+async def get_adapter_impl(config: FireworksCompatConfig, _deps) -> InferenceProvider:
     # import dynamically so the import is used only when it is needed
     from .fireworks import FireworksCompatInferenceAdapter

@@ -4,12 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.inference import Inference
+from llama_stack.apis.inference import InferenceProvider

 from .config import GroqCompatConfig


-async def get_adapter_impl(config: GroqCompatConfig, _deps) -> Inference:
+async def get_adapter_impl(config: GroqCompatConfig, _deps) -> InferenceProvider:
     # import dynamically so the import is used only when it is needed
     from .groq import GroqCompatInferenceAdapter

@@ -4,12 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.inference import Inference
+from llama_stack.apis.inference import InferenceProvider

 from .config import LlamaCompatConfig


-async def get_adapter_impl(config: LlamaCompatConfig, _deps) -> Inference:
+async def get_adapter_impl(config: LlamaCompatConfig, _deps) -> InferenceProvider:
     # import dynamically so the import is used only when it is needed
     from .llama import LlamaCompatInferenceAdapter

@@ -28,7 +28,7 @@ from llama_stack.apis.inference import (
     EmbeddingsResponse,
     EmbeddingTaskType,
     GrammarResponseFormat,
-    Inference,
+    InferenceProvider,
     JsonSchemaResponseFormat,
     LogProbConfig,
     Message,

@@ -82,7 +82,7 @@ logger = get_logger(name=__name__, category="inference")


 class OllamaInferenceAdapter(
-    Inference,
+    InferenceProvider,
     ModelsProtocolPrivate,
 ):
     def __init__(self, url: str) -> None:

@@ -4,12 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.inference import Inference
+from llama_stack.apis.inference import InferenceProvider

 from .config import SambaNovaCompatConfig


-async def get_adapter_impl(config: SambaNovaCompatConfig, _deps) -> Inference:
+async def get_adapter_impl(config: SambaNovaCompatConfig, _deps) -> InferenceProvider:
     # import dynamically so the import is used only when it is needed
     from .sambanova import SambaNovaCompatInferenceAdapter

@@ -4,12 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from llama_stack.apis.inference import Inference
+from llama_stack.apis.inference import InferenceProvider

 from .config import TogetherCompatConfig


-async def get_adapter_impl(config: TogetherCompatConfig, _deps) -> Inference:
+async def get_adapter_impl(config: TogetherCompatConfig, _deps) -> InferenceProvider:
     # import dynamically so the import is used only when it is needed
     from .together import TogetherCompatInferenceAdapter

@@ -19,7 +19,7 @@ from llama_stack.apis.inference import (
     ChatCompletionResponseStreamChunk,
     EmbeddingsResponse,
     EmbeddingTaskType,
-    Inference,
+    InferenceProvider,
     JsonSchemaResponseFormat,
     LogProbConfig,
     Message,

@@ -59,7 +59,7 @@ logger = get_logger(name=__name__, category="inference")


 class LiteLLMOpenAIMixin(
     ModelRegistryHelper,
-    Inference,
+    InferenceProvider,
     NeedsRequestProviderData,
 ):
     # TODO: avoid exposing the litellm specific model names to the user.

@@ -34,6 +34,8 @@ logger = get_logger(__name__, category="core")
 INVALID_SPAN_ID = 0x0000000000000000
 INVALID_TRACE_ID = 0x00000000000000000000000000000000

+ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]
+

 def trace_id_to_str(trace_id: int) -> str:
     """Convenience trace ID formatting method
@@ -178,7 +180,8 @@ async def start_trace(name: str, attributes: dict[str, Any] = None) -> TraceContext:

     trace_id = generate_trace_id()
     context = TraceContext(BACKGROUND_LOGGER, trace_id)
-    context.push_span(name, {"__root__": True, **(attributes or {})})
+    attributes = {marker: True for marker in ROOT_SPAN_MARKERS} | (attributes or {})
+    context.push_span(name, attributes)

     CURRENT_TRACE_CONTEXT.set(context)
     return context
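One subtlety in the rewritten merge: Python's dict | operator keeps the right-hand value on key collisions, so caller-supplied attributes still override the root-span markers, preserving the precedence of the old {"__root__": True, **(attributes or {})} form. A quick check (marker names from the diff; the sample attributes are illustrative):

    ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]

    attributes = {"__root__": False, "raw_path": "/v1/models"}
    merged = {marker: True for marker in ROOT_SPAN_MARKERS} | attributes
    assert merged == {"__root__": False, "__root_span__": True, "raw_path": "/v1/models"}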