forked from phoenix-oss/llama-stack-mirror
feat: introduce APIs for retrieving chat completion requests (#2145)
# What does this PR do? This PR introduces APIs to retrieve past chat completion requests, which will be used in the LS UI. Our current `Telemetry` is ill-suited for this purpose as it's untyped, so we'd need to filter by obscure attribute names, making it brittle. Since these APIs are 'provided by stack' and don't need to be implemented by inference providers, we introduce a new InferenceProvider class, containing the existing inference protocol, which is implemented by inference providers. The APIs are OpenAI-compliant, with an additional `input_messages` field. ## Test Plan This PR just adds the API and marks them provided_by_stack. Start stack server -> doesn't crash
This commit is contained in:
parent
c7015d3d60
commit
047303e339
15 changed files with 1356 additions and 869 deletions
1202
docs/_static/llama-stack-spec.html
vendored
1202
docs/_static/llama-stack-spec.html
vendored
File diff suppressed because it is too large
Load diff
900
docs/_static/llama-stack-spec.yaml
vendored
900
docs/_static/llama-stack-spec.yaml
vendored
|
@ -827,6 +827,35 @@ paths:
|
||||||
required: true
|
required: true
|
||||||
schema:
|
schema:
|
||||||
type: string
|
type: string
|
||||||
|
/v1/openai/v1/chat/completions/{completion_id}:
|
||||||
|
get:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: A OpenAICompletionWithInputMessages.
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/OpenAICompletionWithInputMessages'
|
||||||
|
'400':
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/TooManyRequests429
|
||||||
|
'500':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/InternalServerError500
|
||||||
|
default:
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- Inference
|
||||||
|
description: Describe a chat completion by its ID.
|
||||||
|
parameters:
|
||||||
|
- name: completion_id
|
||||||
|
in: path
|
||||||
|
description: ID of the chat completion.
|
||||||
|
required: true
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
/v1/datasets/{dataset_id}:
|
/v1/datasets/{dataset_id}:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
@ -1795,6 +1824,89 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/RegisterBenchmarkRequest'
|
$ref: '#/components/schemas/RegisterBenchmarkRequest'
|
||||||
required: true
|
required: true
|
||||||
|
/v1/openai/v1/chat/completions:
|
||||||
|
get:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: A ListOpenAIChatCompletionResponse.
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/ListOpenAIChatCompletionResponse'
|
||||||
|
'400':
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/TooManyRequests429
|
||||||
|
'500':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/InternalServerError500
|
||||||
|
default:
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- Inference
|
||||||
|
description: List all chat completions.
|
||||||
|
parameters:
|
||||||
|
- name: after
|
||||||
|
in: query
|
||||||
|
description: >-
|
||||||
|
The ID of the last chat completion to return.
|
||||||
|
required: false
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
- name: limit
|
||||||
|
in: query
|
||||||
|
description: >-
|
||||||
|
The maximum number of chat completions to return.
|
||||||
|
required: false
|
||||||
|
schema:
|
||||||
|
type: integer
|
||||||
|
- name: model
|
||||||
|
in: query
|
||||||
|
description: The model to filter by.
|
||||||
|
required: false
|
||||||
|
schema:
|
||||||
|
type: string
|
||||||
|
- name: order
|
||||||
|
in: query
|
||||||
|
description: >-
|
||||||
|
The order to sort the chat completions by: "asc" or "desc". Defaults to
|
||||||
|
"desc".
|
||||||
|
required: false
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/Order'
|
||||||
|
post:
|
||||||
|
responses:
|
||||||
|
'200':
|
||||||
|
description: An OpenAIChatCompletion.
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
oneOf:
|
||||||
|
- $ref: '#/components/schemas/OpenAIChatCompletion'
|
||||||
|
- $ref: '#/components/schemas/OpenAIChatCompletionChunk'
|
||||||
|
'400':
|
||||||
|
$ref: '#/components/responses/BadRequest400'
|
||||||
|
'429':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/TooManyRequests429
|
||||||
|
'500':
|
||||||
|
$ref: >-
|
||||||
|
#/components/responses/InternalServerError500
|
||||||
|
default:
|
||||||
|
$ref: '#/components/responses/DefaultError'
|
||||||
|
tags:
|
||||||
|
- Inference
|
||||||
|
description: >-
|
||||||
|
Generate an OpenAI-compatible chat completion for the given messages using
|
||||||
|
the specified model.
|
||||||
|
parameters: []
|
||||||
|
requestBody:
|
||||||
|
content:
|
||||||
|
application/json:
|
||||||
|
schema:
|
||||||
|
$ref: '#/components/schemas/OpenaiChatCompletionRequest'
|
||||||
|
required: true
|
||||||
/v1/datasets:
|
/v1/datasets:
|
||||||
get:
|
get:
|
||||||
responses:
|
responses:
|
||||||
|
@ -2261,39 +2373,6 @@ paths:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/LogEventRequest'
|
$ref: '#/components/schemas/LogEventRequest'
|
||||||
required: true
|
required: true
|
||||||
/v1/openai/v1/chat/completions:
|
|
||||||
post:
|
|
||||||
responses:
|
|
||||||
'200':
|
|
||||||
description: An OpenAIChatCompletion.
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/OpenAIChatCompletion'
|
|
||||||
- $ref: '#/components/schemas/OpenAIChatCompletionChunk'
|
|
||||||
'400':
|
|
||||||
$ref: '#/components/responses/BadRequest400'
|
|
||||||
'429':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/TooManyRequests429
|
|
||||||
'500':
|
|
||||||
$ref: >-
|
|
||||||
#/components/responses/InternalServerError500
|
|
||||||
default:
|
|
||||||
$ref: '#/components/responses/DefaultError'
|
|
||||||
tags:
|
|
||||||
- Inference
|
|
||||||
description: >-
|
|
||||||
Generate an OpenAI-compatible chat completion for the given messages using
|
|
||||||
the specified model.
|
|
||||||
parameters: []
|
|
||||||
requestBody:
|
|
||||||
content:
|
|
||||||
application/json:
|
|
||||||
schema:
|
|
||||||
$ref: '#/components/schemas/OpenaiChatCompletionRequest'
|
|
||||||
required: true
|
|
||||||
/v1/openai/v1/completions:
|
/v1/openai/v1/completions:
|
||||||
post:
|
post:
|
||||||
responses:
|
responses:
|
||||||
|
@ -5479,6 +5558,369 @@ components:
|
||||||
- scoring_functions
|
- scoring_functions
|
||||||
- metadata
|
- metadata
|
||||||
title: Benchmark
|
title: Benchmark
|
||||||
|
OpenAIAssistantMessageParam:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
role:
|
||||||
|
type: string
|
||||||
|
const: assistant
|
||||||
|
default: assistant
|
||||||
|
description: >-
|
||||||
|
Must be "assistant" to identify this as the model's response
|
||||||
|
content:
|
||||||
|
oneOf:
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
|
||||||
|
description: The content of the model's response
|
||||||
|
name:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
(Optional) The name of the assistant message participant.
|
||||||
|
tool_calls:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/OpenAIChatCompletionToolCall'
|
||||||
|
description: >-
|
||||||
|
List of tool calls. Each tool call is an OpenAIChatCompletionToolCall
|
||||||
|
object.
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- role
|
||||||
|
title: OpenAIAssistantMessageParam
|
||||||
|
description: >-
|
||||||
|
A message containing the model's (assistant) response in an OpenAI-compatible
|
||||||
|
chat completion request.
|
||||||
|
"OpenAIChatCompletionContentPartImageParam":
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
type:
|
||||||
|
type: string
|
||||||
|
const: image_url
|
||||||
|
default: image_url
|
||||||
|
image_url:
|
||||||
|
$ref: '#/components/schemas/OpenAIImageURL'
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- type
|
||||||
|
- image_url
|
||||||
|
title: >-
|
||||||
|
OpenAIChatCompletionContentPartImageParam
|
||||||
|
OpenAIChatCompletionContentPartParam:
|
||||||
|
oneOf:
|
||||||
|
- $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
|
||||||
|
- $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
|
||||||
|
discriminator:
|
||||||
|
propertyName: type
|
||||||
|
mapping:
|
||||||
|
text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
|
||||||
|
image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
|
||||||
|
OpenAIChatCompletionContentPartTextParam:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
type:
|
||||||
|
type: string
|
||||||
|
const: text
|
||||||
|
default: text
|
||||||
|
text:
|
||||||
|
type: string
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- type
|
||||||
|
- text
|
||||||
|
title: OpenAIChatCompletionContentPartTextParam
|
||||||
|
OpenAIChatCompletionToolCall:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
index:
|
||||||
|
type: integer
|
||||||
|
id:
|
||||||
|
type: string
|
||||||
|
type:
|
||||||
|
type: string
|
||||||
|
const: function
|
||||||
|
default: function
|
||||||
|
function:
|
||||||
|
$ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction'
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- type
|
||||||
|
title: OpenAIChatCompletionToolCall
|
||||||
|
OpenAIChatCompletionToolCallFunction:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
name:
|
||||||
|
type: string
|
||||||
|
arguments:
|
||||||
|
type: string
|
||||||
|
additionalProperties: false
|
||||||
|
title: OpenAIChatCompletionToolCallFunction
|
||||||
|
OpenAIChoice:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
message:
|
||||||
|
$ref: '#/components/schemas/OpenAIMessageParam'
|
||||||
|
description: The message from the model
|
||||||
|
finish_reason:
|
||||||
|
type: string
|
||||||
|
description: The reason the model stopped generating
|
||||||
|
index:
|
||||||
|
type: integer
|
||||||
|
description: The index of the choice
|
||||||
|
logprobs:
|
||||||
|
$ref: '#/components/schemas/OpenAIChoiceLogprobs'
|
||||||
|
description: >-
|
||||||
|
(Optional) The log probabilities for the tokens in the message
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- message
|
||||||
|
- finish_reason
|
||||||
|
- index
|
||||||
|
title: OpenAIChoice
|
||||||
|
description: >-
|
||||||
|
A choice from an OpenAI-compatible chat completion response.
|
||||||
|
OpenAIChoiceLogprobs:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
content:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/OpenAITokenLogProb'
|
||||||
|
description: >-
|
||||||
|
(Optional) The log probabilities for the tokens in the message
|
||||||
|
refusal:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/OpenAITokenLogProb'
|
||||||
|
description: >-
|
||||||
|
(Optional) The log probabilities for the tokens in the message
|
||||||
|
additionalProperties: false
|
||||||
|
title: OpenAIChoiceLogprobs
|
||||||
|
description: >-
|
||||||
|
The log probabilities for the tokens in the message from an OpenAI-compatible
|
||||||
|
chat completion response.
|
||||||
|
OpenAIDeveloperMessageParam:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
role:
|
||||||
|
type: string
|
||||||
|
const: developer
|
||||||
|
default: developer
|
||||||
|
description: >-
|
||||||
|
Must be "developer" to identify this as a developer message
|
||||||
|
content:
|
||||||
|
oneOf:
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
|
||||||
|
description: The content of the developer message
|
||||||
|
name:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
(Optional) The name of the developer message participant.
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- role
|
||||||
|
- content
|
||||||
|
title: OpenAIDeveloperMessageParam
|
||||||
|
description: >-
|
||||||
|
A message from the developer in an OpenAI-compatible chat completion request.
|
||||||
|
OpenAIImageURL:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
url:
|
||||||
|
type: string
|
||||||
|
detail:
|
||||||
|
type: string
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- url
|
||||||
|
title: OpenAIImageURL
|
||||||
|
OpenAIMessageParam:
|
||||||
|
oneOf:
|
||||||
|
- $ref: '#/components/schemas/OpenAIUserMessageParam'
|
||||||
|
- $ref: '#/components/schemas/OpenAISystemMessageParam'
|
||||||
|
- $ref: '#/components/schemas/OpenAIAssistantMessageParam'
|
||||||
|
- $ref: '#/components/schemas/OpenAIToolMessageParam'
|
||||||
|
- $ref: '#/components/schemas/OpenAIDeveloperMessageParam'
|
||||||
|
discriminator:
|
||||||
|
propertyName: role
|
||||||
|
mapping:
|
||||||
|
user: '#/components/schemas/OpenAIUserMessageParam'
|
||||||
|
system: '#/components/schemas/OpenAISystemMessageParam'
|
||||||
|
assistant: '#/components/schemas/OpenAIAssistantMessageParam'
|
||||||
|
tool: '#/components/schemas/OpenAIToolMessageParam'
|
||||||
|
developer: '#/components/schemas/OpenAIDeveloperMessageParam'
|
||||||
|
OpenAISystemMessageParam:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
role:
|
||||||
|
type: string
|
||||||
|
const: system
|
||||||
|
default: system
|
||||||
|
description: >-
|
||||||
|
Must be "system" to identify this as a system message
|
||||||
|
content:
|
||||||
|
oneOf:
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
|
||||||
|
description: >-
|
||||||
|
The content of the "system prompt". If multiple system messages are provided,
|
||||||
|
they are concatenated. The underlying Llama Stack code may also add other
|
||||||
|
system messages (for example, for formatting tool definitions).
|
||||||
|
name:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
(Optional) The name of the system message participant.
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- role
|
||||||
|
- content
|
||||||
|
title: OpenAISystemMessageParam
|
||||||
|
description: >-
|
||||||
|
A system message providing instructions or context to the model.
|
||||||
|
OpenAITokenLogProb:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
token:
|
||||||
|
type: string
|
||||||
|
bytes:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: integer
|
||||||
|
logprob:
|
||||||
|
type: number
|
||||||
|
top_logprobs:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/OpenAITopLogProb'
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- token
|
||||||
|
- logprob
|
||||||
|
- top_logprobs
|
||||||
|
title: OpenAITokenLogProb
|
||||||
|
description: >-
|
||||||
|
The log probability for a token from an OpenAI-compatible chat completion
|
||||||
|
response.
|
||||||
|
OpenAIToolMessageParam:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
role:
|
||||||
|
type: string
|
||||||
|
const: tool
|
||||||
|
default: tool
|
||||||
|
description: >-
|
||||||
|
Must be "tool" to identify this as a tool response
|
||||||
|
tool_call_id:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
Unique identifier for the tool call this response is for
|
||||||
|
content:
|
||||||
|
oneOf:
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
|
||||||
|
description: The response content from the tool
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- role
|
||||||
|
- tool_call_id
|
||||||
|
- content
|
||||||
|
title: OpenAIToolMessageParam
|
||||||
|
description: >-
|
||||||
|
A message representing the result of a tool invocation in an OpenAI-compatible
|
||||||
|
chat completion request.
|
||||||
|
OpenAITopLogProb:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
token:
|
||||||
|
type: string
|
||||||
|
bytes:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: integer
|
||||||
|
logprob:
|
||||||
|
type: number
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- token
|
||||||
|
- logprob
|
||||||
|
title: OpenAITopLogProb
|
||||||
|
description: >-
|
||||||
|
The top log probability for a token from an OpenAI-compatible chat completion
|
||||||
|
response.
|
||||||
|
OpenAIUserMessageParam:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
role:
|
||||||
|
type: string
|
||||||
|
const: user
|
||||||
|
default: user
|
||||||
|
description: >-
|
||||||
|
Must be "user" to identify this as a user message
|
||||||
|
content:
|
||||||
|
oneOf:
|
||||||
|
- type: string
|
||||||
|
- type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
|
||||||
|
description: >-
|
||||||
|
The content of the message, which can include text and other media
|
||||||
|
name:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
(Optional) The name of the user message participant.
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- role
|
||||||
|
- content
|
||||||
|
title: OpenAIUserMessageParam
|
||||||
|
description: >-
|
||||||
|
A message from the user in an OpenAI-compatible chat completion request.
|
||||||
|
OpenAICompletionWithInputMessages:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
id:
|
||||||
|
type: string
|
||||||
|
description: The ID of the chat completion
|
||||||
|
choices:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/OpenAIChoice'
|
||||||
|
description: List of choices
|
||||||
|
object:
|
||||||
|
type: string
|
||||||
|
const: chat.completion
|
||||||
|
default: chat.completion
|
||||||
|
description: >-
|
||||||
|
The object type, which will be "chat.completion"
|
||||||
|
created:
|
||||||
|
type: integer
|
||||||
|
description: >-
|
||||||
|
The Unix timestamp in seconds when the chat completion was created
|
||||||
|
model:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The model that was used to generate the chat completion
|
||||||
|
input_messages:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/OpenAIMessageParam'
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- id
|
||||||
|
- choices
|
||||||
|
- object
|
||||||
|
- created
|
||||||
|
- model
|
||||||
|
- input_messages
|
||||||
|
title: OpenAICompletionWithInputMessages
|
||||||
DataSource:
|
DataSource:
|
||||||
oneOf:
|
oneOf:
|
||||||
- $ref: '#/components/schemas/URIDataSource'
|
- $ref: '#/components/schemas/URIDataSource'
|
||||||
|
@ -6497,6 +6939,73 @@ components:
|
||||||
required:
|
required:
|
||||||
- data
|
- data
|
||||||
title: ListBenchmarksResponse
|
title: ListBenchmarksResponse
|
||||||
|
Order:
|
||||||
|
type: string
|
||||||
|
enum:
|
||||||
|
- asc
|
||||||
|
- desc
|
||||||
|
title: Order
|
||||||
|
ListOpenAIChatCompletionResponse:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
data:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
id:
|
||||||
|
type: string
|
||||||
|
description: The ID of the chat completion
|
||||||
|
choices:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/OpenAIChoice'
|
||||||
|
description: List of choices
|
||||||
|
object:
|
||||||
|
type: string
|
||||||
|
const: chat.completion
|
||||||
|
default: chat.completion
|
||||||
|
description: >-
|
||||||
|
The object type, which will be "chat.completion"
|
||||||
|
created:
|
||||||
|
type: integer
|
||||||
|
description: >-
|
||||||
|
The Unix timestamp in seconds when the chat completion was created
|
||||||
|
model:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The model that was used to generate the chat completion
|
||||||
|
input_messages:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
$ref: '#/components/schemas/OpenAIMessageParam'
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- id
|
||||||
|
- choices
|
||||||
|
- object
|
||||||
|
- created
|
||||||
|
- model
|
||||||
|
- input_messages
|
||||||
|
title: OpenAICompletionWithInputMessages
|
||||||
|
has_more:
|
||||||
|
type: boolean
|
||||||
|
first_id:
|
||||||
|
type: string
|
||||||
|
last_id:
|
||||||
|
type: string
|
||||||
|
object:
|
||||||
|
type: string
|
||||||
|
const: list
|
||||||
|
default: list
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- data
|
||||||
|
- has_more
|
||||||
|
- first_id
|
||||||
|
- last_id
|
||||||
|
- object
|
||||||
|
title: ListOpenAIChatCompletionResponse
|
||||||
ListDatasetsResponse:
|
ListDatasetsResponse:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -6835,142 +7344,6 @@ components:
|
||||||
- event
|
- event
|
||||||
- ttl_seconds
|
- ttl_seconds
|
||||||
title: LogEventRequest
|
title: LogEventRequest
|
||||||
OpenAIAssistantMessageParam:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
role:
|
|
||||||
type: string
|
|
||||||
const: assistant
|
|
||||||
default: assistant
|
|
||||||
description: >-
|
|
||||||
Must be "assistant" to identify this as the model's response
|
|
||||||
content:
|
|
||||||
oneOf:
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
|
|
||||||
description: The content of the model's response
|
|
||||||
name:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
(Optional) The name of the assistant message participant.
|
|
||||||
tool_calls:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/OpenAIChatCompletionToolCall'
|
|
||||||
description: >-
|
|
||||||
List of tool calls. Each tool call is an OpenAIChatCompletionToolCall
|
|
||||||
object.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- role
|
|
||||||
title: OpenAIAssistantMessageParam
|
|
||||||
description: >-
|
|
||||||
A message containing the model's (assistant) response in an OpenAI-compatible
|
|
||||||
chat completion request.
|
|
||||||
"OpenAIChatCompletionContentPartImageParam":
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
type:
|
|
||||||
type: string
|
|
||||||
const: image_url
|
|
||||||
default: image_url
|
|
||||||
image_url:
|
|
||||||
$ref: '#/components/schemas/OpenAIImageURL'
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- type
|
|
||||||
- image_url
|
|
||||||
title: >-
|
|
||||||
OpenAIChatCompletionContentPartImageParam
|
|
||||||
OpenAIChatCompletionContentPartParam:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
|
|
||||||
- $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
|
|
||||||
discriminator:
|
|
||||||
propertyName: type
|
|
||||||
mapping:
|
|
||||||
text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
|
|
||||||
image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
|
|
||||||
OpenAIChatCompletionContentPartTextParam:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
type:
|
|
||||||
type: string
|
|
||||||
const: text
|
|
||||||
default: text
|
|
||||||
text:
|
|
||||||
type: string
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- type
|
|
||||||
- text
|
|
||||||
title: OpenAIChatCompletionContentPartTextParam
|
|
||||||
OpenAIChatCompletionToolCall:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
index:
|
|
||||||
type: integer
|
|
||||||
id:
|
|
||||||
type: string
|
|
||||||
type:
|
|
||||||
type: string
|
|
||||||
const: function
|
|
||||||
default: function
|
|
||||||
function:
|
|
||||||
$ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction'
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- type
|
|
||||||
title: OpenAIChatCompletionToolCall
|
|
||||||
OpenAIChatCompletionToolCallFunction:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
name:
|
|
||||||
type: string
|
|
||||||
arguments:
|
|
||||||
type: string
|
|
||||||
additionalProperties: false
|
|
||||||
title: OpenAIChatCompletionToolCallFunction
|
|
||||||
OpenAIDeveloperMessageParam:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
role:
|
|
||||||
type: string
|
|
||||||
const: developer
|
|
||||||
default: developer
|
|
||||||
description: >-
|
|
||||||
Must be "developer" to identify this as a developer message
|
|
||||||
content:
|
|
||||||
oneOf:
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
|
|
||||||
description: The content of the developer message
|
|
||||||
name:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
(Optional) The name of the developer message participant.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- role
|
|
||||||
- content
|
|
||||||
title: OpenAIDeveloperMessageParam
|
|
||||||
description: >-
|
|
||||||
A message from the developer in an OpenAI-compatible chat completion request.
|
|
||||||
OpenAIImageURL:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
url:
|
|
||||||
type: string
|
|
||||||
detail:
|
|
||||||
type: string
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- url
|
|
||||||
title: OpenAIImageURL
|
|
||||||
OpenAIJSONSchema:
|
OpenAIJSONSchema:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -6994,21 +7367,6 @@ components:
|
||||||
required:
|
required:
|
||||||
- name
|
- name
|
||||||
title: OpenAIJSONSchema
|
title: OpenAIJSONSchema
|
||||||
OpenAIMessageParam:
|
|
||||||
oneOf:
|
|
||||||
- $ref: '#/components/schemas/OpenAIUserMessageParam'
|
|
||||||
- $ref: '#/components/schemas/OpenAISystemMessageParam'
|
|
||||||
- $ref: '#/components/schemas/OpenAIAssistantMessageParam'
|
|
||||||
- $ref: '#/components/schemas/OpenAIToolMessageParam'
|
|
||||||
- $ref: '#/components/schemas/OpenAIDeveloperMessageParam'
|
|
||||||
discriminator:
|
|
||||||
propertyName: role
|
|
||||||
mapping:
|
|
||||||
user: '#/components/schemas/OpenAIUserMessageParam'
|
|
||||||
system: '#/components/schemas/OpenAISystemMessageParam'
|
|
||||||
assistant: '#/components/schemas/OpenAIAssistantMessageParam'
|
|
||||||
tool: '#/components/schemas/OpenAIToolMessageParam'
|
|
||||||
developer: '#/components/schemas/OpenAIDeveloperMessageParam'
|
|
||||||
OpenAIResponseFormatJSONObject:
|
OpenAIResponseFormatJSONObject:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -7056,93 +7414,6 @@ components:
|
||||||
required:
|
required:
|
||||||
- type
|
- type
|
||||||
title: OpenAIResponseFormatText
|
title: OpenAIResponseFormatText
|
||||||
OpenAISystemMessageParam:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
role:
|
|
||||||
type: string
|
|
||||||
const: system
|
|
||||||
default: system
|
|
||||||
description: >-
|
|
||||||
Must be "system" to identify this as a system message
|
|
||||||
content:
|
|
||||||
oneOf:
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
|
|
||||||
description: >-
|
|
||||||
The content of the "system prompt". If multiple system messages are provided,
|
|
||||||
they are concatenated. The underlying Llama Stack code may also add other
|
|
||||||
system messages (for example, for formatting tool definitions).
|
|
||||||
name:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
(Optional) The name of the system message participant.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- role
|
|
||||||
- content
|
|
||||||
title: OpenAISystemMessageParam
|
|
||||||
description: >-
|
|
||||||
A system message providing instructions or context to the model.
|
|
||||||
OpenAIToolMessageParam:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
role:
|
|
||||||
type: string
|
|
||||||
const: tool
|
|
||||||
default: tool
|
|
||||||
description: >-
|
|
||||||
Must be "tool" to identify this as a tool response
|
|
||||||
tool_call_id:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
Unique identifier for the tool call this response is for
|
|
||||||
content:
|
|
||||||
oneOf:
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
|
|
||||||
description: The response content from the tool
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- role
|
|
||||||
- tool_call_id
|
|
||||||
- content
|
|
||||||
title: OpenAIToolMessageParam
|
|
||||||
description: >-
|
|
||||||
A message representing the result of a tool invocation in an OpenAI-compatible
|
|
||||||
chat completion request.
|
|
||||||
OpenAIUserMessageParam:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
role:
|
|
||||||
type: string
|
|
||||||
const: user
|
|
||||||
default: user
|
|
||||||
description: >-
|
|
||||||
Must be "user" to identify this as a user message
|
|
||||||
content:
|
|
||||||
oneOf:
|
|
||||||
- type: string
|
|
||||||
- type: array
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
|
|
||||||
description: >-
|
|
||||||
The content of the message, which can include text and other media
|
|
||||||
name:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
(Optional) The name of the user message participant.
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- role
|
|
||||||
- content
|
|
||||||
title: OpenAIUserMessageParam
|
|
||||||
description: >-
|
|
||||||
A message from the user in an OpenAI-compatible chat completion request.
|
|
||||||
OpenaiChatCompletionRequest:
|
OpenaiChatCompletionRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -7356,30 +7627,6 @@ components:
|
||||||
title: OpenAIChatCompletionChunk
|
title: OpenAIChatCompletionChunk
|
||||||
description: >-
|
description: >-
|
||||||
Chunk from a streaming response to an OpenAI-compatible chat completion request.
|
Chunk from a streaming response to an OpenAI-compatible chat completion request.
|
||||||
OpenAIChoice:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
message:
|
|
||||||
$ref: '#/components/schemas/OpenAIMessageParam'
|
|
||||||
description: The message from the model
|
|
||||||
finish_reason:
|
|
||||||
type: string
|
|
||||||
description: The reason the model stopped generating
|
|
||||||
index:
|
|
||||||
type: integer
|
|
||||||
description: The index of the choice
|
|
||||||
logprobs:
|
|
||||||
$ref: '#/components/schemas/OpenAIChoiceLogprobs'
|
|
||||||
description: >-
|
|
||||||
(Optional) The log probabilities for the tokens in the message
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- message
|
|
||||||
- finish_reason
|
|
||||||
- index
|
|
||||||
title: OpenAIChoice
|
|
||||||
description: >-
|
|
||||||
A choice from an OpenAI-compatible chat completion response.
|
|
||||||
OpenAIChoiceDelta:
|
OpenAIChoiceDelta:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -7401,26 +7648,6 @@ components:
|
||||||
title: OpenAIChoiceDelta
|
title: OpenAIChoiceDelta
|
||||||
description: >-
|
description: >-
|
||||||
A delta from an OpenAI-compatible chat completion streaming response.
|
A delta from an OpenAI-compatible chat completion streaming response.
|
||||||
OpenAIChoiceLogprobs:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
content:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/OpenAITokenLogProb'
|
|
||||||
description: >-
|
|
||||||
(Optional) The log probabilities for the tokens in the message
|
|
||||||
refusal:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/OpenAITokenLogProb'
|
|
||||||
description: >-
|
|
||||||
(Optional) The log probabilities for the tokens in the message
|
|
||||||
additionalProperties: false
|
|
||||||
title: OpenAIChoiceLogprobs
|
|
||||||
description: >-
|
|
||||||
The log probabilities for the tokens in the message from an OpenAI-compatible
|
|
||||||
chat completion response.
|
|
||||||
OpenAIChunkChoice:
|
OpenAIChunkChoice:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
@ -7445,49 +7672,6 @@ components:
|
||||||
title: OpenAIChunkChoice
|
title: OpenAIChunkChoice
|
||||||
description: >-
|
description: >-
|
||||||
A chunk choice from an OpenAI-compatible chat completion streaming response.
|
A chunk choice from an OpenAI-compatible chat completion streaming response.
|
||||||
OpenAITokenLogProb:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
token:
|
|
||||||
type: string
|
|
||||||
bytes:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
type: integer
|
|
||||||
logprob:
|
|
||||||
type: number
|
|
||||||
top_logprobs:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
$ref: '#/components/schemas/OpenAITopLogProb'
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- token
|
|
||||||
- logprob
|
|
||||||
- top_logprobs
|
|
||||||
title: OpenAITokenLogProb
|
|
||||||
description: >-
|
|
||||||
The log probability for a token from an OpenAI-compatible chat completion
|
|
||||||
response.
|
|
||||||
OpenAITopLogProb:
|
|
||||||
type: object
|
|
||||||
properties:
|
|
||||||
token:
|
|
||||||
type: string
|
|
||||||
bytes:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
type: integer
|
|
||||||
logprob:
|
|
||||||
type: number
|
|
||||||
additionalProperties: false
|
|
||||||
required:
|
|
||||||
- token
|
|
||||||
- logprob
|
|
||||||
title: OpenAITopLogProb
|
|
||||||
description: >-
|
|
||||||
The top log probability for a token from an OpenAI-compatible chat completion
|
|
||||||
response.
|
|
||||||
OpenaiCompletionRequest:
|
OpenaiCompletionRequest:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
|
|
|
@ -759,7 +759,7 @@ class Generator:
|
||||||
)
|
)
|
||||||
|
|
||||||
return Operation(
|
return Operation(
|
||||||
tags=[op.defining_class.__name__],
|
tags=[getattr(op.defining_class, "API_NAMESPACE", op.defining_class.__name__)],
|
||||||
summary=None,
|
summary=None,
|
||||||
# summary=doc_string.short_description,
|
# summary=doc_string.short_description,
|
||||||
description=description,
|
description=description,
|
||||||
|
@ -805,6 +805,8 @@ class Generator:
|
||||||
operation_tags: List[Tag] = []
|
operation_tags: List[Tag] = []
|
||||||
for cls in endpoint_classes:
|
for cls in endpoint_classes:
|
||||||
doc_string = parse_type(cls)
|
doc_string = parse_type(cls)
|
||||||
|
if hasattr(cls, "API_NAMESPACE") and cls.API_NAMESPACE != cls.__name__:
|
||||||
|
continue
|
||||||
operation_tags.append(
|
operation_tags.append(
|
||||||
Tag(
|
Tag(
|
||||||
name=cls.__name__,
|
name=cls.__name__,
|
||||||
|
|
|
@ -820,15 +820,32 @@ class BatchChatCompletionResponse(BaseModel):
|
||||||
batch: list[ChatCompletionResponse]
|
batch: list[ChatCompletionResponse]
|
||||||
|
|
||||||
|
|
||||||
|
class OpenAICompletionWithInputMessages(OpenAIChatCompletion):
|
||||||
|
input_messages: list[OpenAIMessageParam]
|
||||||
|
|
||||||
|
|
||||||
|
@json_schema_type
|
||||||
|
class ListOpenAIChatCompletionResponse(BaseModel):
|
||||||
|
data: list[OpenAICompletionWithInputMessages]
|
||||||
|
has_more: bool
|
||||||
|
first_id: str
|
||||||
|
last_id: str
|
||||||
|
object: Literal["list"] = "list"
|
||||||
|
|
||||||
|
|
||||||
|
class Order(Enum):
|
||||||
|
asc = "asc"
|
||||||
|
desc = "desc"
|
||||||
|
|
||||||
|
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
@trace_protocol
|
@trace_protocol
|
||||||
class Inference(Protocol):
|
class InferenceProvider(Protocol):
|
||||||
"""Llama Stack Inference API for generating completions, chat completions, and embeddings.
|
|
||||||
|
|
||||||
This API provides the raw interface to the underlying models. Two kinds of models are supported:
|
|
||||||
- LLM models: these models generate "raw" and "chat" (conversational) completions.
|
|
||||||
- Embedding models: these models generate embeddings to be used for semantic search.
|
|
||||||
"""
|
"""
|
||||||
|
This protocol defines the interface that should be implemented by all inference providers.
|
||||||
|
"""
|
||||||
|
|
||||||
|
API_NAMESPACE: str = "Inference"
|
||||||
|
|
||||||
model_store: ModelStore | None = None
|
model_store: ModelStore | None = None
|
||||||
|
|
||||||
|
@ -1062,3 +1079,39 @@ class Inference(Protocol):
|
||||||
:returns: An OpenAIChatCompletion.
|
:returns: An OpenAIChatCompletion.
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
|
|
||||||
|
class Inference(InferenceProvider):
|
||||||
|
"""Llama Stack Inference API for generating completions, chat completions, and embeddings.
|
||||||
|
|
||||||
|
This API provides the raw interface to the underlying models. Two kinds of models are supported:
|
||||||
|
- LLM models: these models generate "raw" and "chat" (conversational) completions.
|
||||||
|
- Embedding models: these models generate embeddings to be used for semantic search.
|
||||||
|
"""
|
||||||
|
|
||||||
|
@webmethod(route="/openai/v1/chat/completions", method="GET")
|
||||||
|
async def list_chat_completions(
|
||||||
|
self,
|
||||||
|
after: str | None = None,
|
||||||
|
limit: int | None = 20,
|
||||||
|
model: str | None = None,
|
||||||
|
order: Order | None = Order.desc,
|
||||||
|
) -> ListOpenAIChatCompletionResponse:
|
||||||
|
"""List all chat completions.
|
||||||
|
|
||||||
|
:param after: The ID of the last chat completion to return.
|
||||||
|
:param limit: The maximum number of chat completions to return.
|
||||||
|
:param model: The model to filter by.
|
||||||
|
:param order: The order to sort the chat completions by: "asc" or "desc". Defaults to "desc".
|
||||||
|
:returns: A ListOpenAIChatCompletionResponse.
|
||||||
|
"""
|
||||||
|
raise NotImplementedError("List chat completions is not implemented")
|
||||||
|
|
||||||
|
@webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET")
|
||||||
|
async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
|
||||||
|
"""Describe a chat completion by its ID.
|
||||||
|
|
||||||
|
:param completion_id: ID of the chat completion.
|
||||||
|
:returns: A OpenAICompletionWithInputMessages.
|
||||||
|
"""
|
||||||
|
raise NotImplementedError("Get chat completion is not implemented")
|
||||||
|
|
|
@ -13,7 +13,7 @@ from llama_stack.apis.datasetio import DatasetIO
|
||||||
from llama_stack.apis.datasets import Datasets
|
from llama_stack.apis.datasets import Datasets
|
||||||
from llama_stack.apis.eval import Eval
|
from llama_stack.apis.eval import Eval
|
||||||
from llama_stack.apis.files import Files
|
from llama_stack.apis.files import Files
|
||||||
from llama_stack.apis.inference import Inference
|
from llama_stack.apis.inference import Inference, InferenceProvider
|
||||||
from llama_stack.apis.inspect import Inspect
|
from llama_stack.apis.inspect import Inspect
|
||||||
from llama_stack.apis.models import Models
|
from llama_stack.apis.models import Models
|
||||||
from llama_stack.apis.post_training import PostTraining
|
from llama_stack.apis.post_training import PostTraining
|
||||||
|
@ -83,6 +83,13 @@ def api_protocol_map() -> dict[Api, Any]:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def api_protocol_map_for_compliance_check() -> dict[Api, Any]:
|
||||||
|
return {
|
||||||
|
**api_protocol_map(),
|
||||||
|
Api.inference: InferenceProvider,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def additional_protocols_map() -> dict[Api, Any]:
|
def additional_protocols_map() -> dict[Api, Any]:
|
||||||
return {
|
return {
|
||||||
Api.inference: (ModelsProtocolPrivate, Models, Api.models),
|
Api.inference: (ModelsProtocolPrivate, Models, Api.models),
|
||||||
|
@ -302,9 +309,6 @@ async def instantiate_provider(
|
||||||
inner_impls: dict[str, Any],
|
inner_impls: dict[str, Any],
|
||||||
dist_registry: DistributionRegistry,
|
dist_registry: DistributionRegistry,
|
||||||
):
|
):
|
||||||
protocols = api_protocol_map()
|
|
||||||
additional_protocols = additional_protocols_map()
|
|
||||||
|
|
||||||
provider_spec = provider.spec
|
provider_spec = provider.spec
|
||||||
if not hasattr(provider_spec, "module"):
|
if not hasattr(provider_spec, "module"):
|
||||||
raise AttributeError(f"ProviderSpec of type {type(provider_spec)} does not have a 'module' attribute")
|
raise AttributeError(f"ProviderSpec of type {type(provider_spec)} does not have a 'module' attribute")
|
||||||
|
@ -342,6 +346,8 @@ async def instantiate_provider(
|
||||||
impl.__provider_spec__ = provider_spec
|
impl.__provider_spec__ = provider_spec
|
||||||
impl.__provider_config__ = config
|
impl.__provider_config__ = config
|
||||||
|
|
||||||
|
protocols = api_protocol_map_for_compliance_check()
|
||||||
|
additional_protocols = additional_protocols_map()
|
||||||
# TODO: check compliance for special tool groups
|
# TODO: check compliance for special tool groups
|
||||||
# the impl should be for Api.tool_runtime, the name should be the special tool group, the protocol should be the special tool group protocol
|
# the impl should be for Api.tool_runtime, the name should be the special tool group, the protocol should be the special tool group protocol
|
||||||
check_protocol_compliance(impl, protocols[provider_spec.api])
|
check_protocol_compliance(impl, protocols[provider_spec.api])
|
||||||
|
|
|
@ -28,7 +28,7 @@ from llama_stack.apis.inference import (
|
||||||
CompletionRequest,
|
CompletionRequest,
|
||||||
CompletionResponse,
|
CompletionResponse,
|
||||||
CompletionResponseStreamChunk,
|
CompletionResponseStreamChunk,
|
||||||
Inference,
|
InferenceProvider,
|
||||||
InterleavedContent,
|
InterleavedContent,
|
||||||
LogProbConfig,
|
LogProbConfig,
|
||||||
Message,
|
Message,
|
||||||
|
@ -86,7 +86,7 @@ class MetaReferenceInferenceImpl(
|
||||||
OpenAICompletionToLlamaStackMixin,
|
OpenAICompletionToLlamaStackMixin,
|
||||||
OpenAIChatCompletionToLlamaStackMixin,
|
OpenAIChatCompletionToLlamaStackMixin,
|
||||||
SentenceTransformerEmbeddingMixin,
|
SentenceTransformerEmbeddingMixin,
|
||||||
Inference,
|
InferenceProvider,
|
||||||
ModelsProtocolPrivate,
|
ModelsProtocolPrivate,
|
||||||
):
|
):
|
||||||
def __init__(self, config: MetaReferenceInferenceConfig) -> None:
|
def __init__(self, config: MetaReferenceInferenceConfig) -> None:
|
||||||
|
|
|
@ -9,7 +9,7 @@ from collections.abc import AsyncGenerator
|
||||||
|
|
||||||
from llama_stack.apis.inference import (
|
from llama_stack.apis.inference import (
|
||||||
CompletionResponse,
|
CompletionResponse,
|
||||||
Inference,
|
InferenceProvider,
|
||||||
InterleavedContent,
|
InterleavedContent,
|
||||||
LogProbConfig,
|
LogProbConfig,
|
||||||
Message,
|
Message,
|
||||||
|
@ -38,7 +38,7 @@ class SentenceTransformersInferenceImpl(
|
||||||
OpenAIChatCompletionToLlamaStackMixin,
|
OpenAIChatCompletionToLlamaStackMixin,
|
||||||
OpenAICompletionToLlamaStackMixin,
|
OpenAICompletionToLlamaStackMixin,
|
||||||
SentenceTransformerEmbeddingMixin,
|
SentenceTransformerEmbeddingMixin,
|
||||||
Inference,
|
InferenceProvider,
|
||||||
ModelsProtocolPrivate,
|
ModelsProtocolPrivate,
|
||||||
):
|
):
|
||||||
def __init__(self, config: SentenceTransformersInferenceConfig) -> None:
|
def __init__(self, config: SentenceTransformersInferenceConfig) -> None:
|
||||||
|
|
|
@ -4,12 +4,12 @@
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
from llama_stack.apis.inference import Inference
|
from llama_stack.apis.inference import InferenceProvider
|
||||||
|
|
||||||
from .config import CerebrasCompatConfig
|
from .config import CerebrasCompatConfig
|
||||||
|
|
||||||
|
|
||||||
async def get_adapter_impl(config: CerebrasCompatConfig, _deps) -> Inference:
|
async def get_adapter_impl(config: CerebrasCompatConfig, _deps) -> InferenceProvider:
|
||||||
# import dynamically so the import is used only when it is needed
|
# import dynamically so the import is used only when it is needed
|
||||||
from .cerebras import CerebrasCompatInferenceAdapter
|
from .cerebras import CerebrasCompatInferenceAdapter
|
||||||
|
|
||||||
|
|
|
@ -4,12 +4,12 @@
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
from llama_stack.apis.inference import Inference
|
from llama_stack.apis.inference import InferenceProvider
|
||||||
|
|
||||||
from .config import FireworksCompatConfig
|
from .config import FireworksCompatConfig
|
||||||
|
|
||||||
|
|
||||||
async def get_adapter_impl(config: FireworksCompatConfig, _deps) -> Inference:
|
async def get_adapter_impl(config: FireworksCompatConfig, _deps) -> InferenceProvider:
|
||||||
# import dynamically so the import is used only when it is needed
|
# import dynamically so the import is used only when it is needed
|
||||||
from .fireworks import FireworksCompatInferenceAdapter
|
from .fireworks import FireworksCompatInferenceAdapter
|
||||||
|
|
||||||
|
|
|
@ -4,12 +4,12 @@
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
from llama_stack.apis.inference import Inference
|
from llama_stack.apis.inference import InferenceProvider
|
||||||
|
|
||||||
from .config import GroqCompatConfig
|
from .config import GroqCompatConfig
|
||||||
|
|
||||||
|
|
||||||
async def get_adapter_impl(config: GroqCompatConfig, _deps) -> Inference:
|
async def get_adapter_impl(config: GroqCompatConfig, _deps) -> InferenceProvider:
|
||||||
# import dynamically so the import is used only when it is needed
|
# import dynamically so the import is used only when it is needed
|
||||||
from .groq import GroqCompatInferenceAdapter
|
from .groq import GroqCompatInferenceAdapter
|
||||||
|
|
||||||
|
|
|
@ -4,12 +4,12 @@
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
from llama_stack.apis.inference import Inference
|
from llama_stack.apis.inference import InferenceProvider
|
||||||
|
|
||||||
from .config import LlamaCompatConfig
|
from .config import LlamaCompatConfig
|
||||||
|
|
||||||
|
|
||||||
async def get_adapter_impl(config: LlamaCompatConfig, _deps) -> Inference:
|
async def get_adapter_impl(config: LlamaCompatConfig, _deps) -> InferenceProvider:
|
||||||
# import dynamically so the import is used only when it is needed
|
# import dynamically so the import is used only when it is needed
|
||||||
from .llama import LlamaCompatInferenceAdapter
|
from .llama import LlamaCompatInferenceAdapter
|
||||||
|
|
||||||
|
|
|
@ -28,7 +28,7 @@ from llama_stack.apis.inference import (
|
||||||
EmbeddingsResponse,
|
EmbeddingsResponse,
|
||||||
EmbeddingTaskType,
|
EmbeddingTaskType,
|
||||||
GrammarResponseFormat,
|
GrammarResponseFormat,
|
||||||
Inference,
|
InferenceProvider,
|
||||||
JsonSchemaResponseFormat,
|
JsonSchemaResponseFormat,
|
||||||
LogProbConfig,
|
LogProbConfig,
|
||||||
Message,
|
Message,
|
||||||
|
@ -82,7 +82,7 @@ logger = get_logger(name=__name__, category="inference")
|
||||||
|
|
||||||
|
|
||||||
class OllamaInferenceAdapter(
|
class OllamaInferenceAdapter(
|
||||||
Inference,
|
InferenceProvider,
|
||||||
ModelsProtocolPrivate,
|
ModelsProtocolPrivate,
|
||||||
):
|
):
|
||||||
def __init__(self, url: str) -> None:
|
def __init__(self, url: str) -> None:
|
||||||
|
|
|
@ -4,12 +4,12 @@
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
from llama_stack.apis.inference import Inference
|
from llama_stack.apis.inference import InferenceProvider
|
||||||
|
|
||||||
from .config import SambaNovaCompatConfig
|
from .config import SambaNovaCompatConfig
|
||||||
|
|
||||||
|
|
||||||
async def get_adapter_impl(config: SambaNovaCompatConfig, _deps) -> Inference:
|
async def get_adapter_impl(config: SambaNovaCompatConfig, _deps) -> InferenceProvider:
|
||||||
# import dynamically so the import is used only when it is needed
|
# import dynamically so the import is used only when it is needed
|
||||||
from .sambanova import SambaNovaCompatInferenceAdapter
|
from .sambanova import SambaNovaCompatInferenceAdapter
|
||||||
|
|
||||||
|
|
|
@ -4,12 +4,12 @@
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
from llama_stack.apis.inference import Inference
|
from llama_stack.apis.inference import InferenceProvider
|
||||||
|
|
||||||
from .config import TogetherCompatConfig
|
from .config import TogetherCompatConfig
|
||||||
|
|
||||||
|
|
||||||
async def get_adapter_impl(config: TogetherCompatConfig, _deps) -> Inference:
|
async def get_adapter_impl(config: TogetherCompatConfig, _deps) -> InferenceProvider:
|
||||||
# import dynamically so the import is used only when it is needed
|
# import dynamically so the import is used only when it is needed
|
||||||
from .together import TogetherCompatInferenceAdapter
|
from .together import TogetherCompatInferenceAdapter
|
||||||
|
|
||||||
|
|
|
@ -19,7 +19,7 @@ from llama_stack.apis.inference import (
|
||||||
ChatCompletionResponseStreamChunk,
|
ChatCompletionResponseStreamChunk,
|
||||||
EmbeddingsResponse,
|
EmbeddingsResponse,
|
||||||
EmbeddingTaskType,
|
EmbeddingTaskType,
|
||||||
Inference,
|
InferenceProvider,
|
||||||
JsonSchemaResponseFormat,
|
JsonSchemaResponseFormat,
|
||||||
LogProbConfig,
|
LogProbConfig,
|
||||||
Message,
|
Message,
|
||||||
|
@ -59,7 +59,7 @@ logger = get_logger(name=__name__, category="inference")
|
||||||
|
|
||||||
class LiteLLMOpenAIMixin(
|
class LiteLLMOpenAIMixin(
|
||||||
ModelRegistryHelper,
|
ModelRegistryHelper,
|
||||||
Inference,
|
InferenceProvider,
|
||||||
NeedsRequestProviderData,
|
NeedsRequestProviderData,
|
||||||
):
|
):
|
||||||
# TODO: avoid exposing the litellm specific model names to the user.
|
# TODO: avoid exposing the litellm specific model names to the user.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue