feat: introduce APIs for retrieving chat completion requests (#2145)

# What does this PR do?
This PR introduces APIs to retrieve past chat completion requests, which
will be used in the LS UI.

Our current `Telemetry` is ill-suited for this purpose as it's untyped
so we'd need to filter by obscure attribute names, making it brittle.

Since these APIs are 'provided by stack' and don't need to be
implemented by inference providers, we introduce a new InferenceProvider
class, containing the existing inference protocol, which is implemented
by inference providers.

The APIs are OpenAI-compliant, with an additional `input_messages`
field.


## Test Plan
This PR just adds the API and marks them provided_by_stack. S
tart stack server -> doesn't crash
This commit is contained in:
ehhuang 2025-05-18 21:43:19 -07:00 committed by GitHub
parent c7015d3d60
commit 047303e339
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 1356 additions and 869 deletions

File diff suppressed because it is too large Load diff

View file

@ -827,6 +827,35 @@ paths:
required: true
schema:
type: string
/v1/openai/v1/chat/completions/{completion_id}:
get:
responses:
'200':
description: A OpenAICompletionWithInputMessages.
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAICompletionWithInputMessages'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
description: Describe a chat completion by its ID.
parameters:
- name: completion_id
in: path
description: ID of the chat completion.
required: true
schema:
type: string
/v1/datasets/{dataset_id}:
get:
responses:
@ -1795,6 +1824,89 @@ paths:
schema:
$ref: '#/components/schemas/RegisterBenchmarkRequest'
required: true
/v1/openai/v1/chat/completions:
get:
responses:
'200':
description: A ListOpenAIChatCompletionResponse.
content:
application/json:
schema:
$ref: '#/components/schemas/ListOpenAIChatCompletionResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
description: List all chat completions.
parameters:
- name: after
in: query
description: >-
The ID of the last chat completion to return.
required: false
schema:
type: string
- name: limit
in: query
description: >-
The maximum number of chat completions to return.
required: false
schema:
type: integer
- name: model
in: query
description: The model to filter by.
required: false
schema:
type: string
- name: order
in: query
description: >-
The order to sort the chat completions by: "asc" or "desc". Defaults to
"desc".
required: false
schema:
$ref: '#/components/schemas/Order'
post:
responses:
'200':
description: An OpenAIChatCompletion.
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/OpenAIChatCompletion'
- $ref: '#/components/schemas/OpenAIChatCompletionChunk'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
description: >-
Generate an OpenAI-compatible chat completion for the given messages using
the specified model.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/OpenaiChatCompletionRequest'
required: true
/v1/datasets:
get:
responses:
@ -2261,39 +2373,6 @@ paths:
schema:
$ref: '#/components/schemas/LogEventRequest'
required: true
/v1/openai/v1/chat/completions:
post:
responses:
'200':
description: An OpenAIChatCompletion.
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/OpenAIChatCompletion'
- $ref: '#/components/schemas/OpenAIChatCompletionChunk'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
description: >-
Generate an OpenAI-compatible chat completion for the given messages using
the specified model.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/OpenaiChatCompletionRequest'
required: true
/v1/openai/v1/completions:
post:
responses:
@ -5479,6 +5558,369 @@ components:
- scoring_functions
- metadata
title: Benchmark
OpenAIAssistantMessageParam:
type: object
properties:
role:
type: string
const: assistant
default: assistant
description: >-
Must be "assistant" to identify this as the model's response
content:
oneOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
description: The content of the model's response
name:
type: string
description: >-
(Optional) The name of the assistant message participant.
tool_calls:
type: array
items:
$ref: '#/components/schemas/OpenAIChatCompletionToolCall'
description: >-
List of tool calls. Each tool call is an OpenAIChatCompletionToolCall
object.
additionalProperties: false
required:
- role
title: OpenAIAssistantMessageParam
description: >-
A message containing the model's (assistant) response in an OpenAI-compatible
chat completion request.
"OpenAIChatCompletionContentPartImageParam":
type: object
properties:
type:
type: string
const: image_url
default: image_url
image_url:
$ref: '#/components/schemas/OpenAIImageURL'
additionalProperties: false
required:
- type
- image_url
title: >-
OpenAIChatCompletionContentPartImageParam
OpenAIChatCompletionContentPartParam:
oneOf:
- $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
- $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
discriminator:
propertyName: type
mapping:
text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
OpenAIChatCompletionContentPartTextParam:
type: object
properties:
type:
type: string
const: text
default: text
text:
type: string
additionalProperties: false
required:
- type
- text
title: OpenAIChatCompletionContentPartTextParam
OpenAIChatCompletionToolCall:
type: object
properties:
index:
type: integer
id:
type: string
type:
type: string
const: function
default: function
function:
$ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction'
additionalProperties: false
required:
- type
title: OpenAIChatCompletionToolCall
OpenAIChatCompletionToolCallFunction:
type: object
properties:
name:
type: string
arguments:
type: string
additionalProperties: false
title: OpenAIChatCompletionToolCallFunction
OpenAIChoice:
type: object
properties:
message:
$ref: '#/components/schemas/OpenAIMessageParam'
description: The message from the model
finish_reason:
type: string
description: The reason the model stopped generating
index:
type: integer
description: The index of the choice
logprobs:
$ref: '#/components/schemas/OpenAIChoiceLogprobs'
description: >-
(Optional) The log probabilities for the tokens in the message
additionalProperties: false
required:
- message
- finish_reason
- index
title: OpenAIChoice
description: >-
A choice from an OpenAI-compatible chat completion response.
OpenAIChoiceLogprobs:
type: object
properties:
content:
type: array
items:
$ref: '#/components/schemas/OpenAITokenLogProb'
description: >-
(Optional) The log probabilities for the tokens in the message
refusal:
type: array
items:
$ref: '#/components/schemas/OpenAITokenLogProb'
description: >-
(Optional) The log probabilities for the tokens in the message
additionalProperties: false
title: OpenAIChoiceLogprobs
description: >-
The log probabilities for the tokens in the message from an OpenAI-compatible
chat completion response.
OpenAIDeveloperMessageParam:
type: object
properties:
role:
type: string
const: developer
default: developer
description: >-
Must be "developer" to identify this as a developer message
content:
oneOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
description: The content of the developer message
name:
type: string
description: >-
(Optional) The name of the developer message participant.
additionalProperties: false
required:
- role
- content
title: OpenAIDeveloperMessageParam
description: >-
A message from the developer in an OpenAI-compatible chat completion request.
OpenAIImageURL:
type: object
properties:
url:
type: string
detail:
type: string
additionalProperties: false
required:
- url
title: OpenAIImageURL
OpenAIMessageParam:
oneOf:
- $ref: '#/components/schemas/OpenAIUserMessageParam'
- $ref: '#/components/schemas/OpenAISystemMessageParam'
- $ref: '#/components/schemas/OpenAIAssistantMessageParam'
- $ref: '#/components/schemas/OpenAIToolMessageParam'
- $ref: '#/components/schemas/OpenAIDeveloperMessageParam'
discriminator:
propertyName: role
mapping:
user: '#/components/schemas/OpenAIUserMessageParam'
system: '#/components/schemas/OpenAISystemMessageParam'
assistant: '#/components/schemas/OpenAIAssistantMessageParam'
tool: '#/components/schemas/OpenAIToolMessageParam'
developer: '#/components/schemas/OpenAIDeveloperMessageParam'
OpenAISystemMessageParam:
type: object
properties:
role:
type: string
const: system
default: system
description: >-
Must be "system" to identify this as a system message
content:
oneOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
description: >-
The content of the "system prompt". If multiple system messages are provided,
they are concatenated. The underlying Llama Stack code may also add other
system messages (for example, for formatting tool definitions).
name:
type: string
description: >-
(Optional) The name of the system message participant.
additionalProperties: false
required:
- role
- content
title: OpenAISystemMessageParam
description: >-
A system message providing instructions or context to the model.
OpenAITokenLogProb:
type: object
properties:
token:
type: string
bytes:
type: array
items:
type: integer
logprob:
type: number
top_logprobs:
type: array
items:
$ref: '#/components/schemas/OpenAITopLogProb'
additionalProperties: false
required:
- token
- logprob
- top_logprobs
title: OpenAITokenLogProb
description: >-
The log probability for a token from an OpenAI-compatible chat completion
response.
OpenAIToolMessageParam:
type: object
properties:
role:
type: string
const: tool
default: tool
description: >-
Must be "tool" to identify this as a tool response
tool_call_id:
type: string
description: >-
Unique identifier for the tool call this response is for
content:
oneOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
description: The response content from the tool
additionalProperties: false
required:
- role
- tool_call_id
- content
title: OpenAIToolMessageParam
description: >-
A message representing the result of a tool invocation in an OpenAI-compatible
chat completion request.
OpenAITopLogProb:
type: object
properties:
token:
type: string
bytes:
type: array
items:
type: integer
logprob:
type: number
additionalProperties: false
required:
- token
- logprob
title: OpenAITopLogProb
description: >-
The top log probability for a token from an OpenAI-compatible chat completion
response.
OpenAIUserMessageParam:
type: object
properties:
role:
type: string
const: user
default: user
description: >-
Must be "user" to identify this as a user message
content:
oneOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
description: >-
The content of the message, which can include text and other media
name:
type: string
description: >-
(Optional) The name of the user message participant.
additionalProperties: false
required:
- role
- content
title: OpenAIUserMessageParam
description: >-
A message from the user in an OpenAI-compatible chat completion request.
OpenAICompletionWithInputMessages:
type: object
properties:
id:
type: string
description: The ID of the chat completion
choices:
type: array
items:
$ref: '#/components/schemas/OpenAIChoice'
description: List of choices
object:
type: string
const: chat.completion
default: chat.completion
description: >-
The object type, which will be "chat.completion"
created:
type: integer
description: >-
The Unix timestamp in seconds when the chat completion was created
model:
type: string
description: >-
The model that was used to generate the chat completion
input_messages:
type: array
items:
$ref: '#/components/schemas/OpenAIMessageParam'
additionalProperties: false
required:
- id
- choices
- object
- created
- model
- input_messages
title: OpenAICompletionWithInputMessages
DataSource:
oneOf:
- $ref: '#/components/schemas/URIDataSource'
@ -6497,6 +6939,73 @@ components:
required:
- data
title: ListBenchmarksResponse
Order:
type: string
enum:
- asc
- desc
title: Order
ListOpenAIChatCompletionResponse:
type: object
properties:
data:
type: array
items:
type: object
properties:
id:
type: string
description: The ID of the chat completion
choices:
type: array
items:
$ref: '#/components/schemas/OpenAIChoice'
description: List of choices
object:
type: string
const: chat.completion
default: chat.completion
description: >-
The object type, which will be "chat.completion"
created:
type: integer
description: >-
The Unix timestamp in seconds when the chat completion was created
model:
type: string
description: >-
The model that was used to generate the chat completion
input_messages:
type: array
items:
$ref: '#/components/schemas/OpenAIMessageParam'
additionalProperties: false
required:
- id
- choices
- object
- created
- model
- input_messages
title: OpenAICompletionWithInputMessages
has_more:
type: boolean
first_id:
type: string
last_id:
type: string
object:
type: string
const: list
default: list
additionalProperties: false
required:
- data
- has_more
- first_id
- last_id
- object
title: ListOpenAIChatCompletionResponse
ListDatasetsResponse:
type: object
properties:
@ -6835,142 +7344,6 @@ components:
- event
- ttl_seconds
title: LogEventRequest
OpenAIAssistantMessageParam:
type: object
properties:
role:
type: string
const: assistant
default: assistant
description: >-
Must be "assistant" to identify this as the model's response
content:
oneOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
description: The content of the model's response
name:
type: string
description: >-
(Optional) The name of the assistant message participant.
tool_calls:
type: array
items:
$ref: '#/components/schemas/OpenAIChatCompletionToolCall'
description: >-
List of tool calls. Each tool call is an OpenAIChatCompletionToolCall
object.
additionalProperties: false
required:
- role
title: OpenAIAssistantMessageParam
description: >-
A message containing the model's (assistant) response in an OpenAI-compatible
chat completion request.
"OpenAIChatCompletionContentPartImageParam":
type: object
properties:
type:
type: string
const: image_url
default: image_url
image_url:
$ref: '#/components/schemas/OpenAIImageURL'
additionalProperties: false
required:
- type
- image_url
title: >-
OpenAIChatCompletionContentPartImageParam
OpenAIChatCompletionContentPartParam:
oneOf:
- $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
- $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
discriminator:
propertyName: type
mapping:
text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
OpenAIChatCompletionContentPartTextParam:
type: object
properties:
type:
type: string
const: text
default: text
text:
type: string
additionalProperties: false
required:
- type
- text
title: OpenAIChatCompletionContentPartTextParam
OpenAIChatCompletionToolCall:
type: object
properties:
index:
type: integer
id:
type: string
type:
type: string
const: function
default: function
function:
$ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction'
additionalProperties: false
required:
- type
title: OpenAIChatCompletionToolCall
OpenAIChatCompletionToolCallFunction:
type: object
properties:
name:
type: string
arguments:
type: string
additionalProperties: false
title: OpenAIChatCompletionToolCallFunction
OpenAIDeveloperMessageParam:
type: object
properties:
role:
type: string
const: developer
default: developer
description: >-
Must be "developer" to identify this as a developer message
content:
oneOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
description: The content of the developer message
name:
type: string
description: >-
(Optional) The name of the developer message participant.
additionalProperties: false
required:
- role
- content
title: OpenAIDeveloperMessageParam
description: >-
A message from the developer in an OpenAI-compatible chat completion request.
OpenAIImageURL:
type: object
properties:
url:
type: string
detail:
type: string
additionalProperties: false
required:
- url
title: OpenAIImageURL
OpenAIJSONSchema:
type: object
properties:
@ -6994,21 +7367,6 @@ components:
required:
- name
title: OpenAIJSONSchema
OpenAIMessageParam:
oneOf:
- $ref: '#/components/schemas/OpenAIUserMessageParam'
- $ref: '#/components/schemas/OpenAISystemMessageParam'
- $ref: '#/components/schemas/OpenAIAssistantMessageParam'
- $ref: '#/components/schemas/OpenAIToolMessageParam'
- $ref: '#/components/schemas/OpenAIDeveloperMessageParam'
discriminator:
propertyName: role
mapping:
user: '#/components/schemas/OpenAIUserMessageParam'
system: '#/components/schemas/OpenAISystemMessageParam'
assistant: '#/components/schemas/OpenAIAssistantMessageParam'
tool: '#/components/schemas/OpenAIToolMessageParam'
developer: '#/components/schemas/OpenAIDeveloperMessageParam'
OpenAIResponseFormatJSONObject:
type: object
properties:
@ -7056,93 +7414,6 @@ components:
required:
- type
title: OpenAIResponseFormatText
OpenAISystemMessageParam:
type: object
properties:
role:
type: string
const: system
default: system
description: >-
Must be "system" to identify this as a system message
content:
oneOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
description: >-
The content of the "system prompt". If multiple system messages are provided,
they are concatenated. The underlying Llama Stack code may also add other
system messages (for example, for formatting tool definitions).
name:
type: string
description: >-
(Optional) The name of the system message participant.
additionalProperties: false
required:
- role
- content
title: OpenAISystemMessageParam
description: >-
A system message providing instructions or context to the model.
OpenAIToolMessageParam:
type: object
properties:
role:
type: string
const: tool
default: tool
description: >-
Must be "tool" to identify this as a tool response
tool_call_id:
type: string
description: >-
Unique identifier for the tool call this response is for
content:
oneOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
description: The response content from the tool
additionalProperties: false
required:
- role
- tool_call_id
- content
title: OpenAIToolMessageParam
description: >-
A message representing the result of a tool invocation in an OpenAI-compatible
chat completion request.
OpenAIUserMessageParam:
type: object
properties:
role:
type: string
const: user
default: user
description: >-
Must be "user" to identify this as a user message
content:
oneOf:
- type: string
- type: array
items:
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
description: >-
The content of the message, which can include text and other media
name:
type: string
description: >-
(Optional) The name of the user message participant.
additionalProperties: false
required:
- role
- content
title: OpenAIUserMessageParam
description: >-
A message from the user in an OpenAI-compatible chat completion request.
OpenaiChatCompletionRequest:
type: object
properties:
@ -7356,30 +7627,6 @@ components:
title: OpenAIChatCompletionChunk
description: >-
Chunk from a streaming response to an OpenAI-compatible chat completion request.
OpenAIChoice:
type: object
properties:
message:
$ref: '#/components/schemas/OpenAIMessageParam'
description: The message from the model
finish_reason:
type: string
description: The reason the model stopped generating
index:
type: integer
description: The index of the choice
logprobs:
$ref: '#/components/schemas/OpenAIChoiceLogprobs'
description: >-
(Optional) The log probabilities for the tokens in the message
additionalProperties: false
required:
- message
- finish_reason
- index
title: OpenAIChoice
description: >-
A choice from an OpenAI-compatible chat completion response.
OpenAIChoiceDelta:
type: object
properties:
@ -7401,26 +7648,6 @@ components:
title: OpenAIChoiceDelta
description: >-
A delta from an OpenAI-compatible chat completion streaming response.
OpenAIChoiceLogprobs:
type: object
properties:
content:
type: array
items:
$ref: '#/components/schemas/OpenAITokenLogProb'
description: >-
(Optional) The log probabilities for the tokens in the message
refusal:
type: array
items:
$ref: '#/components/schemas/OpenAITokenLogProb'
description: >-
(Optional) The log probabilities for the tokens in the message
additionalProperties: false
title: OpenAIChoiceLogprobs
description: >-
The log probabilities for the tokens in the message from an OpenAI-compatible
chat completion response.
OpenAIChunkChoice:
type: object
properties:
@ -7445,49 +7672,6 @@ components:
title: OpenAIChunkChoice
description: >-
A chunk choice from an OpenAI-compatible chat completion streaming response.
OpenAITokenLogProb:
type: object
properties:
token:
type: string
bytes:
type: array
items:
type: integer
logprob:
type: number
top_logprobs:
type: array
items:
$ref: '#/components/schemas/OpenAITopLogProb'
additionalProperties: false
required:
- token
- logprob
- top_logprobs
title: OpenAITokenLogProb
description: >-
The log probability for a token from an OpenAI-compatible chat completion
response.
OpenAITopLogProb:
type: object
properties:
token:
type: string
bytes:
type: array
items:
type: integer
logprob:
type: number
additionalProperties: false
required:
- token
- logprob
title: OpenAITopLogProb
description: >-
The top log probability for a token from an OpenAI-compatible chat completion
response.
OpenaiCompletionRequest:
type: object
properties:

View file

@ -759,7 +759,7 @@ class Generator:
)
return Operation(
tags=[op.defining_class.__name__],
tags=[getattr(op.defining_class, "API_NAMESPACE", op.defining_class.__name__)],
summary=None,
# summary=doc_string.short_description,
description=description,
@ -805,6 +805,8 @@ class Generator:
operation_tags: List[Tag] = []
for cls in endpoint_classes:
doc_string = parse_type(cls)
if hasattr(cls, "API_NAMESPACE") and cls.API_NAMESPACE != cls.__name__:
continue
operation_tags.append(
Tag(
name=cls.__name__,

View file

@ -820,15 +820,32 @@ class BatchChatCompletionResponse(BaseModel):
batch: list[ChatCompletionResponse]
class OpenAICompletionWithInputMessages(OpenAIChatCompletion):
input_messages: list[OpenAIMessageParam]
@json_schema_type
class ListOpenAIChatCompletionResponse(BaseModel):
data: list[OpenAICompletionWithInputMessages]
has_more: bool
first_id: str
last_id: str
object: Literal["list"] = "list"
class Order(Enum):
asc = "asc"
desc = "desc"
@runtime_checkable
@trace_protocol
class Inference(Protocol):
"""Llama Stack Inference API for generating completions, chat completions, and embeddings.
This API provides the raw interface to the underlying models. Two kinds of models are supported:
- LLM models: these models generate "raw" and "chat" (conversational) completions.
- Embedding models: these models generate embeddings to be used for semantic search.
class InferenceProvider(Protocol):
"""
This protocol defines the interface that should be implemented by all inference providers.
"""
API_NAMESPACE: str = "Inference"
model_store: ModelStore | None = None
@ -1062,3 +1079,39 @@ class Inference(Protocol):
:returns: An OpenAIChatCompletion.
"""
...
class Inference(InferenceProvider):
"""Llama Stack Inference API for generating completions, chat completions, and embeddings.
This API provides the raw interface to the underlying models. Two kinds of models are supported:
- LLM models: these models generate "raw" and "chat" (conversational) completions.
- Embedding models: these models generate embeddings to be used for semantic search.
"""
@webmethod(route="/openai/v1/chat/completions", method="GET")
async def list_chat_completions(
self,
after: str | None = None,
limit: int | None = 20,
model: str | None = None,
order: Order | None = Order.desc,
) -> ListOpenAIChatCompletionResponse:
"""List all chat completions.
:param after: The ID of the last chat completion to return.
:param limit: The maximum number of chat completions to return.
:param model: The model to filter by.
:param order: The order to sort the chat completions by: "asc" or "desc". Defaults to "desc".
:returns: A ListOpenAIChatCompletionResponse.
"""
raise NotImplementedError("List chat completions is not implemented")
@webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET")
async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
"""Describe a chat completion by its ID.
:param completion_id: ID of the chat completion.
:returns: A OpenAICompletionWithInputMessages.
"""
raise NotImplementedError("Get chat completion is not implemented")

View file

@ -13,7 +13,7 @@ from llama_stack.apis.datasetio import DatasetIO
from llama_stack.apis.datasets import Datasets
from llama_stack.apis.eval import Eval
from llama_stack.apis.files import Files
from llama_stack.apis.inference import Inference
from llama_stack.apis.inference import Inference, InferenceProvider
from llama_stack.apis.inspect import Inspect
from llama_stack.apis.models import Models
from llama_stack.apis.post_training import PostTraining
@ -83,6 +83,13 @@ def api_protocol_map() -> dict[Api, Any]:
}
def api_protocol_map_for_compliance_check() -> dict[Api, Any]:
return {
**api_protocol_map(),
Api.inference: InferenceProvider,
}
def additional_protocols_map() -> dict[Api, Any]:
return {
Api.inference: (ModelsProtocolPrivate, Models, Api.models),
@ -302,9 +309,6 @@ async def instantiate_provider(
inner_impls: dict[str, Any],
dist_registry: DistributionRegistry,
):
protocols = api_protocol_map()
additional_protocols = additional_protocols_map()
provider_spec = provider.spec
if not hasattr(provider_spec, "module"):
raise AttributeError(f"ProviderSpec of type {type(provider_spec)} does not have a 'module' attribute")
@ -342,6 +346,8 @@ async def instantiate_provider(
impl.__provider_spec__ = provider_spec
impl.__provider_config__ = config
protocols = api_protocol_map_for_compliance_check()
additional_protocols = additional_protocols_map()
# TODO: check compliance for special tool groups
# the impl should be for Api.tool_runtime, the name should be the special tool group, the protocol should be the special tool group protocol
check_protocol_compliance(impl, protocols[provider_spec.api])

View file

@ -28,7 +28,7 @@ from llama_stack.apis.inference import (
CompletionRequest,
CompletionResponse,
CompletionResponseStreamChunk,
Inference,
InferenceProvider,
InterleavedContent,
LogProbConfig,
Message,
@ -86,7 +86,7 @@ class MetaReferenceInferenceImpl(
OpenAICompletionToLlamaStackMixin,
OpenAIChatCompletionToLlamaStackMixin,
SentenceTransformerEmbeddingMixin,
Inference,
InferenceProvider,
ModelsProtocolPrivate,
):
def __init__(self, config: MetaReferenceInferenceConfig) -> None:

View file

@ -9,7 +9,7 @@ from collections.abc import AsyncGenerator
from llama_stack.apis.inference import (
CompletionResponse,
Inference,
InferenceProvider,
InterleavedContent,
LogProbConfig,
Message,
@ -38,7 +38,7 @@ class SentenceTransformersInferenceImpl(
OpenAIChatCompletionToLlamaStackMixin,
OpenAICompletionToLlamaStackMixin,
SentenceTransformerEmbeddingMixin,
Inference,
InferenceProvider,
ModelsProtocolPrivate,
):
def __init__(self, config: SentenceTransformersInferenceConfig) -> None:

View file

@ -4,12 +4,12 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.inference import Inference
from llama_stack.apis.inference import InferenceProvider
from .config import CerebrasCompatConfig
async def get_adapter_impl(config: CerebrasCompatConfig, _deps) -> Inference:
async def get_adapter_impl(config: CerebrasCompatConfig, _deps) -> InferenceProvider:
# import dynamically so the import is used only when it is needed
from .cerebras import CerebrasCompatInferenceAdapter

View file

@ -4,12 +4,12 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.inference import Inference
from llama_stack.apis.inference import InferenceProvider
from .config import FireworksCompatConfig
async def get_adapter_impl(config: FireworksCompatConfig, _deps) -> Inference:
async def get_adapter_impl(config: FireworksCompatConfig, _deps) -> InferenceProvider:
# import dynamically so the import is used only when it is needed
from .fireworks import FireworksCompatInferenceAdapter

View file

@ -4,12 +4,12 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.inference import Inference
from llama_stack.apis.inference import InferenceProvider
from .config import GroqCompatConfig
async def get_adapter_impl(config: GroqCompatConfig, _deps) -> Inference:
async def get_adapter_impl(config: GroqCompatConfig, _deps) -> InferenceProvider:
# import dynamically so the import is used only when it is needed
from .groq import GroqCompatInferenceAdapter

View file

@ -4,12 +4,12 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.inference import Inference
from llama_stack.apis.inference import InferenceProvider
from .config import LlamaCompatConfig
async def get_adapter_impl(config: LlamaCompatConfig, _deps) -> Inference:
async def get_adapter_impl(config: LlamaCompatConfig, _deps) -> InferenceProvider:
# import dynamically so the import is used only when it is needed
from .llama import LlamaCompatInferenceAdapter

View file

@ -28,7 +28,7 @@ from llama_stack.apis.inference import (
EmbeddingsResponse,
EmbeddingTaskType,
GrammarResponseFormat,
Inference,
InferenceProvider,
JsonSchemaResponseFormat,
LogProbConfig,
Message,
@ -82,7 +82,7 @@ logger = get_logger(name=__name__, category="inference")
class OllamaInferenceAdapter(
Inference,
InferenceProvider,
ModelsProtocolPrivate,
):
def __init__(self, url: str) -> None:

View file

@ -4,12 +4,12 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.inference import Inference
from llama_stack.apis.inference import InferenceProvider
from .config import SambaNovaCompatConfig
async def get_adapter_impl(config: SambaNovaCompatConfig, _deps) -> Inference:
async def get_adapter_impl(config: SambaNovaCompatConfig, _deps) -> InferenceProvider:
# import dynamically so the import is used only when it is needed
from .sambanova import SambaNovaCompatInferenceAdapter

View file

@ -4,12 +4,12 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.apis.inference import Inference
from llama_stack.apis.inference import InferenceProvider
from .config import TogetherCompatConfig
async def get_adapter_impl(config: TogetherCompatConfig, _deps) -> Inference:
async def get_adapter_impl(config: TogetherCompatConfig, _deps) -> InferenceProvider:
# import dynamically so the import is used only when it is needed
from .together import TogetherCompatInferenceAdapter

View file

@ -19,7 +19,7 @@ from llama_stack.apis.inference import (
ChatCompletionResponseStreamChunk,
EmbeddingsResponse,
EmbeddingTaskType,
Inference,
InferenceProvider,
JsonSchemaResponseFormat,
LogProbConfig,
Message,
@ -59,7 +59,7 @@ logger = get_logger(name=__name__, category="inference")
class LiteLLMOpenAIMixin(
ModelRegistryHelper,
Inference,
InferenceProvider,
NeedsRequestProviderData,
):
# TODO: avoid exposing the litellm specific model names to the user.