forked from phoenix-oss/llama-stack-mirror
feat: New OpenAI compat embeddings API (#2314)
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 4s
Integration Tests / test-matrix (http, inspect) (push) Failing after 9s
Integration Tests / test-matrix (http, inference) (push) Failing after 9s
Integration Tests / test-matrix (http, datasets) (push) Failing after 10s
Integration Tests / test-matrix (http, post_training) (push) Failing after 9s
Integration Tests / test-matrix (library, agents) (push) Failing after 7s
Integration Tests / test-matrix (http, agents) (push) Failing after 10s
Integration Tests / test-matrix (http, tool_runtime) (push) Failing after 8s
Integration Tests / test-matrix (http, providers) (push) Failing after 9s
Integration Tests / test-matrix (library, datasets) (push) Failing after 8s
Integration Tests / test-matrix (library, inference) (push) Failing after 9s
Integration Tests / test-matrix (http, scoring) (push) Failing after 10s
Test Llama Stack Build / generate-matrix (push) Successful in 6s
Integration Tests / test-matrix (library, providers) (push) Failing after 7s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 6s
Integration Tests / test-matrix (library, inspect) (push) Failing after 9s
Test Llama Stack Build / build-single-provider (push) Failing after 7s
Integration Tests / test-matrix (library, scoring) (push) Failing after 9s
Integration Tests / test-matrix (library, post_training) (push) Failing after 9s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 7s
Integration Tests / test-matrix (library, tool_runtime) (push) Failing after 10s
Unit Tests / unit-tests (3.11) (push) Failing after 7s
Test Llama Stack Build / build (push) Failing after 5s
Unit Tests / unit-tests (3.10) (push) Failing after 7s
Update ReadTheDocs / update-readthedocs (push) Failing after 6s
Unit Tests / unit-tests (3.12) (push) Failing after 8s
Unit Tests / unit-tests (3.13) (push) Failing after 7s
Test External Providers / test-external-providers (venv) (push) Failing after 26s
Pre-commit / pre-commit (push) Successful in 1m11s
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 4s
Integration Tests / test-matrix (http, inspect) (push) Failing after 9s
Integration Tests / test-matrix (http, inference) (push) Failing after 9s
Integration Tests / test-matrix (http, datasets) (push) Failing after 10s
Integration Tests / test-matrix (http, post_training) (push) Failing after 9s
Integration Tests / test-matrix (library, agents) (push) Failing after 7s
Integration Tests / test-matrix (http, agents) (push) Failing after 10s
Integration Tests / test-matrix (http, tool_runtime) (push) Failing after 8s
Integration Tests / test-matrix (http, providers) (push) Failing after 9s
Integration Tests / test-matrix (library, datasets) (push) Failing after 8s
Integration Tests / test-matrix (library, inference) (push) Failing after 9s
Integration Tests / test-matrix (http, scoring) (push) Failing after 10s
Test Llama Stack Build / generate-matrix (push) Successful in 6s
Integration Tests / test-matrix (library, providers) (push) Failing after 7s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 6s
Integration Tests / test-matrix (library, inspect) (push) Failing after 9s
Test Llama Stack Build / build-single-provider (push) Failing after 7s
Integration Tests / test-matrix (library, scoring) (push) Failing after 9s
Integration Tests / test-matrix (library, post_training) (push) Failing after 9s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 7s
Integration Tests / test-matrix (library, tool_runtime) (push) Failing after 10s
Unit Tests / unit-tests (3.11) (push) Failing after 7s
Test Llama Stack Build / build (push) Failing after 5s
Unit Tests / unit-tests (3.10) (push) Failing after 7s
Update ReadTheDocs / update-readthedocs (push) Failing after 6s
Unit Tests / unit-tests (3.12) (push) Failing after 8s
Unit Tests / unit-tests (3.13) (push) Failing after 7s
Test External Providers / test-external-providers (venv) (push) Failing after 26s
Pre-commit / pre-commit (push) Successful in 1m11s
# What does this PR do?

Adds a new OpenAI-compatible endpoint for the embeddings API: `/openai/v1/embeddings`.

Added providers for OpenAI, LiteLLM and SentenceTransformer.

## Test Plan

```
LLAMA_STACK_CONFIG=http://localhost:8321 pytest -sv tests/integration/inference/test_openai_embeddings.py --embedding-model all-MiniLM-L6-v2,text-embedding-3-small,gemini/text-embedding-004
```
This commit is contained in:
parent 277f8690ef
commit b21050935e
21 changed files with 981 additions and 0 deletions
144
docs/_static/llama-stack-spec.yaml
vendored
144
docs/_static/llama-stack-spec.yaml
vendored
@@ -2520,6 +2520,38 @@ paths:
             schema:
               $ref: '#/components/schemas/OpenaiCompletionRequest'
         required: true
+  /v1/openai/v1/embeddings:
+    post:
+      responses:
+        '200':
+          description: >-
+            An OpenAIEmbeddingsResponse containing the embeddings.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/OpenAIEmbeddingsResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Inference
+      description: >-
+        Generate OpenAI-compatible embeddings for the given input using the specified
+        model.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/OpenaiEmbeddingsRequest'
+        required: true
   /v1/openai/v1/models:
     get:
       responses:
@@ -8197,6 +8229,118 @@ components:
       title: OpenAICompletionChoice
       description: >-
         A choice from an OpenAI-compatible completion response.
+    OpenaiEmbeddingsRequest:
+      type: object
+      properties:
+        model:
+          type: string
+          description: >-
+            The identifier of the model to use. The model must be an embedding model
+            registered with Llama Stack and available via the /models endpoint.
+        input:
+          oneOf:
+            - type: string
+            - type: array
+              items:
+                type: string
+          description: >-
+            Input text to embed, encoded as a string or array of strings. To embed
+            multiple inputs in a single request, pass an array of strings.
+        encoding_format:
+          type: string
+          description: >-
+            (Optional) The format to return the embeddings in. Can be either "float"
+            or "base64". Defaults to "float".
+        dimensions:
+          type: integer
+          description: >-
+            (Optional) The number of dimensions the resulting output embeddings should
+            have. Only supported in text-embedding-3 and later models.
+        user:
+          type: string
+          description: >-
+            (Optional) A unique identifier representing your end-user, which can help
+            OpenAI to monitor and detect abuse.
+      additionalProperties: false
+      required:
+        - model
+        - input
+      title: OpenaiEmbeddingsRequest
+    OpenAIEmbeddingData:
+      type: object
+      properties:
+        object:
+          type: string
+          const: embedding
+          default: embedding
+          description: >-
+            The object type, which will be "embedding"
+        embedding:
+          oneOf:
+            - type: array
+              items:
+                type: number
+            - type: string
+          description: >-
+            The embedding vector as a list of floats (when encoding_format="float")
+            or as a base64-encoded string (when encoding_format="base64")
+        index:
+          type: integer
+          description: >-
+            The index of the embedding in the input list
+      additionalProperties: false
+      required:
+        - object
+        - embedding
+        - index
+      title: OpenAIEmbeddingData
+      description: >-
+        A single embedding data object from an OpenAI-compatible embeddings response.
+    OpenAIEmbeddingUsage:
+      type: object
+      properties:
+        prompt_tokens:
+          type: integer
+          description: The number of tokens in the input
+        total_tokens:
+          type: integer
+          description: The total number of tokens used
+      additionalProperties: false
+      required:
+        - prompt_tokens
+        - total_tokens
+      title: OpenAIEmbeddingUsage
+      description: >-
+        Usage information for an OpenAI-compatible embeddings response.
+    OpenAIEmbeddingsResponse:
+      type: object
+      properties:
+        object:
+          type: string
+          const: list
+          default: list
+          description: The object type, which will be "list"
+        data:
+          type: array
+          items:
+            $ref: '#/components/schemas/OpenAIEmbeddingData'
+          description: List of embedding data objects
+        model:
+          type: string
+          description: >-
+            The model that was used to generate the embeddings
+        usage:
+          $ref: '#/components/schemas/OpenAIEmbeddingUsage'
+          description: Usage information
+      additionalProperties: false
+      required:
+        - object
+        - data
+        - model
+        - usage
+      title: OpenAIEmbeddingsResponse
+      description: >-
+        Response from an OpenAI-compatible embeddings request.
     OpenAIModel:
       type: object
       properties:
Loading…
Add table
Add a link
Reference in a new issue