feat: New OpenAI compat embeddings API (#2314)

# What does this PR do?
Adds a new endpoint that is compatible with the OpenAI embeddings API:
`/openai/v1/embeddings`
Provider implementations are added for OpenAI, LiteLLM, and SentenceTransformers.
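
For illustration, a minimal request against the new route might look like this (a sketch assuming a stack running locally on port 8321 with the `all-MiniLM-L6-v2` embedding model registered; field names follow the `OpenaiEmbeddingsRequest` schema in the spec below):

```python
import requests

# Assumed local Llama Stack server; the served path includes the /v1 API prefix.
resp = requests.post(
    "http://localhost:8321/v1/openai/v1/embeddings",
    json={
        "model": "all-MiniLM-L6-v2",             # any registered embedding model
        "input": ["first text", "second text"],  # a string or a list of strings
        "encoding_format": "float",              # optional: "float" (default) or "base64"
    },
)
resp.raise_for_status()
body = resp.json()
print(len(body["data"]), len(body["data"][0]["embedding"]), body["usage"])
```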


## Test Plan
```
LLAMA_STACK_CONFIG=http://localhost:8321 pytest -sv tests/integration/inference/test_openai_embeddings.py --embedding-model all-MiniLM-L6-v2,text-embedding-3-small,gemini/text-embedding-004
```
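
Because the route mirrors the OpenAI embeddings API, the stock `openai` client can also be pointed at the compat prefix; a rough sketch (base URL, API key, and model name are assumptions for a local dev setup):

```python
from openai import OpenAI

# Point the OpenAI SDK at the Llama Stack OpenAI-compat prefix (assumed local URL).
client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="unused")

result = client.embeddings.create(
    model="all-MiniLM-L6-v2",
    input="Hello, Llama Stack!",
)
print(len(result.data[0].embedding), result.usage.total_tokens)
```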
Hardik Shah 2025-05-31 22:11:47 -07:00 committed by GitHub
parent 277f8690ef
commit b21050935e
21 changed files with 981 additions and 0 deletions


@@ -3607,6 +3607,49 @@
}
}
},
"/v1/openai/v1/embeddings": {
"post": {
"responses": {
"200": {
"description": "An OpenAIEmbeddingsResponse containing the embeddings.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/OpenAIEmbeddingsResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Inference"
],
"description": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/OpenaiEmbeddingsRequest"
}
}
},
"required": true
}
}
},
"/v1/openai/v1/models": {
"get": {
"responses": {
@@ -11777,6 +11820,139 @@
"title": "OpenAICompletionChoice",
"description": "A choice from an OpenAI-compatible completion response."
},
"OpenaiEmbeddingsRequest": {
"type": "object",
"properties": {
"model": {
"type": "string",
"description": "The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint."
},
"input": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
],
"description": "Input text to embed, encoded as a string or array of strings. To embed multiple inputs in a single request, pass an array of strings."
},
"encoding_format": {
"type": "string",
"description": "(Optional) The format to return the embeddings in. Can be either \"float\" or \"base64\". Defaults to \"float\"."
},
"dimensions": {
"type": "integer",
"description": "(Optional) The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models."
},
"user": {
"type": "string",
"description": "(Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse."
}
},
"additionalProperties": false,
"required": [
"model",
"input"
],
"title": "OpenaiEmbeddingsRequest"
},
"OpenAIEmbeddingData": {
"type": "object",
"properties": {
"object": {
"type": "string",
"const": "embedding",
"default": "embedding",
"description": "The object type, which will be \"embedding\""
},
"embedding": {
"oneOf": [
{
"type": "array",
"items": {
"type": "number"
}
},
{
"type": "string"
}
],
"description": "The embedding vector as a list of floats (when encoding_format=\"float\") or as a base64-encoded string (when encoding_format=\"base64\")"
},
"index": {
"type": "integer",
"description": "The index of the embedding in the input list"
}
},
"additionalProperties": false,
"required": [
"object",
"embedding",
"index"
],
"title": "OpenAIEmbeddingData",
"description": "A single embedding data object from an OpenAI-compatible embeddings response."
},
"OpenAIEmbeddingUsage": {
"type": "object",
"properties": {
"prompt_tokens": {
"type": "integer",
"description": "The number of tokens in the input"
},
"total_tokens": {
"type": "integer",
"description": "The total number of tokens used"
}
},
"additionalProperties": false,
"required": [
"prompt_tokens",
"total_tokens"
],
"title": "OpenAIEmbeddingUsage",
"description": "Usage information for an OpenAI-compatible embeddings response."
},
"OpenAIEmbeddingsResponse": {
"type": "object",
"properties": {
"object": {
"type": "string",
"const": "list",
"default": "list",
"description": "The object type, which will be \"list\""
},
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/OpenAIEmbeddingData"
},
"description": "List of embedding data objects"
},
"model": {
"type": "string",
"description": "The model that was used to generate the embeddings"
},
"usage": {
"$ref": "#/components/schemas/OpenAIEmbeddingUsage",
"description": "Usage information"
}
},
"additionalProperties": false,
"required": [
"object",
"data",
"model",
"usage"
],
"title": "OpenAIEmbeddingsResponse",
"description": "Response from an OpenAI-compatible embeddings request."
},
"OpenAIModel": {
"type": "object",
"properties": {

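Per the `OpenAIEmbeddingData` schema above, the `embedding` field is either a list of floats or, when `encoding_format="base64"` is requested, a base64 string. A small decoding sketch, assuming the base64 payload follows OpenAI's convention of packed little-endian float32 values (the schema itself does not pin this down):

```python
import base64
import struct

def decode_embedding(value):
    """Return a list of floats from an OpenAIEmbeddingData 'embedding' field."""
    if isinstance(value, list):    # encoding_format="float": already a list of numbers
        return value
    raw = base64.b64decode(value)  # encoding_format="base64": assumed packed little-endian float32
    return list(struct.unpack(f"<{len(raw) // 4}f", raw))
```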

@@ -2520,6 +2520,38 @@ paths:
schema:
$ref: '#/components/schemas/OpenaiCompletionRequest'
required: true
/v1/openai/v1/embeddings:
post:
responses:
'200':
description: >-
An OpenAIEmbeddingsResponse containing the embeddings.
content:
application/json:
schema:
$ref: '#/components/schemas/OpenAIEmbeddingsResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
description: >-
Generate OpenAI-compatible embeddings for the given input using the specified
model.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/OpenaiEmbeddingsRequest'
required: true
/v1/openai/v1/models:
get:
responses:
@@ -8197,6 +8229,118 @@ components:
title: OpenAICompletionChoice
description: >-
A choice from an OpenAI-compatible completion response.
OpenaiEmbeddingsRequest:
type: object
properties:
model:
type: string
description: >-
The identifier of the model to use. The model must be an embedding model
registered with Llama Stack and available via the /models endpoint.
input:
oneOf:
- type: string
- type: array
items:
type: string
description: >-
Input text to embed, encoded as a string or array of strings. To embed
multiple inputs in a single request, pass an array of strings.
encoding_format:
type: string
description: >-
(Optional) The format to return the embeddings in. Can be either "float"
or "base64". Defaults to "float".
dimensions:
type: integer
description: >-
(Optional) The number of dimensions the resulting output embeddings should
have. Only supported in text-embedding-3 and later models.
user:
type: string
description: >-
(Optional) A unique identifier representing your end-user, which can help
OpenAI to monitor and detect abuse.
additionalProperties: false
required:
- model
- input
title: OpenaiEmbeddingsRequest
OpenAIEmbeddingData:
type: object
properties:
object:
type: string
const: embedding
default: embedding
description: >-
The object type, which will be "embedding"
embedding:
oneOf:
- type: array
items:
type: number
- type: string
description: >-
The embedding vector as a list of floats (when encoding_format="float")
or as a base64-encoded string (when encoding_format="base64")
index:
type: integer
description: >-
The index of the embedding in the input list
additionalProperties: false
required:
- object
- embedding
- index
title: OpenAIEmbeddingData
description: >-
A single embedding data object from an OpenAI-compatible embeddings response.
OpenAIEmbeddingUsage:
type: object
properties:
prompt_tokens:
type: integer
description: The number of tokens in the input
total_tokens:
type: integer
description: The total number of tokens used
additionalProperties: false
required:
- prompt_tokens
- total_tokens
title: OpenAIEmbeddingUsage
description: >-
Usage information for an OpenAI-compatible embeddings response.
OpenAIEmbeddingsResponse:
type: object
properties:
object:
type: string
const: list
default: list
description: The object type, which will be "list"
data:
type: array
items:
$ref: '#/components/schemas/OpenAIEmbeddingData'
description: List of embedding data objects
model:
type: string
description: >-
The model that was used to generate the embeddings
usage:
$ref: '#/components/schemas/OpenAIEmbeddingUsage'
description: Usage information
additionalProperties: false
required:
- object
- data
- model
- usage
title: OpenAIEmbeddingsResponse
description: >-
Response from an OpenAI-compatible embeddings request.
OpenAIModel:
type: object
properties: