forked from phoenix-oss/llama-stack-mirror
feat: New OpenAI compat embeddings API (#2314)
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 4s
Integration Tests / test-matrix (http, inspect) (push) Failing after 9s
Integration Tests / test-matrix (http, inference) (push) Failing after 9s
Integration Tests / test-matrix (http, datasets) (push) Failing after 10s
Integration Tests / test-matrix (http, post_training) (push) Failing after 9s
Integration Tests / test-matrix (library, agents) (push) Failing after 7s
Integration Tests / test-matrix (http, agents) (push) Failing after 10s
Integration Tests / test-matrix (http, tool_runtime) (push) Failing after 8s
Integration Tests / test-matrix (http, providers) (push) Failing after 9s
Integration Tests / test-matrix (library, datasets) (push) Failing after 8s
Integration Tests / test-matrix (library, inference) (push) Failing after 9s
Integration Tests / test-matrix (http, scoring) (push) Failing after 10s
Test Llama Stack Build / generate-matrix (push) Successful in 6s
Integration Tests / test-matrix (library, providers) (push) Failing after 7s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 6s
Integration Tests / test-matrix (library, inspect) (push) Failing after 9s
Test Llama Stack Build / build-single-provider (push) Failing after 7s
Integration Tests / test-matrix (library, scoring) (push) Failing after 9s
Integration Tests / test-matrix (library, post_training) (push) Failing after 9s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 7s
Integration Tests / test-matrix (library, tool_runtime) (push) Failing after 10s
Unit Tests / unit-tests (3.11) (push) Failing after 7s
Test Llama Stack Build / build (push) Failing after 5s
Unit Tests / unit-tests (3.10) (push) Failing after 7s
Update ReadTheDocs / update-readthedocs (push) Failing after 6s
Unit Tests / unit-tests (3.12) (push) Failing after 8s
Unit Tests / unit-tests (3.13) (push) Failing after 7s
Test External Providers / test-external-providers (venv) (push) Failing after 26s
Pre-commit / pre-commit (push) Successful in 1m11s
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 4s
Integration Tests / test-matrix (http, inspect) (push) Failing after 9s
Integration Tests / test-matrix (http, inference) (push) Failing after 9s
Integration Tests / test-matrix (http, datasets) (push) Failing after 10s
Integration Tests / test-matrix (http, post_training) (push) Failing after 9s
Integration Tests / test-matrix (library, agents) (push) Failing after 7s
Integration Tests / test-matrix (http, agents) (push) Failing after 10s
Integration Tests / test-matrix (http, tool_runtime) (push) Failing after 8s
Integration Tests / test-matrix (http, providers) (push) Failing after 9s
Integration Tests / test-matrix (library, datasets) (push) Failing after 8s
Integration Tests / test-matrix (library, inference) (push) Failing after 9s
Integration Tests / test-matrix (http, scoring) (push) Failing after 10s
Test Llama Stack Build / generate-matrix (push) Successful in 6s
Integration Tests / test-matrix (library, providers) (push) Failing after 7s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 6s
Integration Tests / test-matrix (library, inspect) (push) Failing after 9s
Test Llama Stack Build / build-single-provider (push) Failing after 7s
Integration Tests / test-matrix (library, scoring) (push) Failing after 9s
Integration Tests / test-matrix (library, post_training) (push) Failing after 9s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 7s
Integration Tests / test-matrix (library, tool_runtime) (push) Failing after 10s
Unit Tests / unit-tests (3.11) (push) Failing after 7s
Test Llama Stack Build / build (push) Failing after 5s
Unit Tests / unit-tests (3.10) (push) Failing after 7s
Update ReadTheDocs / update-readthedocs (push) Failing after 6s
Unit Tests / unit-tests (3.12) (push) Failing after 8s
Unit Tests / unit-tests (3.13) (push) Failing after 7s
Test External Providers / test-external-providers (venv) (push) Failing after 26s
Pre-commit / pre-commit (push) Successful in 1m11s
# What does this PR do? Adds a new endpoint that is compatible with OpenAI for embeddings api. `/openai/v1/embeddings` Added providers for OpenAI, LiteLLM and SentenceTransformer. ## Test Plan ``` LLAMA_STACK_CONFIG=http://localhost:8321 pytest -sv tests/integration/inference/test_openai_embeddings.py --embedding-model all-MiniLM-L6-v2,text-embedding-3-small,gemini/text-embedding-004 ```
This commit is contained in:
parent
277f8690ef
commit
b21050935e
21 changed files with 981 additions and 0 deletions
176
docs/_static/llama-stack-spec.html
vendored
176
docs/_static/llama-stack-spec.html
vendored
|
@ -3607,6 +3607,49 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/openai/v1/embeddings": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "An OpenAIEmbeddingsResponse containing the embeddings.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/OpenAIEmbeddingsResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Inference"
|
||||
],
|
||||
"description": "Generate OpenAI-compatible embeddings for the given input using the specified model.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/OpenaiEmbeddingsRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/openai/v1/models": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
@ -11777,6 +11820,139 @@
|
|||
"title": "OpenAICompletionChoice",
|
||||
"description": "A choice from an OpenAI-compatible completion response."
|
||||
},
|
||||
"OpenaiEmbeddingsRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint."
|
||||
},
|
||||
"input": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "Input text to embed, encoded as a string or array of strings. To embed multiple inputs in a single request, pass an array of strings."
|
||||
},
|
||||
"encoding_format": {
|
||||
"type": "string",
|
||||
"description": "(Optional) The format to return the embeddings in. Can be either \"float\" or \"base64\". Defaults to \"float\"."
|
||||
},
|
||||
"dimensions": {
|
||||
"type": "integer",
|
||||
"description": "(Optional) The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models."
|
||||
},
|
||||
"user": {
|
||||
"type": "string",
|
||||
"description": "(Optional) A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"model",
|
||||
"input"
|
||||
],
|
||||
"title": "OpenaiEmbeddingsRequest"
|
||||
},
|
||||
"OpenAIEmbeddingData": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"object": {
|
||||
"type": "string",
|
||||
"const": "embedding",
|
||||
"default": "embedding",
|
||||
"description": "The object type, which will be \"embedding\""
|
||||
},
|
||||
"embedding": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "number"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
}
|
||||
],
|
||||
"description": "The embedding vector as a list of floats (when encoding_format=\"float\") or as a base64-encoded string (when encoding_format=\"base64\")"
|
||||
},
|
||||
"index": {
|
||||
"type": "integer",
|
||||
"description": "The index of the embedding in the input list"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"object",
|
||||
"embedding",
|
||||
"index"
|
||||
],
|
||||
"title": "OpenAIEmbeddingData",
|
||||
"description": "A single embedding data object from an OpenAI-compatible embeddings response."
|
||||
},
|
||||
"OpenAIEmbeddingUsage": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"prompt_tokens": {
|
||||
"type": "integer",
|
||||
"description": "The number of tokens in the input"
|
||||
},
|
||||
"total_tokens": {
|
||||
"type": "integer",
|
||||
"description": "The total number of tokens used"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"prompt_tokens",
|
||||
"total_tokens"
|
||||
],
|
||||
"title": "OpenAIEmbeddingUsage",
|
||||
"description": "Usage information for an OpenAI-compatible embeddings response."
|
||||
},
|
||||
"OpenAIEmbeddingsResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"object": {
|
||||
"type": "string",
|
||||
"const": "list",
|
||||
"default": "list",
|
||||
"description": "The object type, which will be \"list\""
|
||||
},
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIEmbeddingData"
|
||||
},
|
||||
"description": "List of embedding data objects"
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The model that was used to generate the embeddings"
|
||||
},
|
||||
"usage": {
|
||||
"$ref": "#/components/schemas/OpenAIEmbeddingUsage",
|
||||
"description": "Usage information"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"object",
|
||||
"data",
|
||||
"model",
|
||||
"usage"
|
||||
],
|
||||
"title": "OpenAIEmbeddingsResponse",
|
||||
"description": "Response from an OpenAI-compatible embeddings request."
|
||||
},
|
||||
"OpenAIModel": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue