feat: New OpenAI compat embeddings API (#2314)

# What does this PR do?
Adds a new OpenAI-compatible endpoint for the embeddings API:
`/openai/v1/embeddings`
Added providers for OpenAI, LiteLLM and SentenceTransformer. 


## Test Plan
```
LLAMA_STACK_CONFIG=http://localhost:8321 pytest -sv tests/integration/inference/test_openai_embeddings.py --embedding-model all-MiniLM-L6-v2,text-embedding-3-small,gemini/text-embedding-004
```
This commit is contained in:
Hardik Shah 2025-05-31 22:11:47 -07:00 committed by Sumit Jaiswal
parent 455939e63c
commit 6ec2ed4196
No known key found for this signature in database
GPG key ID: A4604B39D64D6AEC
21 changed files with 981 additions and 0 deletions

View file

@ -38,6 +38,7 @@ from llama_stack.apis.inference import (
JsonSchemaResponseFormat,
LogProbConfig,
Message,
OpenAIEmbeddingsResponse,
ResponseFormat,
SamplingParams,
TextTruncation,
@ -507,6 +508,16 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
embeddings = [data.embedding for data in response.data]
return EmbeddingsResponse(embeddings=embeddings)
async def openai_embeddings(
    self,
    model: str,
    input: str | list[str],
    encoding_format: str | None = "float",
    dimensions: int | None = None,
    user: str | None = None,
) -> OpenAIEmbeddingsResponse:
    """Placeholder for the OpenAI-compatible embeddings call on this adapter.

    The body does no work: none of the arguments are read and awaiting the
    coroutine always fails, so callers never receive a response object.

    Args:
        model: Model identifier (unused here).
        input: A single string or a list of strings (unused here).
        encoding_format: Defaults to ``"float"`` (unused here).
        dimensions: Optional integer, defaults to ``None`` (unused here).
        user: Optional string, defaults to ``None`` (unused here).

    Raises:
        NotImplementedError: Always, with no message.
    """
    # NOTE(review): parameter names presumably mirror the OpenAI embeddings
    # request fields -- confirm against the API definition before implementing.
    raise NotImplementedError()
async def openai_completion(
self,
model: str,