mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-27 18:50:41 +00:00
chore: Add OpenAI compatibility for vLLM embeddings (#2448)
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 2s
Integration Tests / test-matrix (http, 3.10, datasets) (push) Failing after 5s
Integration Tests / test-matrix (http, 3.10, scoring) (push) Failing after 4s
Integration Tests / test-matrix (http, 3.10, inference) (push) Failing after 7s
Integration Tests / test-matrix (http, 3.10, post_training) (push) Failing after 5s
Integration Tests / test-matrix (http, 3.10, tool_runtime) (push) Failing after 6s
Integration Tests / test-matrix (http, 3.11, agents) (push) Failing after 5s
Integration Tests / test-matrix (http, 3.10, vector_io) (push) Failing after 8s
Integration Tests / test-matrix (http, 3.10, inspect) (push) Failing after 10s
Integration Tests / test-matrix (http, 3.11, inspect) (push) Failing after 6s
Integration Tests / test-matrix (http, 3.11, post_training) (push) Failing after 6s
Integration Tests / test-matrix (http, 3.11, vector_io) (push) Failing after 5s
Integration Tests / test-matrix (http, 3.12, inspect) (push) Failing after 6s
Integration Tests / test-matrix (http, 3.12, tool_runtime) (push) Failing after 5s
Integration Tests / test-matrix (library, 3.10, inference) (push) Failing after 7s
Integration Tests / test-matrix (http, 3.10, providers) (push) Failing after 19s
Integration Tests / test-matrix (http, 3.12, post_training) (push) Failing after 12s
Integration Tests / test-matrix (http, 3.11, providers) (push) Failing after 15s
Integration Tests / test-matrix (http, 3.10, agents) (push) Failing after 21s
Integration Tests / test-matrix (http, 3.12, providers) (push) Failing after 14s
Integration Tests / test-matrix (http, 3.12, agents) (push) Failing after 16s
Integration Tests / test-matrix (http, 3.12, scoring) (push) Failing after 19s
Integration Tests / test-matrix (library, 3.10, datasets) (push) Failing after 18s
Integration Tests / test-matrix (http, 3.12, inference) (push) Failing after 21s
Integration Tests / test-matrix (library, 3.10, providers) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.10, scoring) (push) Failing after 12s
Integration Tests / test-matrix (http, 3.11, tool_runtime) (push) Failing after 23s
Integration Tests / test-matrix (library, 3.11, agents) (push) Failing after 8s
Integration Tests / test-matrix (http, 3.12, datasets) (push) Failing after 22s
Integration Tests / test-matrix (library, 3.11, providers) (push) Failing after 6s
Integration Tests / test-matrix (library, 3.10, agents) (push) Failing after 14s
Integration Tests / test-matrix (library, 3.10, post_training) (push) Failing after 12s
Integration Tests / test-matrix (library, 3.10, tool_runtime) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.11, inspect) (push) Failing after 7s
Integration Tests / test-matrix (library, 3.10, vector_io) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.11, post_training) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.11, vector_io) (push) Failing after 8s
Integration Tests / test-matrix (http, 3.12, vector_io) (push) Failing after 16s
Integration Tests / test-matrix (library, 3.10, inspect) (push) Failing after 14s
Integration Tests / test-matrix (library, 3.11, tool_runtime) (push) Failing after 9s
Integration Tests / test-matrix (http, 3.11, scoring) (push) Failing after 24s
Integration Tests / test-matrix (library, 3.12, datasets) (push) Failing after 7s
Integration Tests / test-matrix (library, 3.11, scoring) (push) Failing after 6s
Integration Tests / test-matrix (http, 3.11, datasets) (push) Failing after 26s
Integration Tests / test-matrix (http, 3.11, inference) (push) Failing after 25s
Integration Tests / test-matrix (library, 3.11, datasets) (push) Failing after 12s
Integration Tests / test-matrix (library, 3.11, inference) (push) Failing after 9s
Integration Tests / test-matrix (library, 3.12, agents) (push) Failing after 12s
Integration Tests / test-matrix (library, 3.12, inference) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.12, inspect) (push) Failing after 8s
Integration Tests / test-matrix (library, 3.12, post_training) (push) Failing after 8s
Integration Tests / test-matrix (library, 3.12, providers) (push) Failing after 6s
Integration Tests / test-matrix (library, 3.12, scoring) (push) Failing after 16s
Test Llama Stack Build / generate-matrix (push) Successful in 13s
Test External Providers / test-external-providers (venv) (push) Failing after 3s
Unit Tests / unit-tests (3.11) (push) Failing after 4s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 8s
Unit Tests / unit-tests (3.13) (push) Failing after 4s
Test Llama Stack Build / build (push) Failing after 5s
Update ReadTheDocs / update-readthedocs (push) Failing after 5s
Unit Tests / unit-tests (3.12) (push) Failing after 8s
Unit Tests / unit-tests (3.10) (push) Failing after 8s
Integration Tests / test-matrix (library, 3.12, vector_io) (push) Failing after 47s
Integration Tests / test-matrix (library, 3.12, tool_runtime) (push) Failing after 49s
Test Llama Stack Build / build-single-provider (push) Failing after 38s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 43s
Pre-commit / pre-commit (push) Successful in 1m38s
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 2s
Integration Tests / test-matrix (http, 3.10, datasets) (push) Failing after 5s
Integration Tests / test-matrix (http, 3.10, scoring) (push) Failing after 4s
Integration Tests / test-matrix (http, 3.10, inference) (push) Failing after 7s
Integration Tests / test-matrix (http, 3.10, post_training) (push) Failing after 5s
Integration Tests / test-matrix (http, 3.10, tool_runtime) (push) Failing after 6s
Integration Tests / test-matrix (http, 3.11, agents) (push) Failing after 5s
Integration Tests / test-matrix (http, 3.10, vector_io) (push) Failing after 8s
Integration Tests / test-matrix (http, 3.10, inspect) (push) Failing after 10s
Integration Tests / test-matrix (http, 3.11, inspect) (push) Failing after 6s
Integration Tests / test-matrix (http, 3.11, post_training) (push) Failing after 6s
Integration Tests / test-matrix (http, 3.11, vector_io) (push) Failing after 5s
Integration Tests / test-matrix (http, 3.12, inspect) (push) Failing after 6s
Integration Tests / test-matrix (http, 3.12, tool_runtime) (push) Failing after 5s
Integration Tests / test-matrix (library, 3.10, inference) (push) Failing after 7s
Integration Tests / test-matrix (http, 3.10, providers) (push) Failing after 19s
Integration Tests / test-matrix (http, 3.12, post_training) (push) Failing after 12s
Integration Tests / test-matrix (http, 3.11, providers) (push) Failing after 15s
Integration Tests / test-matrix (http, 3.10, agents) (push) Failing after 21s
Integration Tests / test-matrix (http, 3.12, providers) (push) Failing after 14s
Integration Tests / test-matrix (http, 3.12, agents) (push) Failing after 16s
Integration Tests / test-matrix (http, 3.12, scoring) (push) Failing after 19s
Integration Tests / test-matrix (library, 3.10, datasets) (push) Failing after 18s
Integration Tests / test-matrix (http, 3.12, inference) (push) Failing after 21s
Integration Tests / test-matrix (library, 3.10, providers) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.10, scoring) (push) Failing after 12s
Integration Tests / test-matrix (http, 3.11, tool_runtime) (push) Failing after 23s
Integration Tests / test-matrix (library, 3.11, agents) (push) Failing after 8s
Integration Tests / test-matrix (http, 3.12, datasets) (push) Failing after 22s
Integration Tests / test-matrix (library, 3.11, providers) (push) Failing after 6s
Integration Tests / test-matrix (library, 3.10, agents) (push) Failing after 14s
Integration Tests / test-matrix (library, 3.10, post_training) (push) Failing after 12s
Integration Tests / test-matrix (library, 3.10, tool_runtime) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.11, inspect) (push) Failing after 7s
Integration Tests / test-matrix (library, 3.10, vector_io) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.11, post_training) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.11, vector_io) (push) Failing after 8s
Integration Tests / test-matrix (http, 3.12, vector_io) (push) Failing after 16s
Integration Tests / test-matrix (library, 3.10, inspect) (push) Failing after 14s
Integration Tests / test-matrix (library, 3.11, tool_runtime) (push) Failing after 9s
Integration Tests / test-matrix (http, 3.11, scoring) (push) Failing after 24s
Integration Tests / test-matrix (library, 3.12, datasets) (push) Failing after 7s
Integration Tests / test-matrix (library, 3.11, scoring) (push) Failing after 6s
Integration Tests / test-matrix (http, 3.11, datasets) (push) Failing after 26s
Integration Tests / test-matrix (http, 3.11, inference) (push) Failing after 25s
Integration Tests / test-matrix (library, 3.11, datasets) (push) Failing after 12s
Integration Tests / test-matrix (library, 3.11, inference) (push) Failing after 9s
Integration Tests / test-matrix (library, 3.12, agents) (push) Failing after 12s
Integration Tests / test-matrix (library, 3.12, inference) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.12, inspect) (push) Failing after 8s
Integration Tests / test-matrix (library, 3.12, post_training) (push) Failing after 8s
Integration Tests / test-matrix (library, 3.12, providers) (push) Failing after 6s
Integration Tests / test-matrix (library, 3.12, scoring) (push) Failing after 16s
Test Llama Stack Build / generate-matrix (push) Successful in 13s
Test External Providers / test-external-providers (venv) (push) Failing after 3s
Unit Tests / unit-tests (3.11) (push) Failing after 4s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 8s
Unit Tests / unit-tests (3.13) (push) Failing after 4s
Test Llama Stack Build / build (push) Failing after 5s
Update ReadTheDocs / update-readthedocs (push) Failing after 5s
Unit Tests / unit-tests (3.12) (push) Failing after 8s
Unit Tests / unit-tests (3.10) (push) Failing after 8s
Integration Tests / test-matrix (library, 3.12, vector_io) (push) Failing after 47s
Integration Tests / test-matrix (library, 3.12, tool_runtime) (push) Failing after 49s
Test Llama Stack Build / build-single-provider (push) Failing after 38s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 43s
Pre-commit / pre-commit (push) Successful in 1m38s
# What does this PR do?

- Implement OpenAI-compatible embeddings endpoint in vLLM provider
- Support both float and base64 encoding formats
- Add proper error handling and response formatting

<!-- If resolving an issue, uncomment and update the line below -->
Closes #2447

## Test Plan

<!-- Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.* -->

Signed-off-by: Varsha Prasad Narsing <varshaprasad96@gmail.com>
This commit is contained in:
parent
40e2c97915
commit
6f1a935365
1 changed file with 35 additions and 1 deletion
|
@ -38,7 +38,9 @@ from llama_stack.apis.inference import (
|
||||||
JsonSchemaResponseFormat,
|
JsonSchemaResponseFormat,
|
||||||
LogProbConfig,
|
LogProbConfig,
|
||||||
Message,
|
Message,
|
||||||
|
OpenAIEmbeddingData,
|
||||||
OpenAIEmbeddingsResponse,
|
OpenAIEmbeddingsResponse,
|
||||||
|
OpenAIEmbeddingUsage,
|
||||||
ResponseFormat,
|
ResponseFormat,
|
||||||
SamplingParams,
|
SamplingParams,
|
||||||
TextTruncation,
|
TextTruncation,
|
||||||
|
@ -536,7 +538,39 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
|
||||||
dimensions: int | None = None,
|
dimensions: int | None = None,
|
||||||
user: str | None = None,
|
user: str | None = None,
|
||||||
) -> OpenAIEmbeddingsResponse:
|
) -> OpenAIEmbeddingsResponse:
|
||||||
raise NotImplementedError()
|
self._lazy_initialize_client()
|
||||||
|
assert self.client is not None
|
||||||
|
model_obj = await self._get_model(model)
|
||||||
|
assert model_obj.model_type == ModelType.embedding
|
||||||
|
|
||||||
|
# Convert input to list if it's a string
|
||||||
|
input_list = [input] if isinstance(input, str) else input
|
||||||
|
|
||||||
|
# Call vLLM embeddings endpoint with encoding_format
|
||||||
|
response = await self.client.embeddings.create(
|
||||||
|
model=model_obj.provider_resource_id,
|
||||||
|
input=input_list,
|
||||||
|
dimensions=dimensions,
|
||||||
|
encoding_format=encoding_format,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Convert response to OpenAI format
|
||||||
|
data = [
|
||||||
|
OpenAIEmbeddingData(
|
||||||
|
embedding=embedding_data.embedding,
|
||||||
|
index=i,
|
||||||
|
)
|
||||||
|
for i, embedding_data in enumerate(response.data)
|
||||||
|
]
|
||||||
|
|
||||||
|
# Not returning actual token usage since vLLM doesn't provide it
|
||||||
|
usage = OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1)
|
||||||
|
|
||||||
|
return OpenAIEmbeddingsResponse(
|
||||||
|
data=data,
|
||||||
|
model=model_obj.provider_resource_id,
|
||||||
|
usage=usage,
|
||||||
|
)
|
||||||
|
|
||||||
async def openai_completion(
|
async def openai_completion(
|
||||||
self,
|
self,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue