OpenAI compat embeddings API

2025-06-28 19:04:19 +00:00 · 2025-05-29 15:27:59 -07:00 · 2025-05-29 15:27:59 -07:00 · f2c2a05f58
commit f2c2a05f58
parent 2603f10f95
20 changed files with 706 additions and 0 deletions
--- a/llama_stack/providers/inline/inference/vllm/vllm.py
+++ b/llama_stack/providers/inline/inference/vllm/vllm.py
@@ -40,6 +40,7 @@ from llama_stack.apis.inference import (
    JsonSchemaResponseFormat,
    LogProbConfig,
    Message,
+    OpenAIEmbeddingsResponse,
    ResponseFormat,
    SamplingParams,
    TextTruncation,
@@ -410,6 +411,16 @@ class VLLMInferenceImpl(
    ) -> EmbeddingsResponse:
        raise NotImplementedError()

+    async def openai_embeddings(  # stub: this class defines the method but does not implement it
+        self,
+        model: str,
+        input: str | list[str],
+        encoding_format: str | None = "float",
+        dimensions: int | None = None,
+        user: str | None = None,
+    ) -> OpenAIEmbeddingsResponse:
+        raise NotImplementedError()  # no argument is read; every call raises unconditionally
+
    async def chat_completion(
        self,
        model_id: str,