chore(api): remove deprecated embeddings impls

2025-10-05 20:27:35 +00:00 · 2025-09-02 02:02:02 -04:00 · 2025-09-02 02:02:02 -04:00 · 30998fd1ff
commit 30998fd1ff
parent 478b4ff1e6
20 changed files with 3 additions and 927 deletions
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -15,7 +15,6 @@ from openai.types.chat.chat_completion_chunk import (

 from llama_stack.apis.common.content_types import (
    InterleavedContent,
-    InterleavedContentItem,
    TextDelta,
    ToolCallDelta,
    ToolCallParseStatus,
@@ -30,8 +29,6 @@ from llama_stack.apis.inference import (
    CompletionRequest,
    CompletionResponse,
    CompletionResponseStreamChunk,
-    EmbeddingsResponse,
-    EmbeddingTaskType,
    GrammarResponseFormat,
    Inference,
    JsonSchemaResponseFormat,
@@ -47,7 +44,6 @@ from llama_stack.apis.inference import (
    OpenAIResponseFormatParam,
    ResponseFormat,
    SamplingParams,
-    TextTruncation,
    ToolChoice,
    ToolConfig,
    ToolDefinition,
@@ -78,8 +74,6 @@ from llama_stack.providers.utils.inference.openai_compat import (
 )
 from llama_stack.providers.utils.inference.prompt_adapter import (
    completion_request_to_prompt,
-    content_has_media,
-    interleaved_content_as_str,
    request_has_media,
 )

@@ -535,32 +529,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
            **options,
        }

-    async def embeddings(
-        self,
-        model_id: str,
-        contents: list[str] | list[InterleavedContentItem],
-        text_truncation: TextTruncation | None = TextTruncation.none,
-        output_dimension: int | None = None,
-        task_type: EmbeddingTaskType | None = None,
-    ) -> EmbeddingsResponse:
-        self._lazy_initialize_client()
-        assert self.client is not None
-        model = await self._get_model(model_id)
-
-        kwargs = {}
-        assert model.model_type == ModelType.embedding
-        assert model.metadata.get("embedding_dimension")
-        kwargs["dimensions"] = model.metadata.get("embedding_dimension")
-        assert all(not content_has_media(content) for content in contents), "VLLM does not support media for embeddings"
-        response = await self.client.embeddings.create(
-            model=model.provider_resource_id,
-            input=[interleaved_content_as_str(content) for content in contents],
-            **kwargs,
-        )
-
-        embeddings = [data.embedding for data in response.data]
-        return EmbeddingsResponse(embeddings=embeddings)
-
    async def openai_embeddings(
        self,
        model: str,