chore(api): remove deprecated embeddings impls (#3301)

# What does this PR do? remove deprecated embeddings implementations
2025-12-04 02:03:44 +00:00 · 2025-09-29 14:45:09 -04:00 · 2025-09-29 14:45:09 -04:00 · 975ead1d6a
commit 975ead1d6a
parent aab22dc759
19 changed files with 3 additions and 632 deletions
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@ -16,7 +16,6 @@ from openai.types.chat.chat_completion_chunk import (

 from llama_stack.apis.common.content_types import (
    InterleavedContent,
-    InterleavedContentItem,
    TextDelta,
    ToolCallDelta,
    ToolCallParseStatus,
@ -31,8 +30,6 @@ from llama_stack.apis.inference import (
    CompletionRequest,
    CompletionResponse,
    CompletionResponseStreamChunk,
-    EmbeddingsResponse,
-    EmbeddingTaskType,
    GrammarResponseFormat,
    Inference,
    JsonSchemaResponseFormat,
@ -41,7 +38,6 @@ from llama_stack.apis.inference import (
    ModelStore,
    ResponseFormat,
    SamplingParams,
-    TextTruncation,
    ToolChoice,
    ToolConfig,
    ToolDefinition,
@ -74,8 +70,6 @@ from llama_stack.providers.utils.inference.openai_compat import (
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack.providers.utils.inference.prompt_adapter import (
    completion_request_to_prompt,
-    content_has_media,
-    interleaved_content_as_str,
    request_has_media,
 )

@ -550,27 +544,3 @@ class VLLMInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin, Inference, ModelsPro
            "stream": request.stream,
            **options,
        }
-
-    async def embeddings(
-        self,
-        model_id: str,
-        contents: list[str] | list[InterleavedContentItem],
-        text_truncation: TextTruncation | None = TextTruncation.none,
-        output_dimension: int | None = None,
-        task_type: EmbeddingTaskType | None = None,
-    ) -> EmbeddingsResponse:
-        model = await self._get_model(model_id)
-
-        kwargs = {}
-        assert model.model_type == ModelType.embedding
-        assert model.metadata.get("embedding_dimension")
-        kwargs["dimensions"] = model.metadata.get("embedding_dimension")
-        assert all(not content_has_media(content) for content in contents), "VLLM does not support media for embeddings"
-        response = await self.client.embeddings.create(
-            model=model.provider_resource_id,
-            input=[interleaved_content_as_str(content) for content in contents],
-            **kwargs,
-        )
-
-        embeddings = [data.embedding for data in response.data]
-        return EmbeddingsResponse(embeddings=embeddings)