chore(api): remove deprecated embeddings impls

2025-12-19 09:49:39 +00:00 · 2025-09-02 02:02:02 -04:00 · 2025-09-02 02:02:02 -04:00 · 30998fd1ff
commit 30998fd1ff
parent 478b4ff1e6
20 changed files with 3 additions and 927 deletions
--- a/llama_stack/core/routers/inference.py
+++ b/llama_stack/core/routers/inference.py
@@ -16,7 +16,6 @@ from pydantic import Field, TypeAdapter

 from llama_stack.apis.common.content_types import (
    InterleavedContent,
-    InterleavedContentItem,
 )
 from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
 from llama_stack.apis.inference import (
@@ -28,8 +27,6 @@ from llama_stack.apis.inference import (
    CompletionMessage,
    CompletionResponse,
    CompletionResponseStreamChunk,
-    EmbeddingsResponse,
-    EmbeddingTaskType,
    Inference,
    ListOpenAIChatCompletionResponse,
    LogProbConfig,
@@ -50,7 +47,6 @@ from llama_stack.apis.inference import (
    ResponseFormat,
    SamplingParams,
    StopReason,
-    TextTruncation,
    ToolChoice,
    ToolConfig,
    ToolDefinition,
@@ -347,25 +343,6 @@ class InferenceRouter(Inference):
        provider = await self.routing_table.get_provider_impl(model_id)
        return await provider.batch_completion(model_id, content_batch, sampling_params, response_format, logprobs)

-    async def embeddings(
-        self,
-        model_id: str,
-        contents: list[str] | list[InterleavedContentItem],
-        text_truncation: TextTruncation | None = TextTruncation.none,
-        output_dimension: int | None = None,
-        task_type: EmbeddingTaskType | None = None,
-    ) -> EmbeddingsResponse:
-        logger.debug(f"InferenceRouter.embeddings: {model_id}")
-        await self._get_model(model_id, ModelType.embedding)
-        provider = await self.routing_table.get_provider_impl(model_id)
-        return await provider.embeddings(
-            model_id=model_id,
-            contents=contents,
-            text_truncation=text_truncation,
-            output_dimension=output_dimension,
-            task_type=task_type,
-        )
-
    async def openai_completion(
        self,
        model: str,