From 412ea00c0b6a3e4055cb34fc03ad0d38203458c5 Mon Sep 17 00:00:00 2001
From: Justin
Date: Mon, 6 Oct 2025 15:11:27 -0400
Subject: [PATCH] Remove openai embedding override

We can just use the default; the RunPod embedding endpoint for vLLM is
nothing special and just passes through to vLLM.
---
 .../remote/inference/runpod/runpod.py | 23 -------------------
 1 file changed, 23 deletions(-)

diff --git a/llama_stack/providers/remote/inference/runpod/runpod.py b/llama_stack/providers/remote/inference/runpod/runpod.py
index 09cf68504..ab5e1b60c 100644
--- a/llama_stack/providers/remote/inference/runpod/runpod.py
+++ b/llama_stack/providers/remote/inference/runpod/runpod.py
@@ -7,7 +7,6 @@
 from typing import Any
 
 from llama_stack.apis.inference import (
-    OpenAIEmbeddingsResponse,
     OpenAIMessageParam,
     OpenAIResponseFormatParam,
 )
@@ -114,25 +113,3 @@ class RunpodInferenceAdapter(OpenAIMixin):
         )
 
         return model
-
-    async def openai_embeddings(
-        self,
-        model: str,
-        input: str | list[str],
-        encoding_format: str | None = "float",
-        dimensions: int | None = None,
-        user: str | None = None,
-    ) -> OpenAIEmbeddingsResponse:
-        # Resolve model_id to provider_resource_id
-        model_obj = await self.model_store.get_model(model)
-        provider_model_id = model_obj.provider_resource_id or model
-
-        response = await self.client.embeddings.create(
-            model=provider_model_id,
-            input=input,
-            encoding_format=encoding_format,
-            dimensions=dimensions,
-            user=user,
-        )
-
-        return response
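
Note for reviewers: the deleted override was a plain resolve-and-passthrough,
which is presumably what the inherited OpenAIMixin.openai_embeddings default
already does. As a sketch only (the model_store and client attributes are
copied from the removed override, and the class name below is hypothetical,
not the real mixin), the behavior the adapter now inherits would look roughly
like this:

    from llama_stack.apis.inference import OpenAIEmbeddingsResponse

    class OpenAIMixinDefaultSketch:
        # Sketch of the assumed default. Like the real mixin, it expects the
        # host class to provide `model_store` (stack model registry) and
        # `client` (an OpenAI-compatible async client).
        async def openai_embeddings(
            self,
            model: str,
            input: str | list[str],
            encoding_format: str | None = "float",
            dimensions: int | None = None,
            user: str | None = None,
        ) -> OpenAIEmbeddingsResponse:
            # Resolve the stack-level model id to the provider's id, then
            # forward the request unchanged to the OpenAI-compatible endpoint.
            model_obj = await self.model_store.get_model(model)
            return await self.client.embeddings.create(
                model=model_obj.provider_resource_id or model,
                input=input,
                encoding_format=encoding_format,
                dimensions=dimensions,
                user=user,
            )

Since the override added no RunPod-specific behavior on top of this, deleting
it removes duplication without changing what callers of the embeddings API see.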