From de9940c697c499f33a3e74a64aac2a454e73f3bb Mon Sep 17 00:00:00 2001
From: Matthew Farrellee
Date: Mon, 6 Oct 2025 13:27:40 -0400
Subject: [PATCH] chore: disable openai_embeddings on inference=remote::llama-openai-compat (#3704)

# What does this PR do?

api.llama.com does not provide embedding models; this change makes that
explicit.

## Test Plan

ci

---
 .../remote/inference/llama_openai_compat/llama.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
index 403680668..165992c16 100644
--- a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
+++ b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
 from typing import Any
 
-from llama_stack.apis.inference.inference import OpenAICompletion
+from llama_stack.apis.inference.inference import OpenAICompletion, OpenAIEmbeddingsResponse
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
@@ -56,3 +56,13 @@ class LlamaCompatInferenceAdapter(OpenAIMixin):
         suffix: str | None = None,
     ) -> OpenAICompletion:
         raise NotImplementedError()
+
+    async def openai_embeddings(
+        self,
+        model: str,
+        input: str | list[str],
+        encoding_format: str | None = "float",
+        dimensions: int | None = None,
+        user: str | None = None,
+    ) -> OpenAIEmbeddingsResponse:
+        raise NotImplementedError()
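
A minimal sketch of the caller-side effect, assuming the adapter can be
constructed directly from its `LlamaCompatConfig` (in practice the stack
instantiates providers from the run config; the API key and model name below
are placeholders, not values from this patch). The only behavior shown is
that an embeddings request through `remote::llama-openai-compat` now fails
fast with `NotImplementedError` instead of forwarding a request that
api.llama.com cannot serve:

```python
# Sketch only: direct construction and the placeholder api_key/model values
# are assumptions, not taken from this patch; providers are normally built
# by the stack from the run config.
import asyncio

from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
from llama_stack.providers.remote.inference.llama_openai_compat.llama import LlamaCompatInferenceAdapter


async def main() -> None:
    adapter = LlamaCompatInferenceAdapter(config=LlamaCompatConfig(api_key="placeholder"))
    try:
        # Any embeddings request now fails fast, since api.llama.com
        # serves no embedding models.
        await adapter.openai_embeddings(model="some-model", input="hello world")
    except NotImplementedError:
        print("remote::llama-openai-compat does not support embeddings")


asyncio.run(main())
```

Raising at the adapter layer mirrors the adapter's existing behavior for the
completions endpoint, visible in the hunk context above, where
`-> OpenAICompletion` likewise ends in `raise NotImplementedError()`.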