async def openai_embeddings(
    self,
    model: str,
    input: str | list[str],
    encoding_format: str | None = "float",
    dimensions: int | None = None,
    user: str | None = None,
) -> OpenAIEmbeddingsResponse:
    """Unimplemented embeddings entry point for this adapter.

    Every argument is accepted but ignored: the method raises
    unconditionally and never produces an ``OpenAIEmbeddingsResponse``.

    NOTE(review): presumably this overrides an ``openai_embeddings``
    implementation inherited from ``OpenAIMixin`` to disable it for this
    provider -- confirm against the mixin.

    Raises:
        NotImplementedError: always.
    """
    # Raising the bare class instantiates it with no arguments, which is
    # equivalent to ``raise NotImplementedError()``.
    raise NotImplementedError