Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-10-08 04:54:38 +00:00
chore: disable openai_embeddings on inference=remote::llama-openai-compat (#3704)
# What does this PR do?

api.llama.com does not provide embedding models; this change makes that explicit.

## Test Plan

ci
parent ae74b31ae3
commit de9940c697

1 changed file with 11 additions and 1 deletion
```diff
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
 from typing import Any
 
-from llama_stack.apis.inference.inference import OpenAICompletion
+from llama_stack.apis.inference.inference import OpenAICompletion, OpenAIEmbeddingsResponse
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
@@ -56,3 +56,13 @@ class LlamaCompatInferenceAdapter(OpenAIMixin):
         suffix: str | None = None,
     ) -> OpenAICompletion:
         raise NotImplementedError()
+
+    async def openai_embeddings(
+        self,
+        model: str,
+        input: str | list[str],
+        encoding_format: str | None = "float",
+        dimensions: int | None = None,
+        user: str | None = None,
+    ) -> OpenAIEmbeddingsResponse:
+        raise NotImplementedError()
```
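For context, a minimal sketch of what this change means for callers. The adapter construction below is illustrative only: the module path and constructor arguments are assumptions, since in a real deployment the provider is instantiated by the Llama Stack runtime. The point is that `openai_embeddings` now fails fast with `NotImplementedError` instead of forwarding a request that api.llama.com cannot serve.

```python
# Illustrative sketch only; import path and constructor args are assumed.
import asyncio

from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
from llama_stack.providers.remote.inference.llama_openai_compat.llama import (  # module path assumed
    LlamaCompatInferenceAdapter,
)


async def main() -> None:
    # Direct construction here is a simplification for illustration.
    adapter = LlamaCompatInferenceAdapter(config=LlamaCompatConfig(api_key="..."))  # args assumed
    try:
        # After this change, the call raises immediately; api.llama.com
        # exposes no embedding models to serve the request.
        await adapter.openai_embeddings(model="some-embedding-model", input="hello world")
    except NotImplementedError:
        print("embeddings are not supported by the llama-openai-compat provider")


asyncio.run(main())
```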