chore: disable openai_embeddings on inference=remote::llama-openai-compat (#3704)

# What does this PR do?

api.llama.com does not provide embedding models; this change makes that explicit by overriding `openai_embeddings` to raise `NotImplementedError`.


## Test Plan

ci
Author: Matthew Farrellee, 2025-10-06 13:27:40 -04:00 (committed by GitHub)
Commit: de9940c697 (parent: ae74b31ae3)


@@ -5,7 +5,7 @@
 # the root directory of this source tree.
 from typing import Any
-from llama_stack.apis.inference.inference import OpenAICompletion
+from llama_stack.apis.inference.inference import OpenAICompletion, OpenAIEmbeddingsResponse
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
@@ -56,3 +56,13 @@ class LlamaCompatInferenceAdapter(OpenAIMixin):
         suffix: str | None = None,
     ) -> OpenAICompletion:
         raise NotImplementedError()
+
+    async def openai_embeddings(
+        self,
+        model: str,
+        input: str | list[str],
+        encoding_format: str | None = "float",
+        dimensions: int | None = None,
+        user: str | None = None,
+    ) -> OpenAIEmbeddingsResponse:
+        raise NotImplementedError()
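
For reviewers, a minimal sketch of the behavior this override guarantees. The adapter module path, the `LlamaCompatConfig(api_key=...)` construction, and the model name below are assumptions for illustration, not part of this PR:

```python
# Sketch: the override raises before any HTTP request is made, so callers
# get a clear NotImplementedError instead of an opaque failure from
# api.llama.com (which hosts no embedding models).
import pytest

from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
from llama_stack.providers.remote.inference.llama_openai_compat.llama import LlamaCompatInferenceAdapter


@pytest.mark.asyncio
async def test_openai_embeddings_not_supported() -> None:
    # "test-key" is a placeholder credential; it is never sent anywhere
    # because the method raises before issuing a request.
    adapter = LlamaCompatInferenceAdapter(config=LlamaCompatConfig(api_key="test-key"))
    with pytest.raises(NotImplementedError):
        await adapter.openai_embeddings(model="any-model", input="hello world")
```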