From de9940c697c499f33a3e74a64aac2a454e73f3bb Mon Sep 17 00:00:00 2001
From: Matthew Farrellee
Date: Mon, 6 Oct 2025 13:27:40 -0400
Subject: [PATCH] chore: disable openai_embeddings on inference=remote::llama-openai-compat (#3704)

# What does this PR do?

api.llama.com does not provide embedding models; this change makes that
explicit.

## Test Plan

ci

---
 .../remote/inference/llama_openai_compat/llama.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
index 403680668..165992c16 100644
--- a/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
+++ b/llama_stack/providers/remote/inference/llama_openai_compat/llama.py
@@ -5,7 +5,7 @@
 # the root directory of this source tree.
 from typing import Any
 
-from llama_stack.apis.inference.inference import OpenAICompletion
+from llama_stack.apis.inference.inference import OpenAICompletion, OpenAIEmbeddingsResponse
 from llama_stack.log import get_logger
 from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
@@ -56,3 +56,13 @@ class LlamaCompatInferenceAdapter(OpenAIMixin):
         suffix: str | None = None,
     ) -> OpenAICompletion:
         raise NotImplementedError()
+
+    async def openai_embeddings(
+        self,
+        model: str,
+        input: str | list[str],
+        encoding_format: str | None = "float",
+        dimensions: int | None = None,
+        user: str | None = None,
+    ) -> OpenAIEmbeddingsResponse:
+        raise NotImplementedError()
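
A minimal sketch of the caller-side effect, assuming the adapter can be
constructed directly from its `LlamaCompatConfig` (in practice the stack
instantiates providers from the run config; the API key and model name below
are placeholders, not values from this patch). The only behavior shown is
that an embeddings request through `remote::llama-openai-compat` now fails
fast with `NotImplementedError` instead of forwarding a request that
api.llama.com cannot serve:

```python
# Sketch only: direct construction and the placeholder api_key/model values
# are assumptions, not taken from this patch; providers are normally built
# by the stack from the run config.
import asyncio

from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
from llama_stack.providers.remote.inference.llama_openai_compat.llama import LlamaCompatInferenceAdapter


async def main() -> None:
    adapter = LlamaCompatInferenceAdapter(config=LlamaCompatConfig(api_key="placeholder"))
    try:
        # Any embeddings request now fails fast, since api.llama.com
        # serves no embedding models.
        await adapter.openai_embeddings(model="some-model", input="hello world")
    except NotImplementedError:
        print("remote::llama-openai-compat does not support embeddings")


asyncio.run(main())
```

Raising at the adapter layer mirrors the adapter's existing behavior for the
completions endpoint, visible in the hunk context above, where
`-> OpenAICompletion` likewise ends in `raise NotImplementedError()`.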