From bbdde4ec124b5819ea1d20435119416e634595d2 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe <ashwin.bharambe@gmail.com>
Date: Sun, 12 Oct 2025 18:56:29 -0700
Subject: [PATCH] fix batch embeddings: pass request object to openai_embeddings

---
 llama_stack/providers/inline/batches/reference/batches.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llama_stack/providers/inline/batches/reference/batches.py b/llama_stack/providers/inline/batches/reference/batches.py
index 102537dd7..fa581ae1f 100644
--- a/llama_stack/providers/inline/batches/reference/batches.py
+++ b/llama_stack/providers/inline/batches/reference/batches.py
@@ -25,6 +25,7 @@ from llama_stack.apis.inference import (
     OpenAIChatCompletionRequestWithExtraBody,
     OpenAICompletionRequestWithExtraBody,
     OpenAIDeveloperMessageParam,
+    OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIMessageParam,
     OpenAISystemMessageParam,
     OpenAIToolMessageParam,
@@ -640,7 +641,9 @@ class ReferenceBatchesImpl(Batches):
                     },
                 }
             else:  # /v1/embeddings
-                embeddings_response = await self.inference_api.openai_embeddings(**request.body)
+                embeddings_response = await self.inference_api.openai_embeddings(
+                    OpenAIEmbeddingsRequestWithExtraBody(**request.body)
+                )
                 assert hasattr(embeddings_response, "model_dump_json"), (
                     "Embeddings response must have model_dump_json method"
                 )