diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
index a5c491f53..62d95f6cf 100644
--- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
+++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
@@ -327,7 +327,7 @@ async def test_create_vector_store_file_batch(vector_io_adapter):
     vector_io_adapter._process_file_batch_async = AsyncMock()
 
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
+        params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids)
     )
 
     assert batch.vector_store_id == store_id
@@ -354,7 +354,7 @@ async def test_retrieve_vector_store_file_batch(vector_io_adapter):
 
     # Create batch first
     created_batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
+        params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids)
     )
 
     # Retrieve batch
@@ -387,7 +387,7 @@ async def test_cancel_vector_store_file_batch(vector_io_adapter):
 
     # Create batch
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
+        params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids)
     )
 
     # Cancel batch
@@ -432,7 +432,7 @@ async def test_list_files_in_vector_store_file_batch(vector_io_adapter):
 
     # Create batch
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
+        params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids)
     )
 
     # List files
@@ -451,7 +451,9 @@ async def test_file_batch_validation_errors(vector_io_adapter):
     # Test nonexistent vector store
     with pytest.raises(VectorStoreNotFoundError):
         await vector_io_adapter.openai_create_vector_store_file_batch(
-            vector_store_id="nonexistent", params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_1"])
+            params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(
+                vector_store_id="nonexistent", file_ids=["file_1"]
+            ),
         )
 
     # Setup store for remaining tests
@@ -468,7 +470,7 @@ async def test_file_batch_validation_errors(vector_io_adapter):
     # Test wrong vector store for batch
     vector_io_adapter.openai_attach_file_to_vector_store = AsyncMock()
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_1"])
+        params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=["file_1"])
     )
 
     # Create wrong_store so it exists but the batch doesn't belong to it
@@ -515,7 +517,7 @@ async def test_file_batch_pagination(vector_io_adapter):
 
     # Create batch
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
+        params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids)
     )
 
     # Test pagination with limit
@@ -587,7 +589,7 @@ async def test_file_batch_status_filtering(vector_io_adapter):
 
     # Create batch
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
+        params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids)
     )
 
     # Test filtering by completed status
@@ -629,7 +631,7 @@ async def test_cancel_completed_batch_fails(vector_io_adapter):
 
     # Create batch
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
+        params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids)
     )
 
     # Manually update status to completed
@@ -663,7 +665,7 @@ async def test_file_batch_persistence_across_restarts(vector_io_adapter):
 
     # Create batch
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
+        params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids)
     )
     batch_id = batch.id
 
@@ -718,7 +720,7 @@ async def test_cancelled_batch_persists_in_storage(vector_io_adapter):
 
     # Create batch
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
+        params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids)
     )
     batch_id = batch.id
 
@@ -765,10 +767,10 @@ async def test_only_in_progress_batches_resumed(vector_io_adapter):
 
     # Create multiple batches
     batch1 = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_1"])
+        params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=["file_1"])
     )
     batch2 = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_2"])
+        params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=["file_2"])
     )
 
     # Complete one batch (should persist with completed status)
@@ -781,7 +783,7 @@ async def test_only_in_progress_batches_resumed(vector_io_adapter):
 
     # Create a third batch that stays in progress
     batch3 = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=["file_3"])
+        params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=["file_3"])
     )
 
     # Simulate restart - clear memory and reload from persistence
@@ -942,7 +944,7 @@ async def test_max_concurrent_files_per_batch(vector_io_adapter):
     file_ids = [f"file_{i}" for i in range(8)]  # 8 files, but limit should be 5
 
     batch = await vector_io_adapter.openai_create_vector_store_file_batch(
-        vector_store_id=store_id, params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(file_ids=file_ids)
+        params=OpenAICreateVectorStoreFileBatchRequestWithExtraBody(vector_store_id=store_id, file_ids=file_ids)
     )
 
     # Give time for the semaphore logic to start processing files
diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py
index 8c017a551..1e40c98e8 100644
--- a/tests/unit/rag/test_vector_store.py
+++ b/tests/unit/rag/test_vector_store.py
@@ -13,7 +13,10 @@ from unittest.mock import AsyncMock, MagicMock
 import numpy as np
 import pytest
 
-from llama_stack.apis.inference.inference import OpenAIEmbeddingData
+from llama_stack.apis.inference.inference import (
+    OpenAIEmbeddingData,
+    OpenAIEmbeddingsRequestWithExtraBody,
+)
 from llama_stack.apis.tools import RAGDocument
 from llama_stack.apis.vector_io import Chunk
 from llama_stack.providers.utils.memory.vector_store import (
@@ -226,9 +229,14 @@ class TestVectorDBWithIndex:
 
         await vector_db_with_index.insert_chunks(chunks)
 
-        mock_inference_api.openai_embeddings.assert_called_once_with(
-            "test-model without embeddings", ["Test 1", "Test 2"]
-        )
+        # Verify openai_embeddings was called with correct params
+        mock_inference_api.openai_embeddings.assert_called_once()
+        call_args = mock_inference_api.openai_embeddings.call_args[0]
+        assert len(call_args) == 1
+        params = call_args[0]
+        assert isinstance(params, OpenAIEmbeddingsRequestWithExtraBody)
+        assert params.model == "test-model without embeddings"
+        assert params.input == ["Test 1", "Test 2"]
         mock_index.add_chunks.assert_called_once()
         args = mock_index.add_chunks.call_args[0]
         assert args[0] == chunks
@@ -321,9 +329,14 @@ class TestVectorDBWithIndex:
 
         await vector_db_with_index.insert_chunks(chunks)
 
-        mock_inference_api.openai_embeddings.assert_called_once_with(
-            "test-model with partial embeddings", ["Test 1", "Test 3"]
-        )
+        # Verify openai_embeddings was called with correct params
+        mock_inference_api.openai_embeddings.assert_called_once()
+        call_args = mock_inference_api.openai_embeddings.call_args[0]
+        assert len(call_args) == 1
+        params = call_args[0]
+        assert isinstance(params, OpenAIEmbeddingsRequestWithExtraBody)
+        assert params.model == "test-model with partial embeddings"
+        assert params.input == ["Test 1", "Test 3"]
         mock_index.add_chunks.assert_called_once()
         args = mock_index.add_chunks.call_args[0]
         assert len(args[0]) == 3