diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py
index b74080384..4095e9610 100644
--- a/llama_stack/providers/utils/memory/vector_store.py
+++ b/llama_stack/providers/utils/memory/vector_store.py
@@ -294,11 +294,12 @@ class VectorDBWithIndex:
                 _validate_embedding(c.embedding, i, self.vector_db.embedding_dimension)
 
         if chunks_to_embed:
-            resp = await self.inference_api.embeddings(
+            resp = await self.inference_api.openai_embeddings(
                 self.vector_db.embedding_model,
                 [c.content for c in chunks_to_embed],
             )
-            for c, embedding in zip(chunks_to_embed, resp.embeddings, strict=False):
-                c.embedding = embedding
+            # OpenAI-style responses wrap each vector in a data item; unwrap `.embedding`.
+            for c, data in zip(chunks_to_embed, resp.data, strict=False):
+                c.embedding = data.embedding
 
         embeddings = np.array([c.embedding for c in chunks], dtype=np.float32)
@@ -334,8 +335,8 @@ class VectorDBWithIndex:
         if mode == "keyword":
             return await self.index.query_keyword(query_string, k, score_threshold)
 
-        embeddings_response = await self.inference_api.embeddings(self.vector_db.embedding_model, [query_string])
-        query_vector = np.array(embeddings_response.embeddings[0], dtype=np.float32)
+        embeddings_response = await self.inference_api.openai_embeddings(self.vector_db.embedding_model, [query_string])
+        query_vector = np.array(embeddings_response.data[0].embedding, dtype=np.float32)
         if mode == "hybrid":
             return await self.index.query_hybrid(
                 query_vector, query_string, k, score_threshold, reranker_type, reranker_params
diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py
index 919f97ba7..fcc64f869 100644
--- a/tests/unit/rag/test_vector_store.py
+++ b/tests/unit/rag/test_vector_store.py
@@ -218,11 +218,17 @@ class TestVectorDBWithIndex:
             Chunk(content="Test 2", embedding=None, metadata={}),
         ]
 
-        mock_inference_api.embeddings.return_value.embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
+        # Mirror the OpenAI response shape: each data item exposes its vector as `.embedding`.
+        mock_inference_api.openai_embeddings.return_value.data = [
+            MagicMock(embedding=[0.1, 0.2, 0.3]),
+            MagicMock(embedding=[0.4, 0.5, 0.6]),
+        ]
 
         await vector_db_with_index.insert_chunks(chunks)
 
-        mock_inference_api.embeddings.assert_called_once_with("test-model without embeddings", ["Test 1", "Test 2"])
+        mock_inference_api.openai_embeddings.assert_called_once_with(
+            "test-model without embeddings", ["Test 1", "Test 2"]
+        )
         mock_index.add_chunks.assert_called_once()
         args = mock_index.add_chunks.call_args[0]
         assert args[0] == chunks
@@ -246,7 +252,7 @@ class TestVectorDBWithIndex:
 
         await vector_db_with_index.insert_chunks(chunks)
 
-        mock_inference_api.embeddings.assert_not_called()
+        mock_inference_api.openai_embeddings.assert_not_called()
         mock_index.add_chunks.assert_called_once()
         args = mock_index.add_chunks.call_args[0]
         assert args[0] == chunks
@@ -288,7 +294,7 @@ class TestVectorDBWithIndex:
         with pytest.raises(ValueError, match="has dimension 4, expected 3"):
             await vector_db_with_index.insert_chunks(chunks_wrong_dim)
 
-        mock_inference_api.embeddings.assert_not_called()
+        mock_inference_api.openai_embeddings.assert_not_called()
         mock_index.add_chunks.assert_not_called()
 
     async def test_insert_chunks_with_partially_precomputed_embeddings(self):
@@ -308,11 +314,14 @@ class TestVectorDBWithIndex:
             Chunk(content="Test 3", embedding=None, metadata={}),
         ]
 
-        mock_inference_api.embeddings.return_value.embeddings = [[0.1, 0.1, 0.1], [0.3, 0.3, 0.3]]
+        mock_inference_api.openai_embeddings.return_value.data = [
+            MagicMock(embedding=[0.1, 0.1, 0.1]),
+            MagicMock(embedding=[0.3, 0.3, 0.3]),
+        ]
 
         await vector_db_with_index.insert_chunks(chunks)
 
-        mock_inference_api.embeddings.assert_called_once_with(
+        mock_inference_api.openai_embeddings.assert_called_once_with(
             "test-model with partial embeddings", ["Test 1", "Test 3"]
         )
         mock_index.add_chunks.assert_called_once()