diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py
index b74080384..4095e9610 100644
--- a/llama_stack/providers/utils/memory/vector_store.py
+++ b/llama_stack/providers/utils/memory/vector_store.py
@@ -294,11 +294,12 @@ class VectorDBWithIndex:
                 _validate_embedding(c.embedding, i, self.vector_db.embedding_dimension)
 
         if chunks_to_embed:
-            resp = await self.inference_api.embeddings(
+            resp = await self.inference_api.openai_embeddings(
                 self.vector_db.embedding_model,
                 [c.content for c in chunks_to_embed],
             )
-            for c, embedding in zip(chunks_to_embed, resp.embeddings, strict=False):
-                c.embedding = embedding
+            # resp.data holds one embedding object per input; the vector is its `.embedding` field.
+            for c, data in zip(chunks_to_embed, resp.data, strict=False):
+                c.embedding = data.embedding
 
         embeddings = np.array([c.embedding for c in chunks], dtype=np.float32)
@@ -334,8 +335,9 @@ class VectorDBWithIndex:
         if mode == "keyword":
             return await self.index.query_keyword(query_string, k, score_threshold)
 
-        embeddings_response = await self.inference_api.embeddings(self.vector_db.embedding_model, [query_string])
-        query_vector = np.array(embeddings_response.embeddings[0], dtype=np.float32)
+        embeddings_response = await self.inference_api.openai_embeddings(self.vector_db.embedding_model, [query_string])
+        # `data` has one entry per input; unwrap the query vector from its `.embedding` field.
+        query_vector = np.array(embeddings_response.data[0].embedding, dtype=np.float32)
         if mode == "hybrid":
             return await self.index.query_hybrid(
                 query_vector, query_string, k, score_threshold, reranker_type, reranker_params
diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py
index 919f97ba7..fcc64f869 100644
--- a/tests/unit/rag/test_vector_store.py
+++ b/tests/unit/rag/test_vector_store.py
@@ -218,11 +218,17 @@ class TestVectorDBWithIndex:
             Chunk(content="Test 2", embedding=None, metadata={}),
         ]
 
-        mock_inference_api.embeddings.return_value.embeddings = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
+        # Mirror the OpenAI response shape: each `data` entry exposes its vector as `.embedding`.
+        mock_inference_api.openai_embeddings.return_value.data = [
+            MagicMock(embedding=[0.1, 0.2, 0.3]),
+            MagicMock(embedding=[0.4, 0.5, 0.6]),
+        ]
 
         await vector_db_with_index.insert_chunks(chunks)
 
-        mock_inference_api.embeddings.assert_called_once_with("test-model without embeddings", ["Test 1", "Test 2"])
+        mock_inference_api.openai_embeddings.assert_called_once_with(
+            "test-model without embeddings", ["Test 1", "Test 2"]
+        )
         mock_index.add_chunks.assert_called_once()
         args = mock_index.add_chunks.call_args[0]
         assert args[0] == chunks
@@ -246,7 +252,7 @@ class TestVectorDBWithIndex:
 
         await vector_db_with_index.insert_chunks(chunks)
 
-        mock_inference_api.embeddings.assert_not_called()
+        mock_inference_api.openai_embeddings.assert_not_called()
         mock_index.add_chunks.assert_called_once()
         args = mock_index.add_chunks.call_args[0]
         assert args[0] == chunks
@@ -288,7 +294,7 @@ class TestVectorDBWithIndex:
         with pytest.raises(ValueError, match="has dimension 4, expected 3"):
             await vector_db_with_index.insert_chunks(chunks_wrong_dim)
 
-        mock_inference_api.embeddings.assert_not_called()
+        mock_inference_api.openai_embeddings.assert_not_called()
         mock_index.add_chunks.assert_not_called()
 
     async def test_insert_chunks_with_partially_precomputed_embeddings(self):
@@ -308,11 +314,14 @@ class TestVectorDBWithIndex:
             Chunk(content="Test 3", embedding=None, metadata={}),
         ]
 
-        mock_inference_api.embeddings.return_value.embeddings = [[0.1, 0.1, 0.1], [0.3, 0.3, 0.3]]
+        mock_inference_api.openai_embeddings.return_value.data = [
+            MagicMock(embedding=[0.1, 0.1, 0.1]),
+            MagicMock(embedding=[0.3, 0.3, 0.3]),
+        ]
 
         await vector_db_with_index.insert_chunks(chunks)
 
-        mock_inference_api.embeddings.assert_called_once_with(
+        mock_inference_api.openai_embeddings.assert_called_once_with(
             "test-model with partial embeddings", ["Test 1", "Test 3"]
         )
         mock_index.add_chunks.assert_called_once()