From 15c1f8b88555d0d6a8a0203632a379d9a9b98125 Mon Sep 17 00:00:00 2001
From: Matthew Farrellee
Date: Sun, 31 Aug 2025 09:06:59 -0400
Subject: [PATCH] correct output structure

---
 llama_stack/providers/utils/memory/vector_store.py |  6 +++---
 tests/unit/rag/test_vector_store.py                | 11 +++++++++--
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py
index 4095e9610..aaa470970 100644
--- a/llama_stack/providers/utils/memory/vector_store.py
+++ b/llama_stack/providers/utils/memory/vector_store.py
@@ -298,8 +298,8 @@ class VectorDBWithIndex:
                 self.vector_db.embedding_model,
                 [c.content for c in chunks_to_embed],
             )
-            for c, embedding in zip(chunks_to_embed, resp.data, strict=False):
-                c.embedding = embedding
+            for c, data in zip(chunks_to_embed, resp.data, strict=False):
+                c.embedding = data.embedding
 
         embeddings = np.array([c.embedding for c in chunks], dtype=np.float32)
         await self.index.add_chunks(chunks, embeddings)
@@ -335,7 +335,7 @@ class VectorDBWithIndex:
             return await self.index.query_keyword(query_string, k, score_threshold)
 
         embeddings_response = await self.inference_api.openai_embeddings(self.vector_db.embedding_model, [query_string])
-        query_vector = np.array(embeddings_response.data[0], dtype=np.float32)
+        query_vector = np.array(embeddings_response.data[0].embedding, dtype=np.float32)
         if mode == "hybrid":
             return await self.index.query_hybrid(
                 query_vector, query_string, k, score_threshold, reranker_type, reranker_params
diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py
index fcc64f869..8c017a551 100644
--- a/tests/unit/rag/test_vector_store.py
+++ b/tests/unit/rag/test_vector_store.py
@@ -13,6 +13,7 @@ from unittest.mock import AsyncMock, MagicMock
 import numpy as np
 import pytest
 
+from llama_stack.apis.inference.inference import OpenAIEmbeddingData
 from llama_stack.apis.tools import RAGDocument
 from llama_stack.apis.vector_io import Chunk
 from llama_stack.providers.utils.memory.vector_store import (
@@ -218,7 +219,10 @@ class TestVectorDBWithIndex:
             Chunk(content="Test 2", embedding=None, metadata={}),
         ]
 
-        mock_inference_api.openai_embeddings.return_value.data = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]]
+        mock_inference_api.openai_embeddings.return_value.data = [
+            OpenAIEmbeddingData(embedding=[0.1, 0.2, 0.3], index=0),
+            OpenAIEmbeddingData(embedding=[0.4, 0.5, 0.6], index=1),
+        ]
 
         await vector_db_with_index.insert_chunks(chunks)
@@ -310,7 +314,10 @@ class TestVectorDBWithIndex:
             Chunk(content="Test 3", embedding=None, metadata={}),
         ]
 
-        mock_inference_api.openai_embeddings.return_value.data = [[0.1, 0.1, 0.1], [0.3, 0.3, 0.3]]
+        mock_inference_api.openai_embeddings.return_value.data = [
+            OpenAIEmbeddingData(embedding=[0.1, 0.1, 0.1], index=0),
+            OpenAIEmbeddingData(embedding=[0.3, 0.3, 0.3], index=1),
+        ]
 
         await vector_db_with_index.insert_chunks(chunks)