fix(vector-io): unify score calculation to use cosine and normalize to [0,1]

2025-10-08 04:54:38 +00:00 · 2025-09-04 13:03:59 +01:00 · 2025-09-04 13:03:59 +01:00 · a0e0c7030b
commit a0e0c7030b
parent 9618adba89
9 changed files with 166 additions and 42 deletions
--- a/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -52,7 +52,7 @@ OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_conten

 class FaissIndex(EmbeddingIndex):
    def __init__(self, dimension: int, kvstore: KVStore | None = None, bank_id: str | None = None):
-        self.index = faiss.IndexFlatL2(dimension)
+        self.index = faiss.IndexFlatIP(dimension)
        self.chunk_by_index: dict[int, Chunk] = {}
        self.kvstore = kvstore
        self.bank_id = bank_id
@@ -122,8 +122,12 @@ class FaissIndex(EmbeddingIndex):
        for i, chunk in enumerate(chunks):
            self.chunk_by_index[indexlen + i] = chunk

+        # Normalize embeddings for cosine similarity
+        normalized_embeddings = np.array(embeddings).astype(np.float32)
+        faiss.normalize_L2(normalized_embeddings)
+
        async with self.chunk_id_lock:
-            self.index.add(np.array(embeddings).astype(np.float32))
+            self.index.add(normalized_embeddings)
            self.chunk_ids.extend([chunk.chunk_id for chunk in chunks])

        # Save updated index
@@ -160,18 +164,28 @@ class FaissIndex(EmbeddingIndex):
        k: int,
        score_threshold: float,
    ) -> QueryChunksResponse:
-        distances, indices = await asyncio.to_thread(self.index.search, embedding.reshape(1, -1).astype(np.float32), k)
+        logger.info(
+            f"FAISS VECTOR SEARCH CALLED: embedding_shape={embedding.shape}, k={k}, threshold={score_threshold}"
+        )
+        # Normalize query embedding for cosine similarity
+        query_embedding = embedding.reshape(1, -1).astype(np.float32)
+        faiss.normalize_L2(query_embedding)
+
+        distances, indices = await asyncio.to_thread(self.index.search, query_embedding, k)
        chunks = []
        scores = []
        for d, i in zip(distances[0], indices[0], strict=False):
            if i < 0:
                continue
-            score = 1.0 / float(d) if d != 0 else float("inf")
+            # For IndexFlatIP with normalized vectors, d is cosine similarity in [-1,1]
+            score = (float(d) + 1.0) / 2.0  # rescale to [0,1]
+            logger.info(f"Computed score {score} from distance {d} for chunk id {self.chunk_ids[int(i)]}")
            if score < score_threshold:
                continue
            chunks.append(self.chunk_by_index[int(i)])
            scores.append(score)

+        logger.info(f"FAISS VECTOR SEARCH RESULTS: Found {len(chunks)} chunks with scores {scores}")
        return QueryChunksResponse(chunks=chunks, scores=scores)

    async def query_keyword(
@@ -241,7 +255,7 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
        """
        try:
            vector_dimension = 128  # sample dimension
-            faiss.IndexFlatL2(vector_dimension)
+            faiss.IndexFlatIP(vector_dimension)
            return HealthResponse(status=HealthStatus.OK)
        except Exception as e:
            return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")