mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-08 04:54:38 +00:00
fix(vector-io): unify score calculation to use cosine and normalize to [0,1]
This commit is contained in:
parent
9618adba89
commit
a0e0c7030b
9 changed files with 166 additions and 42 deletions
|
@@ -52,7 +52,7 @@ OPENAI_VECTOR_STORES_FILES_CONTENTS_PREFIX = f"openai_vector_stores_files_conten
|
|||
|
||||
class FaissIndex(EmbeddingIndex):
|
||||
def __init__(self, dimension: int, kvstore: KVStore | None = None, bank_id: str | None = None):
|
||||
self.index = faiss.IndexFlatL2(dimension)
|
||||
self.index = faiss.IndexFlatIP(dimension)
|
||||
self.chunk_by_index: dict[int, Chunk] = {}
|
||||
self.kvstore = kvstore
|
||||
self.bank_id = bank_id
|
||||
|
@@ -122,8 +122,12 @@ class FaissIndex(EmbeddingIndex):
|
|||
for i, chunk in enumerate(chunks):
|
||||
self.chunk_by_index[indexlen + i] = chunk
|
||||
|
||||
# Normalize embeddings for cosine similarity
|
||||
normalized_embeddings = np.array(embeddings).astype(np.float32)
|
||||
faiss.normalize_L2(normalized_embeddings)
|
||||
|
||||
async with self.chunk_id_lock:
|
||||
self.index.add(np.array(embeddings).astype(np.float32))
|
||||
self.index.add(normalized_embeddings)
|
||||
self.chunk_ids.extend([chunk.chunk_id for chunk in chunks])
|
||||
|
||||
# Save updated index
|
||||
|
@@ -160,18 +164,28 @@ class FaissIndex(EmbeddingIndex):
|
|||
k: int,
|
||||
score_threshold: float,
|
||||
) -> QueryChunksResponse:
|
||||
distances, indices = await asyncio.to_thread(self.index.search, embedding.reshape(1, -1).astype(np.float32), k)
|
||||
logger.info(
|
||||
f"FAISS VECTOR SEARCH CALLED: embedding_shape={embedding.shape}, k={k}, threshold={score_threshold}"
|
||||
)
|
||||
# Normalize query embedding for cosine similarity
|
||||
query_embedding = embedding.reshape(1, -1).astype(np.float32)
|
||||
faiss.normalize_L2(query_embedding)
|
||||
|
||||
distances, indices = await asyncio.to_thread(self.index.search, query_embedding, k)
|
||||
chunks = []
|
||||
scores = []
|
||||
for d, i in zip(distances[0], indices[0], strict=False):
|
||||
if i < 0:
|
||||
continue
|
||||
score = 1.0 / float(d) if d != 0 else float("inf")
|
||||
# For IndexFlatIP with normalized vectors, d is cosine similarity in [-1,1]
|
||||
score = (float(d) + 1.0) / 2.0 # rescale to [0,1]
|
||||
logger.info(f"Computed score {score} from distance {d} for chunk id {self.chunk_ids[int(i)]}")
|
||||
if score < score_threshold:
|
||||
continue
|
||||
chunks.append(self.chunk_by_index[int(i)])
|
||||
scores.append(score)
|
||||
|
||||
logger.info(f"FAISS VECTOR SEARCH RESULTS: Found {len(chunks)} chunks with scores {scores}")
|
||||
return QueryChunksResponse(chunks=chunks, scores=scores)
|
||||
|
||||
async def query_keyword(
|
||||
|
@@ -241,7 +255,7 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPr
|
|||
"""
|
||||
try:
|
||||
vector_dimension = 128 # sample dimension
|
||||
faiss.IndexFlatL2(vector_dimension)
|
||||
faiss.IndexFlatIP(vector_dimension)
|
||||
return HealthResponse(status=HealthStatus.OK)
|
||||
except Exception as e:
|
||||
return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue