From 3c2aee610db63c63213c37f243b2724158840dd9 Mon Sep 17 00:00:00 2001 From: Varsha Date: Sat, 2 Aug 2025 15:57:03 -0700 Subject: [PATCH] refactor: Remove double filtering based on score threshold (#3019) # What does this PR do? Remove score_threshold based check from `OpenAIVectorStoreMixin` Closes: https://github.com/meta-llama/llama-stack/issues/3018 ## Test Plan --- llama_stack/providers/inline/vector_io/faiss/faiss.py | 5 ++++- llama_stack/providers/remote/vector_io/pgvector/pgvector.py | 5 ++++- llama_stack/providers/remote/vector_io/weaviate/weaviate.py | 6 +++++- .../providers/utils/memory/openai_vector_store_mixin.py | 4 ---- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py index c45651033..7a5373726 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -160,8 +160,11 @@ class FaissIndex(EmbeddingIndex): for d, i in zip(distances[0], indices[0], strict=False): if i < 0: continue + score = 1.0 / float(d) if d != 0 else float("inf") + if score < score_threshold: + continue chunks.append(self.chunk_by_index[int(i)]) - scores.append(1.0 / float(d) if d != 0 else float("inf")) + scores.append(score) return QueryChunksResponse(chunks=chunks, scores=scores) diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index 28af2b911..b1645ac5a 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -132,8 +132,11 @@ class PGVectorIndex(EmbeddingIndex): chunks = [] scores = [] for doc, dist in results: + score = 1.0 / float(dist) if dist != 0 else float("inf") + if score < score_threshold: + continue chunks.append(Chunk(**doc)) - scores.append(1.0 / float(dist) if dist != 0 else float("inf")) + scores.append(score) return QueryChunksResponse(chunks=chunks, scores=scores) diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index e21d1377f..11da8902c 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -105,8 +105,12 @@ class WeaviateIndex(EmbeddingIndex): log.exception(f"Failed to parse document: {chunk_json}") continue + score = 1.0 / doc.metadata.distance if doc.metadata.distance != 0 else float("inf") + if score < score_threshold: + continue + chunks.append(chunk) - scores.append(1.0 / doc.metadata.distance if doc.metadata.distance != 0 else float("inf")) + scores.append(score) return QueryChunksResponse(chunks=chunks, scores=scores) diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index c0b3175b0..7b6e69df1 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -433,10 +433,6 @@ class OpenAIVectorStoreMixin(ABC): # Convert response to OpenAI format data = [] for chunk, score in zip(response.chunks, response.scores, strict=False): - # Apply score based filtering - if score < score_threshold: - continue - # Apply filters if provided if filters: # Simple metadata filtering