From 3c2aee610db63c63213c37f243b2724158840dd9 Mon Sep 17 00:00:00 2001 From: Varsha Date: Sat, 2 Aug 2025 15:57:03 -0700 Subject: [PATCH] refactor: Remove double filtering based on score threshold (#3019) # What does this PR do? Remove score_threshold based check from `OpenAIVectorStoreMixin` Closes: https://github.com/meta-llama/llama-stack/issues/3018 ## Test Plan --- llama_stack/providers/inline/vector_io/faiss/faiss.py | 5 ++++- llama_stack/providers/remote/vector_io/pgvector/pgvector.py | 5 ++++- llama_stack/providers/remote/vector_io/weaviate/weaviate.py | 6 +++++- .../providers/utils/memory/openai_vector_store_mixin.py | 4 ---- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py index c45651033..7a5373726 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -160,8 +160,11 @@ class FaissIndex(EmbeddingIndex): for d, i in zip(distances[0], indices[0], strict=False): if i < 0: continue + score = 1.0 / float(d) if d != 0 else float("inf") + if score < score_threshold: + continue chunks.append(self.chunk_by_index[int(i)]) - scores.append(1.0 / float(d) if d != 0 else float("inf")) + scores.append(score) return QueryChunksResponse(chunks=chunks, scores=scores) diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index 28af2b911..b1645ac5a 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -132,8 +132,11 @@ class PGVectorIndex(EmbeddingIndex): chunks = [] scores = [] for doc, dist in results: + score = 1.0 / float(dist) if dist != 0 else float("inf") + if score < score_threshold: + continue chunks.append(Chunk(**doc)) - scores.append(1.0 / float(dist) if dist != 0 else float("inf")) + scores.append(score) return QueryChunksResponse(chunks=chunks, scores=scores) diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index e21d1377f..11da8902c 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -105,8 +105,12 @@ class WeaviateIndex(EmbeddingIndex): log.exception(f"Failed to parse document: {chunk_json}") continue + score = 1.0 / doc.metadata.distance if doc.metadata.distance != 0 else float("inf") + if score < score_threshold: + continue + chunks.append(chunk) - scores.append(1.0 / doc.metadata.distance if doc.metadata.distance != 0 else float("inf")) + scores.append(score) return QueryChunksResponse(chunks=chunks, scores=scores) diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index c0b3175b0..7b6e69df1 100644 --- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -433,10 +433,6 @@ class OpenAIVectorStoreMixin(ABC): # Convert response to OpenAI format data = [] for chunk, score in zip(response.chunks, response.scores, strict=False): - # Apply score based filtering - if score < score_threshold: - continue - # Apply filters if provided if filters: # Simple metadata filtering