diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py
index c45651033..7a5373726 100644
--- a/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -160,8 +160,11 @@ class FaissIndex(EmbeddingIndex):
         for d, i in zip(distances[0], indices[0], strict=False):
             if i < 0:
                 continue
+            score = 1.0 / float(d) if d != 0 else float("inf")
+            if score < score_threshold:
+                continue
             chunks.append(self.chunk_by_index[int(i)])
-            scores.append(1.0 / float(d) if d != 0 else float("inf"))
+            scores.append(score)
 
         return QueryChunksResponse(chunks=chunks, scores=scores)
 
diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
index 28af2b911..b1645ac5a 100644
--- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
+++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
@@ -132,8 +132,11 @@ class PGVectorIndex(EmbeddingIndex):
         chunks = []
         scores = []
         for doc, dist in results:
+            score = 1.0 / float(dist) if dist != 0 else float("inf")
+            if score < score_threshold:
+                continue
             chunks.append(Chunk(**doc))
-            scores.append(1.0 / float(dist) if dist != 0 else float("inf"))
+            scores.append(score)
 
         return QueryChunksResponse(chunks=chunks, scores=scores)
 
diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
index e21d1377f..11da8902c 100644
--- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
+++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
@@ -105,8 +105,12 @@ class WeaviateIndex(EmbeddingIndex):
                 log.exception(f"Failed to parse document: {chunk_json}")
                 continue
 
+            score = 1.0 / doc.metadata.distance if doc.metadata.distance != 0 else float("inf")
+            if score < score_threshold:
+                continue
+
             chunks.append(chunk)
-            scores.append(1.0 / doc.metadata.distance if doc.metadata.distance != 0 else float("inf"))
+            scores.append(score)
 
         return QueryChunksResponse(chunks=chunks, scores=scores)
 
diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
index c0b3175b0..7b6e69df1 100644
--- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -433,10 +433,6 @@ class OpenAIVectorStoreMixin(ABC):
         # Convert response to OpenAI format
         data = []
         for chunk, score in zip(response.chunks, response.scores, strict=False):
-            # Apply score based filtering
-            if score < score_threshold:
-                continue
-
             # Apply filters if provided
             if filters:
                 # Simple metadata filtering