From 6a2a0836c5b771280b29f575975de68b0026db99 Mon Sep 17 00:00:00 2001 From: Varsha Prasad Narsing Date: Wed, 21 May 2025 10:57:37 -0700 Subject: [PATCH] Feature: Configuring search modes for RAG - Address review Signed-off-by: Varsha Prasad Narsing --- docs/_static/llama-stack-spec.html | 2 +- docs/_static/llama-stack-spec.yaml | 2 +- llama_stack/apis/tools/rag_tool.py | 2 +- llama_stack/providers/inline/vector_io/faiss/faiss.py | 2 +- .../providers/inline/vector_io/sqlite_vec/sqlite_vec.py | 4 +--- llama_stack/providers/remote/vector_io/chroma/chroma.py | 2 +- llama_stack/providers/remote/vector_io/milvus/milvus.py | 2 +- llama_stack/providers/remote/vector_io/pgvector/pgvector.py | 2 +- llama_stack/providers/remote/vector_io/qdrant/qdrant.py | 2 +- llama_stack/providers/remote/vector_io/weaviate/weaviate.py | 2 +- llama_stack/providers/utils/memory/vector_store.py | 2 +- 11 files changed, 11 insertions(+), 13 deletions(-) diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 0735aa8b0..33befc95e 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -11611,7 +11611,7 @@ }, "mode": { "type": "string", - "description": "Search mode for retrieval—either \"vector\" or \"keyword\"." + "description": "Search mode for retrieval—either \"vector\" or \"keyword\". Default \"vector\"." } }, "additionalProperties": false, diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 8cd7bc5d8..cae6331b0 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -8089,7 +8089,7 @@ components: mode: type: string description: >- - Search mode for retrieval—either "vector" or "keyword". + Search mode for retrieval—either "vector" or "keyword". Default "vector". additionalProperties: false required: - query_generator_config diff --git a/llama_stack/apis/tools/rag_tool.py b/llama_stack/apis/tools/rag_tool.py index 0cc521baf..1e3542f74 100644 --- a/llama_stack/apis/tools/rag_tool.py +++ b/llama_stack/apis/tools/rag_tool.py @@ -76,7 +76,7 @@ class RAGQueryConfig(BaseModel): :param chunk_template: Template for formatting each retrieved chunk in the context. Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict). Default: "Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n" - :param mode: Search mode for retrieval—either "vector" or "keyword". + :param mode: Search mode for retrieval—either "vector" or "keyword". Default "vector". """ # This config defines how a query is generated using the messages diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py index 050605464..47256d88d 100644 --- a/llama_stack/providers/inline/vector_io/faiss/faiss.py +++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py @@ -118,7 +118,7 @@ class FaissIndex(EmbeddingIndex): async def query_keyword( self, - query_string: str | None, + query_string: str, k: int, score_threshold: float, ) -> QueryChunksResponse: diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py index 402cd5ffd..fc1a8ddb0 100644 --- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py @@ -203,8 +203,6 @@ class SQLiteVecIndex(EmbeddingIndex): """ Performs vector-based search using a virtual table for vector similarity. """ - if embedding is None: - raise ValueError("embedding is required for vector search.") def _execute_query(): connection = _create_sqlite_connection(self.db_path) @@ -243,7 +241,7 @@ class SQLiteVecIndex(EmbeddingIndex): async def query_keyword( self, - query_string: str | None, + query_string: str, k: int, score_threshold: float, ) -> QueryChunksResponse: diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 52aacbe59..a59a38573 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -86,7 +86,7 @@ class ChromaIndex(EmbeddingIndex): async def query_keyword( self, - query_string: str | None, + query_string: str, k: int, score_threshold: float, ) -> QueryChunksResponse: diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py index 67c5d4474..6628292db 100644 --- a/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -88,7 +88,7 @@ class MilvusIndex(EmbeddingIndex): async def query_keyword( self, - query_string: str | None, + query_string: str, k: int, score_threshold: float, ) -> QueryChunksResponse: diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py index 150129c5c..ea918c552 100644 --- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py +++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py @@ -122,7 +122,7 @@ class PGVectorIndex(EmbeddingIndex): async def query_keyword( self, - query_string: str | None, + query_string: str, k: int, score_threshold: float, ) -> QueryChunksResponse: diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 4357ec03a..ff0690083 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -97,7 +97,7 @@ class QdrantIndex(EmbeddingIndex): async def query_keyword( self, - query_string: str | None, + query_string: str, k: int, score_threshold: float, ) -> QueryChunksResponse: diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py index f0d154b09..e6fe8ccd3 100644 --- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py +++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py @@ -86,7 +86,7 @@ class WeaviateIndex(EmbeddingIndex): async def query_keyword( self, - query_string: str | None, + query_string: str, k: int, score_threshold: float, ) -> QueryChunksResponse: diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py index d915942be..3655c7049 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -181,7 +181,7 @@ class EmbeddingIndex(ABC): raise NotImplementedError() @abstractmethod - async def query_keyword(self, query_string: str | None, k: int, score_threshold: float) -> QueryChunksResponse: + async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse: raise NotImplementedError() @abstractmethod