From 6a2a0836c5b771280b29f575975de68b0026db99 Mon Sep 17 00:00:00 2001
From: Varsha Prasad Narsing <varshaprasad96@gmail.com>
Date: Wed, 21 May 2025 10:57:37 -0700
Subject: [PATCH] Feature: Configuring search modes for RAG - Address review

Signed-off-by: Varsha Prasad Narsing <varshaprasad96@gmail.com>
---
 docs/_static/llama-stack-spec.html                            | 2 +-
 docs/_static/llama-stack-spec.yaml                            | 2 +-
 llama_stack/apis/tools/rag_tool.py                            | 2 +-
 llama_stack/providers/inline/vector_io/faiss/faiss.py         | 2 +-
 .../providers/inline/vector_io/sqlite_vec/sqlite_vec.py       | 4 +---
 llama_stack/providers/remote/vector_io/chroma/chroma.py       | 2 +-
 llama_stack/providers/remote/vector_io/milvus/milvus.py       | 2 +-
 llama_stack/providers/remote/vector_io/pgvector/pgvector.py   | 2 +-
 llama_stack/providers/remote/vector_io/qdrant/qdrant.py       | 2 +-
 llama_stack/providers/remote/vector_io/weaviate/weaviate.py   | 2 +-
 llama_stack/providers/utils/memory/vector_store.py            | 2 +-
 11 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 0735aa8b0..33befc95e 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -11611,7 +11611,7 @@
                     },
                     "mode": {
                         "type": "string",
-                        "description": "Search mode for retrieval—either \"vector\" or \"keyword\"."
+                        "description": "Search mode for retrieval—either \"vector\" or \"keyword\". Default \"vector\"."
                     }
                 },
                 "additionalProperties": false,
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 8cd7bc5d8..cae6331b0 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -8089,7 +8089,7 @@ components:
         mode:
           type: string
           description: >-
-            Search mode for retrieval—either "vector" or "keyword".
+            Search mode for retrieval—either "vector" or "keyword". Default "vector".
       additionalProperties: false
       required:
         - query_generator_config
diff --git a/llama_stack/apis/tools/rag_tool.py b/llama_stack/apis/tools/rag_tool.py
index 0cc521baf..1e3542f74 100644
--- a/llama_stack/apis/tools/rag_tool.py
+++ b/llama_stack/apis/tools/rag_tool.py
@@ -76,7 +76,7 @@ class RAGQueryConfig(BaseModel):
     :param chunk_template: Template for formatting each retrieved chunk in the context.
         Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict).
         Default: "Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n"
-    :param mode: Search mode for retrieval—either "vector" or "keyword".
+    :param mode: Search mode for retrieval—either "vector" or "keyword". Default "vector".
     """
 
     # This config defines how a query is generated using the messages
diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py
index 050605464..47256d88d 100644
--- a/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -118,7 +118,7 @@ class FaissIndex(EmbeddingIndex):
 
     async def query_keyword(
         self,
-        query_string: str | None,
+        query_string: str,
         k: int,
         score_threshold: float,
     ) -> QueryChunksResponse:
diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
index 402cd5ffd..fc1a8ddb0 100644
--- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -203,8 +203,6 @@ class SQLiteVecIndex(EmbeddingIndex):
         """
         Performs vector-based search using a virtual table for vector similarity.
         """
-        if embedding is None:
-            raise ValueError("embedding is required for vector search.")
 
         def _execute_query():
             connection = _create_sqlite_connection(self.db_path)
@@ -243,7 +241,7 @@ class SQLiteVecIndex(EmbeddingIndex):
 
     async def query_keyword(
         self,
-        query_string: str | None,
+        query_string: str,
         k: int,
         score_threshold: float,
     ) -> QueryChunksResponse:
diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py
index 52aacbe59..a59a38573 100644
--- a/llama_stack/providers/remote/vector_io/chroma/chroma.py
+++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py
@@ -86,7 +86,7 @@ class ChromaIndex(EmbeddingIndex):
 
     async def query_keyword(
         self,
-        query_string: str | None,
+        query_string: str,
         k: int,
         score_threshold: float,
     ) -> QueryChunksResponse:
diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py
index 67c5d4474..6628292db 100644
--- a/llama_stack/providers/remote/vector_io/milvus/milvus.py
+++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py
@@ -88,7 +88,7 @@ class MilvusIndex(EmbeddingIndex):
 
     async def query_keyword(
         self,
-        query_string: str | None,
+        query_string: str,
         k: int,
         score_threshold: float,
     ) -> QueryChunksResponse:
diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
index 150129c5c..ea918c552 100644
--- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
+++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
@@ -122,7 +122,7 @@ class PGVectorIndex(EmbeddingIndex):
 
     async def query_keyword(
         self,
-        query_string: str | None,
+        query_string: str,
         k: int,
         score_threshold: float,
     ) -> QueryChunksResponse:
diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
index 4357ec03a..ff0690083 100644
--- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
+++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
@@ -97,7 +97,7 @@ class QdrantIndex(EmbeddingIndex):
 
     async def query_keyword(
         self,
-        query_string: str | None,
+        query_string: str,
         k: int,
         score_threshold: float,
     ) -> QueryChunksResponse:
diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
index f0d154b09..e6fe8ccd3 100644
--- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
+++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
@@ -86,7 +86,7 @@ class WeaviateIndex(EmbeddingIndex):
 
     async def query_keyword(
         self,
-        query_string: str | None,
+        query_string: str,
         k: int,
         score_threshold: float,
     ) -> QueryChunksResponse:
diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py
index d915942be..3655c7049 100644
--- a/llama_stack/providers/utils/memory/vector_store.py
+++ b/llama_stack/providers/utils/memory/vector_store.py
@@ -181,7 +181,7 @@ class EmbeddingIndex(ABC):
         raise NotImplementedError()
 
     @abstractmethod
-    async def query_keyword(self, query_string: str | None, k: int, score_threshold: float) -> QueryChunksResponse:
+    async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
         raise NotImplementedError()
 
     @abstractmethod