Feature: Configuring search modes for RAG - Address review

Signed-off-by: Varsha Prasad Narsing <varshaprasad96@gmail.com>
2025-12-27 15:28:05 +00:00 · 2025-05-21 10:57:37 -07:00 · 2025-05-21 10:57:37 -07:00 · 6a2a0836c5
commit 6a2a0836c5
parent 2060fdba7f
11 changed files with 11 additions and 13 deletions
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -11611,7 +11611,7 @@
                    },
                    "mode": {
                        "type": "string",
-                        "description": "Search mode for retrieval—either \"vector\" or \"keyword\"."
+                        "description": "Search mode for retrieval—either \"vector\" or \"keyword\". Default \"vector\"."
                    }
                },
                "additionalProperties": false,
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -8089,7 +8089,7 @@ components:
        mode:
          type: string
          description: >-
-            Search mode for retrieval—either "vector" or "keyword".
+            Search mode for retrieval—either "vector" or "keyword". Default "vector".
      additionalProperties: false
      required:
        - query_generator_config
--- a/llama_stack/apis/tools/rag_tool.py
+++ b/llama_stack/apis/tools/rag_tool.py
@@ -76,7 +76,7 @@ class RAGQueryConfig(BaseModel):
    :param chunk_template: Template for formatting each retrieved chunk in the context.
        Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict).
        Default: "Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n"
-    :param mode: Search mode for retrieval—either "vector" or "keyword".
+    :param mode: Search mode for retrieval—either "vector" or "keyword". Default "vector".
    """

    # This config defines how a query is generated using the messages
--- a/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -118,7 +118,7 @@ class FaissIndex(EmbeddingIndex):

    async def query_keyword(
        self,
-        query_string: str | None,
+        query_string: str,
        k: int,
        score_threshold: float,
    ) -> QueryChunksResponse:
--- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -203,8 +203,6 @@ class SQLiteVecIndex(EmbeddingIndex):
        """
        Performs vector-based search using a virtual table for vector similarity.
        """
-        if embedding is None:
-            raise ValueError("embedding is required for vector search.")

        def _execute_query():
            connection = _create_sqlite_connection(self.db_path)
@@ -243,7 +241,7 @@ class SQLiteVecIndex(EmbeddingIndex):

    async def query_keyword(
        self,
-        query_string: str | None,
+        query_string: str,
        k: int,
        score_threshold: float,
    ) -> QueryChunksResponse:
--- a/llama_stack/providers/remote/vector_io/chroma/chroma.py
+++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py
@@ -86,7 +86,7 @@ class ChromaIndex(EmbeddingIndex):

    async def query_keyword(
        self,
-        query_string: str | None,
+        query_string: str,
        k: int,
        score_threshold: float,
    ) -> QueryChunksResponse:
--- a/llama_stack/providers/remote/vector_io/milvus/milvus.py
+++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py
@@ -88,7 +88,7 @@ class MilvusIndex(EmbeddingIndex):

    async def query_keyword(
        self,
-        query_string: str | None,
+        query_string: str,
        k: int,
        score_threshold: float,
    ) -> QueryChunksResponse:
--- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
+++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
@@ -122,7 +122,7 @@ class PGVectorIndex(EmbeddingIndex):

    async def query_keyword(
        self,
-        query_string: str | None,
+        query_string: str,
        k: int,
        score_threshold: float,
    ) -> QueryChunksResponse:
--- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
+++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
@@ -97,7 +97,7 @@ class QdrantIndex(EmbeddingIndex):

    async def query_keyword(
        self,
-        query_string: str | None,
+        query_string: str,
        k: int,
        score_threshold: float,
    ) -> QueryChunksResponse:
--- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
+++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
@@ -86,7 +86,7 @@ class WeaviateIndex(EmbeddingIndex):

    async def query_keyword(
        self,
-        query_string: str | None,
+        query_string: str,
        k: int,
        score_threshold: float,
    ) -> QueryChunksResponse:
--- a/llama_stack/providers/utils/memory/vector_store.py
+++ b/llama_stack/providers/utils/memory/vector_store.py
@@ -181,7 +181,7 @@ class EmbeddingIndex(ABC):
        raise NotImplementedError()

    @abstractmethod
-    async def query_keyword(self, query_string: str | None, k: int, score_threshold: float) -> QueryChunksResponse:
+    async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
        raise NotImplementedError()

    @abstractmethod