diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 0735aa8b0..33befc95e 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -11611,7 +11611,7 @@
},
"mode": {
"type": "string",
- "description": "Search mode for retrieval—either \"vector\" or \"keyword\"."
+ "description": "Search mode for retrieval—either \"vector\" or \"keyword\". Default \"vector\"."
}
},
"additionalProperties": false,
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index 8cd7bc5d8..cae6331b0 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -8089,7 +8089,7 @@ components:
mode:
type: string
description: >-
- Search mode for retrieval—either "vector" or "keyword".
+ Search mode for retrieval—either "vector" or "keyword". Default "vector".
additionalProperties: false
required:
- query_generator_config
diff --git a/llama_stack/apis/tools/rag_tool.py b/llama_stack/apis/tools/rag_tool.py
index 0cc521baf..1e3542f74 100644
--- a/llama_stack/apis/tools/rag_tool.py
+++ b/llama_stack/apis/tools/rag_tool.py
@@ -76,7 +76,7 @@ class RAGQueryConfig(BaseModel):
:param chunk_template: Template for formatting each retrieved chunk in the context.
Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk content string), {metadata} (chunk metadata dict).
Default: "Result {index}\\nContent: {chunk.content}\\nMetadata: {metadata}\\n"
- :param mode: Search mode for retrieval—either "vector" or "keyword".
+ :param mode: Search mode for retrieval—either "vector" or "keyword". Default "vector".
"""
# This config defines how a query is generated using the messages
diff --git a/llama_stack/providers/inline/vector_io/faiss/faiss.py b/llama_stack/providers/inline/vector_io/faiss/faiss.py
index 050605464..47256d88d 100644
--- a/llama_stack/providers/inline/vector_io/faiss/faiss.py
+++ b/llama_stack/providers/inline/vector_io/faiss/faiss.py
@@ -118,7 +118,7 @@ class FaissIndex(EmbeddingIndex):
async def query_keyword(
self,
- query_string: str | None,
+ query_string: str,
k: int,
score_threshold: float,
) -> QueryChunksResponse:
diff --git a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
index 402cd5ffd..fc1a8ddb0 100644
--- a/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
+++ b/llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py
@@ -203,8 +203,6 @@ class SQLiteVecIndex(EmbeddingIndex):
"""
Performs vector-based search using a virtual table for vector similarity.
"""
- if embedding is None:
- raise ValueError("embedding is required for vector search.")
def _execute_query():
connection = _create_sqlite_connection(self.db_path)
@@ -243,7 +241,7 @@ class SQLiteVecIndex(EmbeddingIndex):
async def query_keyword(
self,
- query_string: str | None,
+ query_string: str,
k: int,
score_threshold: float,
) -> QueryChunksResponse:
diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py
index 52aacbe59..a59a38573 100644
--- a/llama_stack/providers/remote/vector_io/chroma/chroma.py
+++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py
@@ -86,7 +86,7 @@ class ChromaIndex(EmbeddingIndex):
async def query_keyword(
self,
- query_string: str | None,
+ query_string: str,
k: int,
score_threshold: float,
) -> QueryChunksResponse:
diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py
index 67c5d4474..6628292db 100644
--- a/llama_stack/providers/remote/vector_io/milvus/milvus.py
+++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py
@@ -88,7 +88,7 @@ class MilvusIndex(EmbeddingIndex):
async def query_keyword(
self,
- query_string: str | None,
+ query_string: str,
k: int,
score_threshold: float,
) -> QueryChunksResponse:
diff --git a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
index 150129c5c..ea918c552 100644
--- a/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
+++ b/llama_stack/providers/remote/vector_io/pgvector/pgvector.py
@@ -122,7 +122,7 @@ class PGVectorIndex(EmbeddingIndex):
async def query_keyword(
self,
- query_string: str | None,
+ query_string: str,
k: int,
score_threshold: float,
) -> QueryChunksResponse:
diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
index 4357ec03a..ff0690083 100644
--- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
+++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
@@ -97,7 +97,7 @@ class QdrantIndex(EmbeddingIndex):
async def query_keyword(
self,
- query_string: str | None,
+ query_string: str,
k: int,
score_threshold: float,
) -> QueryChunksResponse:
diff --git a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
index f0d154b09..e6fe8ccd3 100644
--- a/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
+++ b/llama_stack/providers/remote/vector_io/weaviate/weaviate.py
@@ -86,7 +86,7 @@ class WeaviateIndex(EmbeddingIndex):
async def query_keyword(
self,
- query_string: str | None,
+ query_string: str,
k: int,
score_threshold: float,
) -> QueryChunksResponse:
diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py
index d915942be..3655c7049 100644
--- a/llama_stack/providers/utils/memory/vector_store.py
+++ b/llama_stack/providers/utils/memory/vector_store.py
@@ -181,7 +181,7 @@ class EmbeddingIndex(ABC):
raise NotImplementedError()
@abstractmethod
- async def query_keyword(self, query_string: str | None, k: int, score_threshold: float) -> QueryChunksResponse:
+ async def query_keyword(self, query_string: str, k: int, score_threshold: float) -> QueryChunksResponse:
raise NotImplementedError()
@abstractmethod