diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 0a5caa3d1..affc426d6 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -13943,6 +13943,10 @@
"rewrite_query": {
"type": "boolean",
"description": "Whether to rewrite the natural language query for vector search (default false)"
+ },
+ "search_mode": {
+ "type": "string",
+ "description": "The search mode to use - \"keyword\", \"vector\", or \"hybrid\" (default \"vector\")"
}
},
"additionalProperties": false,
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index c115e1df2..1e1293dc2 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -9737,6 +9737,10 @@ components:
description: >-
Whether to rewrite the natural language query for vector search (default
false)
+ search_mode:
+ type: string
+ description: >-
+          The search mode to use, one of "keyword", "vector", or "hybrid" (default "vector")
additionalProperties: false
required:
- query
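
Taken together, the two spec hunks above add the same optional string property to the search request schema in both generated files. A request that exercises it might carry a body like the following minimal sketch (only "query" is required per the schema; "max_num_results" and "rewrite_query" are taken from the surrounding schema and the Python signature below):

    import json

    # Sketch of a search request body under the updated schema. "search_mode"
    # is the new optional property and defaults to "vector" when omitted.
    body = {
        "query": "What is Llama Stack?",  # required
        "max_num_results": 5,             # assumed from the Python API signature
        "rewrite_query": False,
        "search_mode": "hybrid",          # one of "keyword", "vector", "hybrid"
    }
    print(json.dumps(body, indent=2))
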
diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py
index 017fa62de..d6de0108c 100644
--- a/llama_stack/apis/vector_io/vector_io.py
+++ b/llama_stack/apis/vector_io/vector_io.py
@@ -372,6 +372,7 @@ class VectorIO(Protocol):
max_num_results: int | None = 10,
ranking_options: SearchRankingOptions | None = None,
rewrite_query: bool | None = False,
+ search_mode: str | None = "vector", # Using str instead of Literal due to OpenAPI schema generator limitations
) -> VectorStoreSearchResponsePage:
"""Search for chunks in a vector store.
@@ -383,6 +384,7 @@ class VectorIO(Protocol):
:param max_num_results: Maximum number of results to return (1 to 50 inclusive, default 10).
:param ranking_options: Ranking options for fine-tuning the search results.
:param rewrite_query: Whether to rewrite the natural language query for vector search (default false)
+        :param search_mode: The search mode to use, one of "keyword", "vector", or "hybrid" (default "vector")
:returns: A VectorStoreSearchResponse containing the search results.
"""
...
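
The inline comment above captures the key API decision: Literal["keyword", "vector", "hybrid"] trips up the OpenAPI schema generator, so the wire-level type stays a plain str. Providers can still recover the narrow type internally; here is a minimal sketch of that pattern (the SearchMode alias and coerce_search_mode helper are hypothetical, not part of this diff):

    from typing import Literal, cast

    SearchMode = Literal["keyword", "vector", "hybrid"]
    _VALID_MODES = ("keyword", "vector", "hybrid")

    def coerce_search_mode(value: str | None) -> SearchMode:
        """Narrow the wire-level `str | None` to the Literal used internally."""
        mode = value or "vector"  # None falls back to the documented default
        if mode not in _VALID_MODES:
            raise ValueError(f"search_mode must be one of {_VALID_MODES}, got {mode!r}")
        return cast(SearchMode, mode)
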
diff --git a/llama_stack/distribution/routers/vector_io.py b/llama_stack/distribution/routers/vector_io.py
index 643029d60..6af3bd416 100644
--- a/llama_stack/distribution/routers/vector_io.py
+++ b/llama_stack/distribution/routers/vector_io.py
@@ -255,6 +255,7 @@ class VectorIORouter(VectorIO):
max_num_results: int | None = 10,
ranking_options: SearchRankingOptions | None = None,
rewrite_query: bool | None = False,
+ search_mode: str | None = "vector",
) -> VectorStoreSearchResponsePage:
logger.debug(f"VectorIORouter.openai_search_vector_store: {vector_store_id}")
# Route based on vector store ID
@@ -266,6 +267,7 @@ class VectorIORouter(VectorIO):
max_num_results=max_num_results,
ranking_options=ranking_options,
rewrite_query=rewrite_query,
+ search_mode=search_mode,
)
async def openai_attach_file_to_vector_store(
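
The router change is pure plumbing: accept search_mode with the same default and hand it to whichever provider owns the store. A condensed, self-contained sketch of that pass-through shape (the dict-based backend lookup is schematic; the real router resolves providers through its routing table):

    from typing import Any, Protocol

    class SearchBackend(Protocol):
        async def openai_search_vector_store(
            self, vector_store_id: str, query: str, **kwargs: Any
        ) -> Any: ...

    class SketchRouter:
        def __init__(self, backends: dict[str, SearchBackend]) -> None:
            self.backends = backends  # vector_store_id -> provider

        async def openai_search_vector_store(
            self,
            vector_store_id: str,
            query: str,
            *,
            search_mode: str | None = "vector",
            **kwargs: Any,
        ) -> Any:
            backend = self.backends[vector_store_id]
            # The new parameter is forwarded unchanged, as in the diff above.
            return await backend.openai_search_vector_store(
                vector_store_id=vector_store_id,
                query=query,
                search_mode=search_mode,
                **kwargs,
            )
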
diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py
index 407bdda56..06d1786f0 100644
--- a/llama_stack/providers/remote/vector_io/chroma/chroma.py
+++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py
@@ -256,6 +256,7 @@ class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
max_num_results: int | None = 10,
ranking_options: SearchRankingOptions | None = None,
rewrite_query: bool | None = False,
+ search_mode: str | None = "vector",
) -> VectorStoreSearchResponsePage:
raise NotImplementedError("OpenAI Vector Stores API is not supported in Chroma")
diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py
index 09d8520d1..182227a85 100644
--- a/llama_stack/providers/remote/vector_io/milvus/milvus.py
+++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py
@@ -254,8 +254,9 @@ class MilvusVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
max_num_results: int | None = 10,
ranking_options: SearchRankingOptions | None = None,
rewrite_query: bool | None = False,
+ search_mode: str | None = "vector",
) -> VectorStoreSearchResponsePage:
- raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
+ raise NotImplementedError("OpenAI Vector Stores API is not supported in Milvus")
async def openai_attach_file_to_vector_store(
self,
diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
index 694a1bf93..e9d6eec22 100644
--- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
+++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
@@ -256,6 +256,7 @@ class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
max_num_results: int | None = 10,
ranking_options: SearchRankingOptions | None = None,
rewrite_query: bool | None = False,
+ search_mode: str | None = "vector",
) -> VectorStoreSearchResponsePage:
raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
index c741f7045..8b962db76 100644
--- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -337,13 +337,16 @@ class OpenAIVectorStoreMixin(ABC):
max_num_results: int | None = 10,
ranking_options: SearchRankingOptions | None = None,
rewrite_query: bool | None = False,
- # search_mode: Literal["keyword", "vector", "hybrid"] = "vector",
+ search_mode: str | None = "vector", # Using str instead of Literal due to OpenAPI schema generator limitations
) -> VectorStoreSearchResponsePage:
"""Search for chunks in a vector store."""
- # TODO: Add support in the API for this
- search_mode = "vector"
max_num_results = max_num_results or 10
+        search_mode = search_mode or "vector"  # None falls back to the documented default
+        valid_modes = {"keyword", "vector", "hybrid"}
+        if search_mode not in valid_modes:
+            raise ValueError(f"search_mode must be one of {valid_modes}, got {search_mode}")
+
if vector_store_id not in self.openai_vector_stores:
raise ValueError(f"Vector store {vector_store_id} not found")
diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py
index 4856455c4..4c061f519 100644
--- a/tests/integration/vector_io/test_openai_vector_stores.py
+++ b/tests/integration/vector_io/test_openai_vector_stores.py
@@ -804,3 +804,23 @@ def test_openai_vector_store_update_file(compat_client_with_empty_stores, client
file_id=file.id,
)
assert retrieved_file.attributes["foo"] == "baz"
+
+
+@pytest.mark.skip(reason="Client library needs to be scaffolded to support search_mode parameter")
+def test_openai_vector_store_search_modes():
+ """Test OpenAI vector store search with different search modes.
+
+ This test is skipped because the client library
+ needs to be regenerated from the updated OpenAPI spec to support the
+ search_mode parameter. Once the client library is updated, this test
+ can be enabled to verify:
+ - vector search mode (default)
+ - keyword search mode
+ - hybrid search mode
+ - invalid search mode validation
+ """
+ # TODO: Enable this test once llama_stack_client is updated to support search_mode
+ # The server-side implementation is complete but the client
+ # library needs to be updated:
+ # https://github.com/meta-llama/llama-stack-client-python/blob/52c0b5d23e9ae67ceb09d755143d436f38c20547/src/llama_stack_client/resources/vector_stores/vector_stores.py#L314
+ pass
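
Once the client library ships the parameter, the body of the skipped test could look roughly like this sketch (the vector_stores.create and vector_stores.search call shapes are assumed from the linked client resource file, not verified):

    import pytest

    def test_openai_vector_store_search_modes(compat_client_with_empty_stores, client_with_models):
        compat_client = compat_client_with_empty_stores
        vector_store = compat_client.vector_stores.create(name="search_modes_test")

        # Every valid mode should round-trip without error.
        for mode in ("vector", "keyword", "hybrid"):
            response = compat_client.vector_stores.search(
                vector_store_id=vector_store.id,
                query="What is the capital of France?",
                search_mode=mode,
            )
            assert response is not None

        # An unknown mode should be rejected by the server-side validation.
        with pytest.raises(Exception):
            compat_client.vector_stores.search(
                vector_store_id=vector_store.id,
                query="anything",
                search_mode="invalid_mode",
            )
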