diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 0a5caa3d1..affc426d6 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -13943,6 +13943,10 @@
                 "rewrite_query": {
                     "type": "boolean",
                     "description": "Whether to rewrite the natural language query for vector search (default false)"
+                },
+                "search_mode": {
+                    "type": "string",
+                    "description": "The search mode to use - \"keyword\", \"vector\", or \"hybrid\" (default \"vector\")"
                 }
             },
             "additionalProperties": false,
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index c115e1df2..1e1293dc2 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -9737,6 +9737,10 @@ components:
         description: >-
           Whether to rewrite the natural language query for vector search (default
           false)
+      search_mode:
+        type: string
+        description: >-
+          The search mode to use - "keyword", "vector", or "hybrid" (default "vector")
     additionalProperties: false
     required:
       - query
diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py
index 017fa62de..d6de0108c 100644
--- a/llama_stack/apis/vector_io/vector_io.py
+++ b/llama_stack/apis/vector_io/vector_io.py
@@ -372,6 +372,7 @@ class VectorIO(Protocol):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",  # Using str instead of Literal due to OpenAPI schema generator limitations
     ) -> VectorStoreSearchResponsePage:
         """Search for chunks in a vector store.

@@ -383,6 +384,7 @@
         :param max_num_results: Maximum number of results to return (1 to 50 inclusive, default 10).
         :param ranking_options: Ranking options for fine-tuning the search results.
         :param rewrite_query: Whether to rewrite the natural language query for vector search (default false)
+        :param search_mode: The search mode to use - "keyword", "vector", or "hybrid" (default "vector")
         :returns: A VectorStoreSearchResponse containing the search results.
         """
         ...
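Note: a minimal usage sketch of the new parameter as exposed by the VectorIO protocol above. The `vector_io` handle, the store ID, and the assumption that results live under `page.data` are illustrative, not part of this diff:

    # Sketch only: assumes an already-resolved VectorIO implementation
    # (`vector_io`) and an existing vector store "vs_123", both hypothetical.
    import asyncio

    async def search_with_mode(vector_io) -> None:
        page = await vector_io.openai_search_vector_store(
            vector_store_id="vs_123",
            query="how does hybrid retrieval work?",
            max_num_results=5,
            search_mode="hybrid",  # "keyword", "vector" (default), or "hybrid"
        )
        # VectorStoreSearchResponsePage mirrors the OpenAI page shape, so the
        # results are assumed to be under `.data`.
        for result in page.data:
            print(result)

    # asyncio.run(search_with_mode(my_vector_io_impl))  # hypothetical handle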
diff --git a/llama_stack/distribution/routers/vector_io.py b/llama_stack/distribution/routers/vector_io.py
index 643029d60..6af3bd416 100644
--- a/llama_stack/distribution/routers/vector_io.py
+++ b/llama_stack/distribution/routers/vector_io.py
@@ -255,6 +255,7 @@ class VectorIORouter(VectorIO):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",
     ) -> VectorStoreSearchResponsePage:
         logger.debug(f"VectorIORouter.openai_search_vector_store: {vector_store_id}")
         # Route based on vector store ID
@@ -266,6 +267,7 @@
             max_num_results=max_num_results,
             ranking_options=ranking_options,
             rewrite_query=rewrite_query,
+            search_mode=search_mode,
         )

     async def openai_attach_file_to_vector_store(
diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py
index 407bdda56..06d1786f0 100644
--- a/llama_stack/providers/remote/vector_io/chroma/chroma.py
+++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py
@@ -256,6 +256,7 @@ class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",
     ) -> VectorStoreSearchResponsePage:
         raise NotImplementedError("OpenAI Vector Stores API is not supported in Chroma")

diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py
index 09d8520d1..182227a85 100644
--- a/llama_stack/providers/remote/vector_io/milvus/milvus.py
+++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py
@@ -254,8 +254,9 @@ class MilvusVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",
     ) -> VectorStoreSearchResponsePage:
-        raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in Milvus")

     async def openai_attach_file_to_vector_store(
         self,
diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
index 694a1bf93..e9d6eec22 100644
--- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
+++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
@@ -256,6 +256,7 @@ class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",
     ) -> VectorStoreSearchResponsePage:
         raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")

diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
index c741f7045..8b962db76 100644
--- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -337,13 +337,16 @@ class OpenAIVectorStoreMixin(ABC):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
-        # search_mode: Literal["keyword", "vector", "hybrid"] = "vector",
+        search_mode: str | None = "vector",  # Using str instead of Literal due to OpenAPI schema generator limitations
     ) -> VectorStoreSearchResponsePage:
         """Search for chunks in a vector store."""
-        # TODO: Add support in the API for this
-        search_mode = "vector"
         max_num_results = max_num_results or 10

+        # Validate search_mode
+        valid_modes = {"keyword", "vector", "hybrid"}
+        if search_mode not in valid_modes:
+            raise ValueError(f"search_mode must be one of {valid_modes}, got {search_mode}")
+
         if vector_store_id not in self.openai_vector_stores:
             raise ValueError(f"Vector store {vector_store_id} not found")

diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py
index 4856455c4..4c061f519 100644
--- a/tests/integration/vector_io/test_openai_vector_stores.py
+++ b/tests/integration/vector_io/test_openai_vector_stores.py
@@ -804,3 +804,23 @@ def test_openai_vector_store_update_file(compat_client_with_empty_stores, client
         file_id=file.id,
     )
     assert retrieved_file.attributes["foo"] == "baz"
+
+
+@pytest.mark.skip(reason="Client library needs to be scaffolded to support search_mode parameter")
+def test_openai_vector_store_search_modes():
+    """Test OpenAI vector store search with different search modes.
+
+    This test is skipped because the client library
+    needs to be regenerated from the updated OpenAPI spec to support the
+    search_mode parameter. Once the client library is updated, this test
+    can be enabled to verify:
+    - vector search mode (default)
+    - keyword search mode
+    - hybrid search mode
+    - invalid search mode validation
+    """
+    # TODO: Enable this test once llama_stack_client is updated to support search_mode
+    # The server-side implementation is complete but the client
+    # library needs to be updated:
+    # https://github.com/meta-llama/llama-stack-client-python/blob/52c0b5d23e9ae67ceb09d755143d436f38c20547/src/llama_stack_client/resources/vector_stores/vector_stores.py#L314
+    pass
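Note: once the client library is regenerated, the skipped test could take roughly the following shape. This is a sketch only: the fixture name mirrors the other tests in this file, and the `search_mode` keyword on `client.vector_stores.search` is exactly the assumption the skip reason describes, not a call that exists in the client today:

    # Sketch only: a possible body for test_openai_vector_store_search_modes
    # once the regenerated client exposes search_mode.
    import pytest

    def test_openai_vector_store_search_modes(compat_client_with_empty_stores):
        client = compat_client_with_empty_stores
        vector_store = client.vector_stores.create(name="search_modes_test")

        # Every valid mode should come back as a response page without raising.
        for mode in ("vector", "keyword", "hybrid"):
            page = client.vector_stores.search(
                vector_store_id=vector_store.id,
                query="test query",
                search_mode=mode,  # assumed kwarg on the regenerated client
            )
            assert page is not None

        # The server-side validation added in the mixin rejects unknown modes.
        with pytest.raises(Exception):
            client.vector_stores.search(
                vector_store_id=vector_store.id,
                query="test query",
                search_mode="made_up_mode",
            )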