Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-06-27 18:50:41 +00:00)
feat: Add search_mode support to OpenAI vector store API (#2500)
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 1s
Integration Tests / test-matrix (http, 3.12, datasets) (push) Failing after 4s
Integration Tests / test-matrix (http, 3.12, providers) (push) Failing after 4s
Integration Tests / test-matrix (http, 3.12, agents) (push) Failing after 11s
Integration Tests / test-matrix (http, 3.12, scoring) (push) Failing after 9s
Integration Tests / test-matrix (http, 3.12, post_training) (push) Failing after 11s
Integration Tests / test-matrix (http, 3.13, inference) (push) Failing after 12s
Integration Tests / test-matrix (http, 3.13, inspect) (push) Failing after 11s
Integration Tests / test-matrix (http, 3.12, inference) (push) Failing after 18s
Integration Tests / test-matrix (http, 3.12, inspect) (push) Failing after 17s
Integration Tests / test-matrix (http, 3.13, agents) (push) Failing after 16s
Integration Tests / test-matrix (http, 3.12, tool_runtime) (push) Failing after 17s
Integration Tests / test-matrix (http, 3.13, vector_io) (push) Failing after 13s
Integration Tests / test-matrix (library, 3.12, agents) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.12, providers) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.12, vector_io) (push) Failing after 11s
Python Package Build Test / build (3.12) (push) Failing after 3s
Integration Tests / test-matrix (library, 3.12, tool_runtime) (push) Failing after 15s
Test Llama Stack Build / generate-matrix (push) Successful in 6s
Integration Tests / test-matrix (http, 3.13, scoring) (push) Failing after 15s
Integration Tests / test-matrix (library, 3.12, scoring) (push) Failing after 11s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 7s
Integration Tests / test-matrix (http, 3.13, post_training) (push) Failing after 17s
Python Package Build Test / build (3.13) (push) Failing after 5s
Integration Tests / test-matrix (http, 3.13, providers) (push) Failing after 18s
Test Llama Stack Build / build-single-provider (push) Failing after 8s
Integration Tests / test-matrix (library, 3.12, datasets) (push) Failing after 15s
Integration Tests / test-matrix (library, 3.12, post_training) (push) Failing after 15s
Integration Tests / test-matrix (library, 3.12, inspect) (push) Failing after 13s
Integration Tests / test-matrix (library, 3.13, inspect) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.13, post_training) (push) Failing after 12s
Integration Tests / test-matrix (library, 3.13, tool_runtime) (push) Failing after 9s
Integration Tests / test-matrix (http, 3.13, tool_runtime) (push) Failing after 17s
Unit Tests / unit-tests (3.12) (push) Failing after 7s
Integration Tests / test-matrix (library, 3.13, datasets) (push) Failing after 9s
Integration Tests / test-matrix (library, 3.13, inference) (push) Failing after 13s
Integration Tests / test-matrix (library, 3.12, inference) (push) Failing after 17s
Integration Tests / test-matrix (library, 3.13, agents) (push) Failing after 16s
Integration Tests / test-matrix (library, 3.13, vector_io) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.13, providers) (push) Failing after 9s
Integration Tests / test-matrix (http, 3.12, vector_io) (push) Failing after 18s
Integration Tests / test-matrix (library, 3.13, scoring) (push) Failing after 8s
Unit Tests / unit-tests (3.13) (push) Failing after 8s
Integration Tests / test-matrix (http, 3.13, datasets) (push) Failing after 19s
Test Llama Stack Build / build (push) Failing after 5s
Update ReadTheDocs / update-readthedocs (push) Failing after 44s
Test External Providers / test-external-providers (venv) (push) Failing after 47s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 50s
Pre-commit / pre-commit (push) Successful in 2m12s
# What does this PR do?

Adds a `search_mode` parameter (`"vector"`, `"keyword"`, or `"hybrid"`) to the `openai_search_vector_store` method. The parameter is typed as `str` rather than `Literal` to keep OpenAPI code generation working.

Closes: #2459

## Test Plan

A new test, `test_openai_vector_store_search_modes`, is added to the OpenAI vector store integration suite but is skipped until the client library is regenerated to accept `search_mode` (see the test diff below).

Signed-off-by: Varsha Prasad Narsing <varshaprasad96@gmail.com>
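For orientation, here is a minimal sketch of how the extended server-side method is expected to be called. The `vector_io` handle, the `vs_123` store id, and the `.data` attribute on the returned page are illustrative assumptions; only the method and parameter names come from this change.

```python
async def demo(vector_io) -> None:
    # `vector_io` is assumed to be any VectorIO implementation that exposes the
    # OpenAI-compatible vector store API (e.g. the VectorIORouter); "vs_123" is
    # a placeholder store id.
    page = await vector_io.openai_search_vector_store(
        vector_store_id="vs_123",
        query="What is the capital of France?",
        max_num_results=5,
        rewrite_query=False,
        search_mode="hybrid",  # new in this PR: "keyword", "vector", or "hybrid" (default "vector")
    )
    # Assumes the response page follows the OpenAI shape with a `data` list.
    for result in page.data:
        print(result)
```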
parent 114946ae88
commit cfee63bd0d

9 changed files with 42 additions and 4 deletions
docs/_static/llama-stack-spec.html (vendored): 4 changed lines

@@ -13943,6 +13943,10 @@
                     "rewrite_query": {
                         "type": "boolean",
                         "description": "Whether to rewrite the natural language query for vector search (default false)"
+                    },
+                    "search_mode": {
+                        "type": "string",
+                        "description": "The search mode to use - \"keyword\", \"vector\", or \"hybrid\" (default \"vector\")"
                     }
                 },
                 "additionalProperties": false,
docs/_static/llama-stack-spec.yaml (vendored): 4 changed lines

@@ -9737,6 +9737,10 @@ components:
           description: >-
             Whether to rewrite the natural language query for vector search (default
             false)
+        search_mode:
+          type: string
+          description: >-
+            The search mode to use - "keyword", "vector", or "hybrid" (default "vector")
       additionalProperties: false
       required:
         - query
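To make the spec change concrete, the snippet below sketches a raw search request that exercises the new field. The endpoint path and port are assumptions about a locally running llama-stack server, not something defined in this diff; only the body fields follow the schema above.

```python
import requests

# Assumed local server and OpenAI-compatible route; adjust to your deployment.
BASE_URL = "http://localhost:8321"
VECTOR_STORE_ID = "vs_123"  # placeholder id

payload = {
    "query": "What is the capital of France?",
    "max_num_results": 5,
    "rewrite_query": False,
    "search_mode": "hybrid",  # new field: "keyword", "vector", or "hybrid" (default "vector")
}

resp = requests.post(
    f"{BASE_URL}/v1/openai/v1/vector_stores/{VECTOR_STORE_ID}/search",
    json=payload,
    timeout=30,
)
resp.raise_for_status()
print(resp.json())
```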
@@ -372,6 +372,7 @@ class VectorIO(Protocol):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",  # Using str instead of Literal due to OpenAPI schema generator limitations
     ) -> VectorStoreSearchResponsePage:
         """Search for chunks in a vector store.

@@ -383,6 +384,7 @@ class VectorIO(Protocol):
         :param max_num_results: Maximum number of results to return (1 to 50 inclusive, default 10).
         :param ranking_options: Ranking options for fine-tuning the search results.
         :param rewrite_query: Whether to rewrite the natural language query for vector search (default false)
+        :param search_mode: The search mode to use - "keyword", "vector", or "hybrid" (default "vector")
         :returns: A VectorStoreSearchResponse containing the search results.
         """
         ...
@@ -255,6 +255,7 @@ class VectorIORouter(VectorIO):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",
     ) -> VectorStoreSearchResponsePage:
         logger.debug(f"VectorIORouter.openai_search_vector_store: {vector_store_id}")
         # Route based on vector store ID

@@ -266,6 +267,7 @@ class VectorIORouter(VectorIO):
             max_num_results=max_num_results,
             ranking_options=ranking_options,
             rewrite_query=rewrite_query,
+            search_mode=search_mode,
         )

     async def openai_attach_file_to_vector_store(
@@ -256,6 +256,7 @@ class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",
     ) -> VectorStoreSearchResponsePage:
         raise NotImplementedError("OpenAI Vector Stores API is not supported in Chroma")
@@ -254,8 +254,9 @@ class MilvusVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",
     ) -> VectorStoreSearchResponsePage:
-        raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
+        raise NotImplementedError("OpenAI Vector Stores API is not supported in Milvus")

     async def openai_attach_file_to_vector_store(
         self,
@@ -256,6 +256,7 @@ class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
+        search_mode: str | None = "vector",
     ) -> VectorStoreSearchResponsePage:
         raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
@@ -337,13 +337,16 @@ class OpenAIVectorStoreMixin(ABC):
         max_num_results: int | None = 10,
         ranking_options: SearchRankingOptions | None = None,
         rewrite_query: bool | None = False,
-        # search_mode: Literal["keyword", "vector", "hybrid"] = "vector",
+        search_mode: str | None = "vector",  # Using str instead of Literal due to OpenAPI schema generator limitations
     ) -> VectorStoreSearchResponsePage:
         """Search for chunks in a vector store."""
-        # TODO: Add support in the API for this
-        search_mode = "vector"
         max_num_results = max_num_results or 10

+        # Validate search_mode
+        valid_modes = {"keyword", "vector", "hybrid"}
+        if search_mode not in valid_modes:
+            raise ValueError(f"search_mode must be one of {valid_modes}, got {search_mode}")
+
         if vector_store_id not in self.openai_vector_stores:
             raise ValueError(f"Vector store {vector_store_id} not found")
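The validation added to `OpenAIVectorStoreMixin` above can be illustrated in isolation. The snippet below mirrors the new check (a standalone sketch, not code from this commit) to show what an unsupported mode produces.

```python
VALID_MODES = {"keyword", "vector", "hybrid"}


def validate_search_mode(search_mode: str | None) -> None:
    # Mirrors the check added in openai_search_vector_store: any value outside
    # the supported set is rejected before the search is routed to a provider.
    if search_mode not in VALID_MODES:
        raise ValueError(f"search_mode must be one of {VALID_MODES}, got {search_mode}")


validate_search_mode("hybrid")  # accepted
try:
    validate_search_mode("semantic")  # not a supported mode
except ValueError as err:
    print(err)
```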
@@ -804,3 +804,23 @@ def test_openai_vector_store_update_file(compat_client_with_empty_stores, client
         file_id=file.id,
     )
     assert retrieved_file.attributes["foo"] == "baz"
+
+
+@pytest.mark.skip(reason="Client library needs to be scaffolded to support search_mode parameter")
+def test_openai_vector_store_search_modes():
+    """Test OpenAI vector store search with different search modes.
+
+    This test is skipped because the client library
+    needs to be regenerated from the updated OpenAPI spec to support the
+    search_mode parameter. Once the client library is updated, this test
+    can be enabled to verify:
+    - vector search mode (default)
+    - keyword search mode
+    - hybrid search mode
+    - invalid search mode validation
+    """
+    # TODO: Enable this test once llama_stack_client is updated to support search_mode
+    # The server-side implementation is complete but the client
+    # library needs to be updated:
+    # https://github.com/meta-llama/llama-stack-client-python/blob/52c0b5d23e9ae67ceb09d755143d436f38c20547/src/llama_stack_client/resources/vector_stores/vector_stores.py#L314
+    pass
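Once llama_stack_client is regenerated, the skipped test could be filled in roughly along the lines of the sketch below. The `vector_stores.create(...)` and `vector_stores.search(..., search_mode=...)` client calls are assumptions about the regenerated client surface, and the exception type surfaced for an invalid mode may differ.

```python
import pytest


@pytest.mark.parametrize("search_mode", ["vector", "keyword", "hybrid"])
def test_openai_vector_store_search_modes_sketch(compat_client_with_empty_stores, search_mode):
    """Hypothetical shape of the test once the client exposes search_mode."""
    client = compat_client_with_empty_stores
    store = client.vector_stores.create(name="search_mode_demo")  # assumed client call
    page = client.vector_stores.search(
        vector_store_id=store.id,
        query="sample query",
        search_mode=search_mode,  # parameter added by this PR on the server side
    )
    assert page is not None


def test_openai_vector_store_invalid_search_mode_sketch(compat_client_with_empty_stores):
    client = compat_client_with_empty_stores
    store = client.vector_stores.create(name="search_mode_demo")  # assumed client call
    # The server raises ValueError for unknown modes; the client is expected to
    # surface that as an API error of some kind.
    with pytest.raises(Exception):
        client.vector_stores.search(
            vector_store_id=store.id,
            query="sample query",
            search_mode="not-a-mode",
        )
```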