diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html
index 0a5caa3d1..affc426d6 100644
--- a/docs/_static/llama-stack-spec.html
+++ b/docs/_static/llama-stack-spec.html
@@ -13943,6 +13943,10 @@
"rewrite_query": {
"type": "boolean",
"description": "Whether to rewrite the natural language query for vector search (default false)"
+ },
+ "search_mode": {
+ "type": "string",
+ "description": "The search mode to use - \"keyword\", \"vector\", or \"hybrid\" (default \"vector\")"
}
},
"additionalProperties": false,
diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml
index c115e1df2..1e1293dc2 100644
--- a/docs/_static/llama-stack-spec.yaml
+++ b/docs/_static/llama-stack-spec.yaml
@@ -9737,6 +9737,10 @@ components:
description: >-
Whether to rewrite the natural language query for vector search (default
false)
+ search_mode:
+ type: string
+ description: >-
+          The search mode to use, one of "keyword", "vector", or "hybrid" (default "vector")
additionalProperties: false
required:
- query
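
Taken together, the two spec hunks above add the same optional string property to the search request schema in both generated files. A request that exercises it might carry a body like the following minimal sketch (only "query" is required per the schema; "max_num_results" and "rewrite_query" are taken from the surrounding schema and the Python signature below):

    import json

    # Sketch of a search request body under the updated schema. "search_mode"
    # is the new optional property and defaults to "vector" when omitted.
    body = {
        "query": "What is Llama Stack?",  # required
        "max_num_results": 5,             # assumed from the Python API signature
        "rewrite_query": False,
        "search_mode": "hybrid",          # one of "keyword", "vector", "hybrid"
    }
    print(json.dumps(body, indent=2))
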
diff --git a/llama_stack/apis/vector_io/vector_io.py b/llama_stack/apis/vector_io/vector_io.py
index 017fa62de..d6de0108c 100644
--- a/llama_stack/apis/vector_io/vector_io.py
+++ b/llama_stack/apis/vector_io/vector_io.py
@@ -372,6 +372,7 @@ class VectorIO(Protocol):
max_num_results: int | None = 10,
ranking_options: SearchRankingOptions | None = None,
rewrite_query: bool | None = False,
+ search_mode: str | None = "vector", # Using str instead of Literal due to OpenAPI schema generator limitations
) -> VectorStoreSearchResponsePage:
"""Search for chunks in a vector store.
@@ -383,6 +384,7 @@ class VectorIO(Protocol):
:param max_num_results: Maximum number of results to return (1 to 50 inclusive, default 10).
:param ranking_options: Ranking options for fine-tuning the search results.
:param rewrite_query: Whether to rewrite the natural language query for vector search (default false)
+        :param search_mode: The search mode to use, one of "keyword", "vector", or "hybrid" (default "vector")
:returns: A VectorStoreSearchResponse containing the search results.
"""
...
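
The inline comment above captures the key API decision: Literal["keyword", "vector", "hybrid"] trips up the OpenAPI schema generator, so the wire-level type stays a plain str. Providers can still recover the narrow type internally; here is a minimal sketch of that pattern (the SearchMode alias and coerce_search_mode helper are hypothetical, not part of this diff):

    from typing import Literal, cast

    SearchMode = Literal["keyword", "vector", "hybrid"]
    _VALID_MODES = ("keyword", "vector", "hybrid")

    def coerce_search_mode(value: str | None) -> SearchMode:
        """Narrow the wire-level `str | None` to the Literal used internally."""
        mode = value or "vector"  # None falls back to the documented default
        if mode not in _VALID_MODES:
            raise ValueError(f"search_mode must be one of {_VALID_MODES}, got {mode!r}")
        return cast(SearchMode, mode)
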
diff --git a/llama_stack/distribution/routers/vector_io.py b/llama_stack/distribution/routers/vector_io.py
index 643029d60..6af3bd416 100644
--- a/llama_stack/distribution/routers/vector_io.py
+++ b/llama_stack/distribution/routers/vector_io.py
@@ -255,6 +255,7 @@ class VectorIORouter(VectorIO):
max_num_results: int | None = 10,
ranking_options: SearchRankingOptions | None = None,
rewrite_query: bool | None = False,
+ search_mode: str | None = "vector",
) -> VectorStoreSearchResponsePage:
logger.debug(f"VectorIORouter.openai_search_vector_store: {vector_store_id}")
# Route based on vector store ID
@@ -266,6 +267,7 @@ class VectorIORouter(VectorIO):
max_num_results=max_num_results,
ranking_options=ranking_options,
rewrite_query=rewrite_query,
+ search_mode=search_mode,
)
async def openai_attach_file_to_vector_store(
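
The router change is pure plumbing: accept search_mode with the same default and hand it to whichever provider owns the store. A condensed, self-contained sketch of that pass-through shape (the dict-based backend lookup is schematic; the real router resolves providers through its routing table):

    from typing import Any, Protocol

    class SearchBackend(Protocol):
        async def openai_search_vector_store(
            self, vector_store_id: str, query: str, **kwargs: Any
        ) -> Any: ...

    class SketchRouter:
        def __init__(self, backends: dict[str, SearchBackend]) -> None:
            self.backends = backends  # vector_store_id -> provider

        async def openai_search_vector_store(
            self,
            vector_store_id: str,
            query: str,
            *,
            search_mode: str | None = "vector",
            **kwargs: Any,
        ) -> Any:
            backend = self.backends[vector_store_id]
            # The new parameter is forwarded unchanged, as in the diff above.
            return await backend.openai_search_vector_store(
                vector_store_id=vector_store_id,
                query=query,
                search_mode=search_mode,
                **kwargs,
            )
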
diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py
index 407bdda56..06d1786f0 100644
--- a/llama_stack/providers/remote/vector_io/chroma/chroma.py
+++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py
@@ -256,6 +256,7 @@ class ChromaVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
max_num_results: int | None = 10,
ranking_options: SearchRankingOptions | None = None,
rewrite_query: bool | None = False,
+ search_mode: str | None = "vector",
) -> VectorStoreSearchResponsePage:
raise NotImplementedError("OpenAI Vector Stores API is not supported in Chroma")
diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py
index 09d8520d1..182227a85 100644
--- a/llama_stack/providers/remote/vector_io/milvus/milvus.py
+++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py
@@ -254,8 +254,9 @@ class MilvusVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
max_num_results: int | None = 10,
ranking_options: SearchRankingOptions | None = None,
rewrite_query: bool | None = False,
+ search_mode: str | None = "vector",
) -> VectorStoreSearchResponsePage:
- raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
+ raise NotImplementedError("OpenAI Vector Stores API is not supported in Milvus")
async def openai_attach_file_to_vector_store(
self,
diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
index 694a1bf93..e9d6eec22 100644
--- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
+++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py
@@ -256,6 +256,7 @@ class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate):
max_num_results: int | None = 10,
ranking_options: SearchRankingOptions | None = None,
rewrite_query: bool | None = False,
+ search_mode: str | None = "vector",
) -> VectorStoreSearchResponsePage:
raise NotImplementedError("OpenAI Vector Stores API is not supported in Qdrant")
diff --git a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
index c741f7045..8b962db76 100644
--- a/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
+++ b/llama_stack/providers/utils/memory/openai_vector_store_mixin.py
@@ -337,13 +337,16 @@ class OpenAIVectorStoreMixin(ABC):
max_num_results: int | None = 10,
ranking_options: SearchRankingOptions | None = None,
rewrite_query: bool | None = False,
- # search_mode: Literal["keyword", "vector", "hybrid"] = "vector",
+ search_mode: str | None = "vector", # Using str instead of Literal due to OpenAPI schema generator limitations
) -> VectorStoreSearchResponsePage:
"""Search for chunks in a vector store."""
- # TODO: Add support in the API for this
- search_mode = "vector"
max_num_results = max_num_results or 10
+        search_mode = search_mode or "vector"  # None falls back to the documented default
+        valid_modes = {"keyword", "vector", "hybrid"}
+        if search_mode not in valid_modes:
+            raise ValueError(f"search_mode must be one of {valid_modes}, got {search_mode}")
+
if vector_store_id not in self.openai_vector_stores:
raise ValueError(f"Vector store {vector_store_id} not found")
diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py
index 4856455c4..4c061f519 100644
--- a/tests/integration/vector_io/test_openai_vector_stores.py
+++ b/tests/integration/vector_io/test_openai_vector_stores.py
@@ -804,3 +804,23 @@ def test_openai_vector_store_update_file(compat_client_with_empty_stores, client
file_id=file.id,
)
assert retrieved_file.attributes["foo"] == "baz"
+
+
+@pytest.mark.skip(reason="Client library needs to be scaffolded to support search_mode parameter")
+def test_openai_vector_store_search_modes():
+ """Test OpenAI vector store search with different search modes.
+
+ This test is skipped because the client library
+ needs to be regenerated from the updated OpenAPI spec to support the
+ search_mode parameter. Once the client library is updated, this test
+ can be enabled to verify:
+ - vector search mode (default)
+ - keyword search mode
+ - hybrid search mode
+ - invalid search mode validation
+ """
+ # TODO: Enable this test once llama_stack_client is updated to support search_mode
+ # The server-side implementation is complete but the client
+ # library needs to be updated:
+ # https://github.com/meta-llama/llama-stack-client-python/blob/52c0b5d23e9ae67ceb09d755143d436f38c20547/src/llama_stack_client/resources/vector_stores/vector_stores.py#L314
+ pass
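
Once the client library ships the parameter, the body of the skipped test could look roughly like this sketch (the vector_stores.create and vector_stores.search call shapes are assumed from the linked client resource file, not verified):

    import pytest

    def test_openai_vector_store_search_modes(compat_client_with_empty_stores, client_with_models):
        compat_client = compat_client_with_empty_stores
        vector_store = compat_client.vector_stores.create(name="search_modes_test")

        # Every valid mode should round-trip without error.
        for mode in ("vector", "keyword", "hybrid"):
            response = compat_client.vector_stores.search(
                vector_store_id=vector_store.id,
                query="What is the capital of France?",
                search_mode=mode,
            )
            assert response is not None

        # An unknown mode should be rejected by the server-side validation.
        with pytest.raises(Exception):
            compat_client.vector_stores.search(
                vector_store_id=vector_store.id,
                query="anything",
                search_mode="invalid_mode",
            )
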