From ce72a2852516bbd702d202b2f4426478643faea0 Mon Sep 17 00:00:00 2001 From: Varsha Date: Sun, 10 Aug 2025 15:48:36 -0700 Subject: [PATCH] docs: Update doc on search modes for Milvus (#3078) # What does this PR do? Update Milvus doc on using search modes. ## Test Plan Signed-off-by: Varsha Prasad Narsing --- .../providers/vector_io/remote_milvus.md | 87 +++++++++++++++++++ llama_stack/providers/registry/vector_io.py | 87 +++++++++++++++++++ 2 files changed, 174 insertions(+) diff --git a/docs/source/providers/vector_io/remote_milvus.md b/docs/source/providers/vector_io/remote_milvus.md index 3646f4acc..2af64b8bb 100644 --- a/docs/source/providers/vector_io/remote_milvus.md +++ b/docs/source/providers/vector_io/remote_milvus.md @@ -11,6 +11,7 @@ That means you're not limited to storing vectors in memory or in a separate serv - Easy to use - Fully integrated with Llama Stack +- Supports all search modes: vector, keyword, and hybrid search (both inline and remote configurations) ## Usage @@ -101,6 +102,92 @@ vector_io: - **`client_pem_path`**: Path to the **client certificate** file (required for mTLS). - **`client_key_path`**: Path to the **client private key** file (required for mTLS). +## Search Modes + +Milvus supports three different search modes for both inline and remote configurations: + +### Vector Search +Vector search uses semantic similarity to find the most relevant chunks based on embedding vectors. This is the default search mode and works well for finding conceptually similar content. + +```python +# Vector search example +search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="What is machine learning?", + search_mode="vector", + max_num_results=5, +) +``` + +### Keyword Search +Keyword search uses traditional text-based matching to find chunks containing specific terms or phrases. This is useful when you need exact term matches. 
+ +```python +# Keyword search example +search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="Python programming language", + search_mode="keyword", + max_num_results=5, +) +``` + +### Hybrid Search +Hybrid search combines both vector and keyword search methods to provide more comprehensive results. It leverages the strengths of both semantic similarity and exact term matching. + +#### Basic Hybrid Search +```python +# Basic hybrid search example (uses RRF ranker with default impact_factor=60.0) +search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="neural networks in Python", + search_mode="hybrid", + max_num_results=5, +) +``` + +**Note**: The default `impact_factor` value of 60.0 was empirically determined to be optimal in the original RRF research paper: ["Reciprocal Rank Fusion outperforms Condorcet and individual Rank Learning Methods"](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) (Cormack et al., 2009). + +#### Hybrid Search with RRF (Reciprocal Rank Fusion) Ranker +RRF combines rankings from vector and keyword search by using reciprocal ranks. The impact factor controls how much weight is given to higher-ranked results: each result contributes `1 / (impact_factor + rank)` per ranking, so lower values emphasize top-ranked results more strongly, while higher values even out the differences between ranks. + +```python +# Hybrid search with custom RRF parameters +search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="neural networks in Python", + search_mode="hybrid", + max_num_results=5, + ranking_options={ + "ranker": { + "type": "rrf", + "impact_factor": 100.0, # Higher values flatten rank differences (less weight on top-ranked results) + } + }, +) +``` + +#### Hybrid Search with Weighted Ranker +Weighted ranker linearly combines normalized scores from vector and keyword search. The alpha parameter controls the balance between the two search methods. 
+ +```python +# Hybrid search with weighted ranker +search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="neural networks in Python", + search_mode="hybrid", + max_num_results=5, + ranking_options={ + "ranker": { + "type": "weighted", + "alpha": 0.7, # 70% vector search, 30% keyword search + } + }, +) +``` + +For detailed documentation on RRF and Weighted rankers, please refer to the [Milvus Reranking Guide](https://milvus.io/docs/reranking.md). + ## Documentation See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general. diff --git a/llama_stack/providers/registry/vector_io.py b/llama_stack/providers/registry/vector_io.py index 846f7b88e..b4f3ab6ac 100644 --- a/llama_stack/providers/registry/vector_io.py +++ b/llama_stack/providers/registry/vector_io.py @@ -535,6 +535,7 @@ That means you're not limited to storing vectors in memory or in a separate serv - Easy to use - Fully integrated with Llama Stack +- Supports all search modes: vector, keyword, and hybrid search (both inline and remote configurations) ## Usage @@ -625,6 +626,92 @@ vector_io: - **`client_pem_path`**: Path to the **client certificate** file (required for mTLS). - **`client_key_path`**: Path to the **client private key** file (required for mTLS). +## Search Modes + +Milvus supports three different search modes for both inline and remote configurations: + +### Vector Search +Vector search uses semantic similarity to find the most relevant chunks based on embedding vectors. This is the default search mode and works well for finding conceptually similar content. + +```python +# Vector search example +search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="What is machine learning?", + search_mode="vector", + max_num_results=5, +) +``` + +### Keyword Search +Keyword search uses traditional text-based matching to find chunks containing specific terms or phrases. 
This is useful when you need exact term matches. + +```python +# Keyword search example +search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="Python programming language", + search_mode="keyword", + max_num_results=5, +) +``` + +### Hybrid Search +Hybrid search combines both vector and keyword search methods to provide more comprehensive results. It leverages the strengths of both semantic similarity and exact term matching. + +#### Basic Hybrid Search +```python +# Basic hybrid search example (uses RRF ranker with default impact_factor=60.0) +search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="neural networks in Python", + search_mode="hybrid", + max_num_results=5, +) +``` + +**Note**: The default `impact_factor` value of 60.0 was empirically determined to be optimal in the original RRF research paper: ["Reciprocal Rank Fusion outperforms Condorcet and individual Rank Learning Methods"](https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf) (Cormack et al., 2009). + +#### Hybrid Search with RRF (Reciprocal Rank Fusion) Ranker +RRF combines rankings from vector and keyword search by using reciprocal ranks. The impact factor controls how much weight is given to higher-ranked results: each result contributes `1 / (impact_factor + rank)` per ranking, so lower values emphasize top-ranked results more strongly, while higher values even out the differences between ranks. + +```python +# Hybrid search with custom RRF parameters +search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="neural networks in Python", + search_mode="hybrid", + max_num_results=5, + ranking_options={ + "ranker": { + "type": "rrf", + "impact_factor": 100.0, # Higher values flatten rank differences (less weight on top-ranked results) + } + }, +) +``` + +#### Hybrid Search with Weighted Ranker +Weighted ranker linearly combines normalized scores from vector and keyword search. The alpha parameter controls the balance between the two search methods. 
+ +```python +# Hybrid search with weighted ranker +search_response = client.vector_stores.search( + vector_store_id=vector_store.id, + query="neural networks in Python", + search_mode="hybrid", + max_num_results=5, + ranking_options={ + "ranker": { + "type": "weighted", + "alpha": 0.7, # 70% vector search, 30% keyword search + } + }, +) +``` + +For detailed documentation on RRF and Weighted rankers, please refer to the [Milvus Reranking Guide](https://milvus.io/docs/reranking.md). + ## Documentation See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general.