diff --git a/docs/source/providers/vector_io/remote_milvus.md b/docs/source/providers/vector_io/remote_milvus.md index b81716192..9017f0e22 100644 --- a/docs/source/providers/vector_io/remote_milvus.md +++ b/docs/source/providers/vector_io/remote_milvus.md @@ -101,15 +101,6 @@ vector_io: - **`client_pem_path`**: Path to the **client certificate** file (required for mTLS). - **`client_key_path`**: Path to the **client private key** file (required for mTLS). -## Supported Search Modes - -The Milvus provider supports both vector-based and keyword-based (full-text) search modes, but with some limitations: - -- Remote Milvus supports both vector-based and keyword-based search modes. -- Inline Milvus (Milvus-Lite) only supports vector-based search. Keyword search is not supported as Milvus-Lite has not implemented this functionality yet. For updates on this feature, see [Milvus GitHub Issue #40848](https://github.com/milvus-io/milvus/issues/40848). - -When using the RAGTool interface, you can specify the desired search behavior via the `mode` parameter in `RAGQueryConfig`. For more details on Milvus's implementation of keyword search modes, refer to the [Milvus documentation](https://milvus.io/docs/full_text_search_with_milvus.md). - ## Documentation See the [Milvus documentation](https://milvus.io/docs/install-overview.md) for more details about Milvus in general. diff --git a/llama_stack/providers/remote/vector_io/milvus/milvus.py b/llama_stack/providers/remote/vector_io/milvus/milvus.py index 387e22382..32f904822 100644 --- a/llama_stack/providers/remote/vector_io/milvus/milvus.py +++ b/llama_stack/providers/remote/vector_io/milvus/milvus.py @@ -12,7 +12,7 @@ import re from typing import Any from numpy.typing import NDArray -from pymilvus import DataType, MilvusClient +from pymilvus import DataType, Function, FunctionType, MilvusClient from llama_stack.apis.files.files import Files from llama_stack.apis.inference import Inference, InterleavedContent @@ -108,7 +108,6 @@ class MilvusIndex(EmbeddingIndex): ) # Add BM25 function for full-text search - from pymilvus import Function, FunctionType bm25_function = Function( name="text_bm25_emb", input_field_names=["content"], @@ -170,7 +169,7 @@ class MilvusIndex(EmbeddingIndex): Perform BM25-based keyword search using Milvus's built-in full-text search. """ try: - from pymilvus import Function, FunctionType + # Use Milvus's built-in BM25 search search_res = await asyncio.to_thread( self.client.search, collection_name=self.collection_name, @@ -184,24 +183,46 @@ class MilvusIndex(EmbeddingIndex): } }, ) + chunks = [] scores = [] for res in search_res[0]: chunk = Chunk(**res["entity"]["chunk_content"]) chunks.append(chunk) scores.append(res["distance"]) # BM25 score from Milvus + # Filter by score threshold - filtered_results = [(chunk, score) for chunk, score in zip(chunks, scores, strict=False) if score >= score_threshold] - if filtered_results: - chunks, scores = zip(*filtered_results, strict=False) - return QueryChunksResponse(chunks=list(chunks), scores=list(scores)) - else: - return QueryChunksResponse(chunks=[], scores=[]) + filtered_chunks = [chunk for chunk, score in zip(chunks, scores, strict=False) if score >= score_threshold] + filtered_scores = [score for score in scores if score >= score_threshold] + + return QueryChunksResponse(chunks=filtered_chunks, scores=filtered_scores) + except Exception as e: logger.error(f"Error performing BM25 search: {e}") # Fallback to simple text search return await self._fallback_keyword_search(query_string, k, score_threshold) + async def _fallback_keyword_search( + self, + query_string: str, + k: int, + score_threshold: float, + ) -> QueryChunksResponse: + """ + Fallback to simple text search when BM25 search is not available. + """ + # Simple text search using content field + search_res = await asyncio.to_thread( + self.client.query, + collection_name=self.collection_name, + filter=f'content like "%{query_string}%"', + output_fields=["*"], + limit=k, + ) + chunks = [Chunk(**res["chunk_content"]) for res in search_res] + scores = [1.0] * len(chunks) # Simple binary score for text search + return QueryChunksResponse(chunks=chunks, scores=scores) + async def query_hybrid( self, embedding: NDArray, diff --git a/tests/unit/providers/vector_io/remote/test_milvus.py b/tests/unit/providers/vector_io/remote/test_milvus.py index cc2f96ff3..8e4366b99 100644 --- a/tests/unit/providers/vector_io/remote/test_milvus.py +++ b/tests/unit/providers/vector_io/remote/test_milvus.py @@ -132,41 +132,52 @@ async def test_query_chunks_keyword_search(milvus_index, sample_chunks, sample_e # Test keyword search query_string = "Sentence 5" - response = await milvus_index.query_keyword(query_string=query_string, k=3, score_threshold=0.0) + response = await milvus_index.query_keyword(query_string=query_string, k=2, score_threshold=0.0) assert isinstance(response, QueryChunksResponse) - assert len(response.chunks) == 3 - mock_milvus_client.query.assert_called_once() - - # Test no results case - mock_milvus_client.query.return_value = [] - response_no_results = await milvus_index.query_keyword(query_string="nonexistent", k=1, score_threshold=0.0) - - assert isinstance(response_no_results, QueryChunksResponse) - assert len(response_no_results.chunks) == 0 + assert len(response.chunks) == 2 @pytest.mark.asyncio -async def test_query_chunks_keyword_search_k_greater_than_results( - milvus_index, sample_chunks, sample_embeddings, mock_milvus_client -): +async def test_bm25_fallback_to_simple_search(milvus_index, sample_chunks, sample_embeddings, mock_milvus_client): + """Test that when BM25 search fails, the system falls back to simple text search.""" mock_milvus_client.has_collection.return_value = True await milvus_index.add_chunks(sample_chunks, sample_embeddings) - # Mock returning only 1 result even though k=5 + # Force BM25 search to fail + mock_milvus_client.search.side_effect = Exception("BM25 search not available") + + # Mock simple text search results mock_milvus_client.query.return_value = [ { "chunk_id": "chunk1", - "chunk_content": {"content": "Sentence 1 from document 0", "metadata": {"document_id": "doc1"}}, - "score": 0.9, - } + "chunk_content": {"content": "Python programming language", "metadata": {"document_id": "doc1"}}, + }, + { + "chunk_id": "chunk2", + "chunk_content": {"content": "Machine learning algorithms", "metadata": {"document_id": "doc2"}}, + }, ] - query_str = "Sentence 1 from document 0" - response = await milvus_index.query_keyword(query_string=query_str, k=5, score_threshold=0.0) + # Test keyword search that should fall back to simple text search + query_string = "Python" + response = await milvus_index.query_keyword(query_string=query_string, k=3, score_threshold=0.0) - assert 0 < len(response.chunks) <= 4 - assert any("Sentence 1 from document 0" in chunk.content for chunk in response.chunks) + # Verify response structure + assert isinstance(response, QueryChunksResponse) + assert len(response.chunks) > 0, "Fallback search should return results" + + # Verify that simple text search was used (query method called instead of search) + mock_milvus_client.query.assert_called_once() + mock_milvus_client.search.assert_called_once() # Called once but failed + + # Verify the query filter contains the search term + query_call_args = mock_milvus_client.query.call_args + assert "filter" in query_call_args[1], "Query should include filter for text search" + assert "Python" in query_call_args[1]["filter"], "Filter should contain the search term" + + # Verify all returned chunks have score 1.0 (simple binary scoring) + assert all(score == 1.0 for score in response.scores), "Simple text search should use binary scoring" @pytest.mark.asyncio