mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 04:04:14 +00:00
[Feat] Implement keyword search in Qdrant
This commit implements keyword search in Qdrant. Signed-off-by: Varsha Prasad Narsing <varshaprasad96@gmail.com>
This commit is contained in:
parent
ef02b9ea10
commit
21211e8f67
4 changed files with 156 additions and 23 deletions
|
@ -145,3 +145,89 @@ async def test_qdrant_register_and_unregister_vector_db(
|
|||
await qdrant_adapter.unregister_vector_db(vector_db_id)
|
||||
assert not (await qdrant_adapter.client.collection_exists(vector_db_id))
|
||||
assert len((await qdrant_adapter.client.get_collections()).collections) == 0
|
||||
|
||||
|
||||
# Keyword search tests
|
||||
async def test_query_chunks_keyword_search(qdrant_vec_index, sample_chunks, sample_embeddings):
    """Check that keyword search returns hits for a present term and none for an absent one.

    The sample chunks are indexed first; a query for ``"Sentence 5"`` must
    yield at least one chunk, while a query for ``"blablabla"`` must yield
    an empty chunk list. Both results must be ``QueryChunksResponse`` objects.
    """
    await qdrant_vec_index.add_chunks(sample_chunks, sample_embeddings)

    # Query expected to produce at least one hit.
    hit_result = await qdrant_vec_index.query_keyword(
        query_string="Sentence 5",
        k=3,
        score_threshold=0.0,
    )
    assert isinstance(hit_result, QueryChunksResponse)
    hit_count = len(hit_result.chunks)
    assert hit_count > 0, f"Expected some chunks, but got {hit_count}"

    # Query expected to produce no hits at all.
    miss_result = await qdrant_vec_index.query_keyword(
        query_string="blablabla",
        k=1,
        score_threshold=0.0,
    )
    assert isinstance(miss_result, QueryChunksResponse)
    miss_count = len(miss_result.chunks)
    assert miss_count == 0, f"Expected 0 results, but got {miss_count}"
|
||||
|
||||
|
||||
async def test_query_chunks_keyword_search_k_greater_than_results(qdrant_vec_index, sample_chunks, sample_embeddings):
    """Check keyword search when ``k`` exceeds the number of matching chunks.

    After indexing the sample chunks, a query with ``k=5`` must return
    between one and four chunks, and at least one returned chunk must
    contain the query text.
    """
    await qdrant_vec_index.add_chunks(sample_chunks, sample_embeddings)

    # The test author expects this phrase to match only a single chunk.
    needle = "Sentence 1 from document 0"
    result = await qdrant_vec_index.query_keyword(
        query_string=needle,
        k=5,
        score_threshold=0.0,
    )

    assert isinstance(result, QueryChunksResponse)

    found = len(result.chunks)
    assert 0 < found < 5, f"Expected results between [1, 4], got {found}"

    contains_needle = any(needle in item.content for item in result.chunks)
    assert contains_needle, "Expected chunk not found"
|
||||
|
||||
|
||||
async def test_query_chunks_keyword_search_score_threshold(qdrant_vec_index, sample_chunks, sample_embeddings):
    """Check that keyword search still returns chunks at low score thresholds.

    The same ``"Sentence 5"`` query is issued with a threshold of ``0.0``
    and then ``-1.0``; each call must return at least one chunk.
    """
    await qdrant_vec_index.add_chunks(sample_chunks, sample_embeddings)

    # First the zero threshold, then a negative one. Both are expected to
    # let results through (the original author notes that keyword scores
    # are 0.0 -- NOTE(review): confirm against the index implementation).
    for threshold in (0.0, -1.0):
        result = await qdrant_vec_index.query_keyword(
            query_string="Sentence 5",
            k=3,
            score_threshold=threshold,
        )
        assert len(result.chunks) > 0
|
||||
|
||||
|
||||
async def test_query_chunks_keyword_search_edge_cases(qdrant_vec_index, sample_chunks, sample_embeddings):
    """Check that unusual query strings still yield a ``QueryChunksResponse``.

    Three inputs are tried in turn: an empty string, a 1000-character
    string, and a string made of punctuation/special characters. Only the
    response type is asserted; the number of returned chunks is not.
    """
    await qdrant_vec_index.add_chunks(sample_chunks, sample_embeddings)

    edge_queries = (
        "",  # empty string
        "a" * 1000,  # very long query string
        "!@#$%^&*()_+-=[]{}|;':\",./<>?",  # special characters
    )

    for text in edge_queries:
        result = await qdrant_vec_index.query_keyword(
            query_string=text,
            k=3,
            score_threshold=0.0,
        )
        assert isinstance(result, QueryChunksResponse)
|
||||
|
||||
|
||||
async def test_query_chunks_keyword_search_metadata_preservation(
    qdrant_vec_index, sample_chunks_with_metadata, sample_embeddings_with_metadata
):
    """Check that chunks returned by keyword search still carry their metadata.

    After indexing the metadata-bearing sample chunks, a ``"Sentence 0"``
    query must return at least one chunk. Every returned chunk must expose
    a ``metadata`` or ``chunk_metadata`` attribute, and when
    ``chunk_metadata`` is present and truthy its ``document_id``,
    ``chunk_id`` and ``source`` must all be non-``None``.
    """
    await qdrant_vec_index.add_chunks(sample_chunks_with_metadata, sample_embeddings_with_metadata)

    result = await qdrant_vec_index.query_keyword(
        query_string="Sentence 0",
        k=2,
        score_threshold=0.0,
    )
    assert len(result.chunks) > 0

    for item in result.chunks:
        # At least one of the two metadata attributes must exist.
        assert hasattr(item, "metadata") or hasattr(item, "chunk_metadata")

        # Field-level checks only apply when chunk_metadata is populated.
        if not hasattr(item, "chunk_metadata") or not item.chunk_metadata:
            continue

        assert item.chunk_metadata.document_id is not None
        assert item.chunk_metadata.chunk_id is not None
        assert item.chunk_metadata.source is not None
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue