fix: sqlite_vec keyword implementation

Signed-off-by: Varsha Prasad Narsing <varshaprasad96@gmail.com>
This commit is contained in:
Varsha Prasad Narsing 2025-05-07 16:05:25 -07:00
parent e2a7022d3c
commit 2060fdba7f
14 changed files with 146 additions and 101 deletions

View file

@@ -98,7 +98,7 @@ async def test_qdrant_adapter_returns_expected_chunks(
response = await qdrant_adapter.query_chunks(
query=__QUERY,
vector_db_id=vector_db_id,
params={"max_chunks": max_query_chunks},
params={"max_chunks": max_query_chunks, "mode": "vector"},
)
assert isinstance(response, QueryChunksResponse)
assert len(response.chunks) == expected_chunks

View file

@@ -60,7 +60,7 @@ async def test_add_chunks(sqlite_vec_index, sample_chunks, sample_embeddings):
async def test_query_chunks_vector(sqlite_vec_index, sample_chunks, sample_embeddings, embedding_dimension):
await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
query_embedding = np.random.rand(embedding_dimension).astype(np.float32)
response = await sqlite_vec_index.query(query_embedding, query_string="", k=2, score_threshold=0.0, mode="vector")
response = await sqlite_vec_index.query_vector(query_embedding, k=2, score_threshold=0.0)
assert isinstance(response, QueryChunksResponse)
assert len(response.chunks) == 2
@@ -70,16 +70,14 @@ async def test_query_chunks_full_text_search(sqlite_vec_index, sample_chunks, sa
await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
query_string = "Sentence 5"
response = await sqlite_vec_index.query(
embedding=None, k=3, score_threshold=0.0, query_string=query_string, mode="keyword"
)
response = await sqlite_vec_index.query_keyword(k=3, score_threshold=0.0, query_string=query_string)
assert isinstance(response, QueryChunksResponse)
assert len(response.chunks) == 3, f"Expected at least one result, but got {len(response.chunks)}"
assert len(response.chunks) == 3, f"Expected three chunks, but got {len(response.chunks)}"
non_existent_query_str = "blablabla"
response_no_results = await sqlite_vec_index.query(
embedding=None, query_string=non_existent_query_str, k=1, score_threshold=0.0, mode="keyword"
response_no_results = await sqlite_vec_index.query_keyword(
query_string=non_existent_query_str, k=1, score_threshold=0.0
)
assert isinstance(response_no_results, QueryChunksResponse)
@@ -92,12 +90,10 @@ async def test_query_chunks_full_text_search_k_greater_than_results(sqlite_vec_i
await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
query_str = "Sentence 1 from document 0" # Should match only one chunk
response = await sqlite_vec_index.query(
embedding=None, k=5, score_threshold=0.0, query_string=query_str, mode="keyword"
)
response = await sqlite_vec_index.query_keyword(k=5, score_threshold=0.0, query_string=query_str)
assert isinstance(response, QueryChunksResponse)
assert 0 < len(response.chunks) < 5, f"Expected <5 results but >0, got {len(response.chunks)}"
assert 0 < len(response.chunks) < 5, f"Expected results between [1, 4], got {len(response.chunks)}"
assert any("Sentence 1 from document 0" in chunk.content for chunk in response.chunks), "Expected chunk not found"