chore: Updating Milvus Client calls to be non-blocking (#1830)

# What does this PR do?
This PR converts blocking Milvus Client calls to non-blocking.

Another one for https://github.com/meta-llama/llama-stack/issues/1489

## Test Plan

I ran the integration tests from
https://github.com/meta-llama/llama-stack/pull/1467 with:
```python
pytest -s -v tests/integration/vector_io/test_vector_io.py \
  --stack-config inference=sentence-transformers,vector_io=inline::milvus \
  --embedding-model all-miniLM-L6-V2  --env MILVUS_DB_PATH=/tmp/moo.db

INFO     2025-03-28 21:35:22,726 tests.integration.conftest:41 tests: Setting DISABLE_CODE_SANDBOX=1 for macOS          
/Users/farceo/dev/llama-stack/.venv/lib/python3.10/site-packages/pytest_asyncio/plugin.py:207: PytestDeprecationWarning: The configuration option "asyncio_default_fixture_loop_scope" is unset.
The event loop scope for asynchronous fixtures will default to the fixture caching scope. Future versions of pytest-asyncio will default the loop scope for asynchronous fixtures to function scope. Set the default fixture loop scope explicitly in order to avoid unexpected behavior in the future. Valid fixture loop scopes are: "function", "class", "module", "package", "session"

  warnings.warn(PytestDeprecationWarning(_DEFAULT_FIXTURE_LOOP_SCOPE_UNSET))
=============================================================================================================================================================================================================================================================== test session starts ===============================================================================================================================================================================================================================================================
platform darwin -- Python 3.10.16, pytest-8.3.4, pluggy-1.5.0 -- /Users/farceo/dev/llama-stack/.venv/bin/python3
cachedir: .pytest_cache
metadata: {'Python': '3.10.16', 'Platform': 'macOS-15.3.1-arm64-arm-64bit', 'Packages': {'pytest': '8.3.4', 'pluggy': '1.5.0'}, 'Plugins': {'cov': '6.0.0', 'html': '4.1.1', 'metadata': '3.1.1', 'asyncio': '0.25.3', 'anyio': '4.8.0', 'nbval': '0.11.0'}}
rootdir: /Users/farceo/dev/llama-stack
configfile: pyproject.toml
plugins: cov-6.0.0, html-4.1.1, metadata-3.1.1, asyncio-0.25.3, anyio-4.8.0, nbval-0.11.0
asyncio: mode=strict, asyncio_default_fixture_loop_scope=None
collected 7 items                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 

tests/integration/vector_io/test_vector_io.py::test_vector_db_retrieve[emb=all-miniLM-L6-V2] PASSED
tests/integration/vector_io/test_vector_io.py::test_vector_db_register[emb=all-miniLM-L6-V2] PASSED
tests/integration/vector_io/test_vector_io.py::test_insert_chunks[emb=all-miniLM-L6-V2-test_case0] PASSED
tests/integration/vector_io/test_vector_io.py::test_insert_chunks[emb=all-miniLM-L6-V2-test_case1] PASSED
tests/integration/vector_io/test_vector_io.py::test_insert_chunks[emb=all-miniLM-L6-V2-test_case2] PASSED
tests/integration/vector_io/test_vector_io.py::test_insert_chunks[emb=all-miniLM-L6-V2-test_case3] PASSED
tests/integration/vector_io/test_vector_io.py::test_insert_chunks[emb=all-miniLM-L6-V2-test_case4] PASSED

========================================================================================================================================================================================================================================================= 7 passed, 2 warnings in 40.33s ==========================================================================================================================================================================================================================================================
```

[//]: # (## Documentation)

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
This commit is contained in:
Francisco Arceo 2025-03-28 20:14:07 -06:00 committed by GitHub
parent daa34909a0
commit 74a2584cdb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import asyncio
import hashlib import hashlib
import logging import logging
import os import os
@ -35,15 +36,16 @@ class MilvusIndex(EmbeddingIndex):
self.consistency_level = consistency_level self.consistency_level = consistency_level
async def delete(self): async def delete(self):
if self.client.has_collection(self.collection_name): if await asyncio.to_thread(self.client.has_collection, self.collection_name):
self.client.drop_collection(collection_name=self.collection_name) await asyncio.to_thread(self.client.drop_collection, collection_name=self.collection_name)
async def add_chunks(self, chunks: List[Chunk], embeddings: NDArray): async def add_chunks(self, chunks: List[Chunk], embeddings: NDArray):
assert len(chunks) == len(embeddings), ( assert len(chunks) == len(embeddings), (
f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}" f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
) )
if not self.client.has_collection(self.collection_name): if not await asyncio.to_thread(self.client.has_collection, self.collection_name):
self.client.create_collection( await asyncio.to_thread(
self.client.create_collection,
self.collection_name, self.collection_name,
dimension=len(embeddings[0]), dimension=len(embeddings[0]),
auto_id=True, auto_id=True,
@ -62,7 +64,8 @@ class MilvusIndex(EmbeddingIndex):
} }
) )
try: try:
self.client.insert( await asyncio.to_thread(
self.client.insert,
self.collection_name, self.collection_name,
data=data, data=data,
) )
@ -71,7 +74,8 @@ class MilvusIndex(EmbeddingIndex):
raise e raise e
async def query(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse: async def query(self, embedding: NDArray, k: int, score_threshold: float) -> QueryChunksResponse:
search_res = self.client.search( search_res = await asyncio.to_thread(
self.client.search,
collection_name=self.collection_name, collection_name=self.collection_name,
data=[embedding], data=[embedding],
limit=k, limit=k,