Merge branch 'main' into opengauss-add

2025-12-25 19:38:05 +00:00 · 2025-08-08 20:58:48 +08:00 · 2025-08-08 20:58:48 +08:00 · 39e49ab97a
commit 39e49ab97a
parent 5e9c394500 9e78f2da96
807 changed files with 79555 additions and 26772 deletions
--- a/tests/unit/providers/vector_io/conftest.py
+++ b/tests/unit/providers/vector_io/conftest.py
@ -8,20 +8,32 @@ import random

 import numpy as np
 import pytest
+from chromadb import PersistentClient
 from pymilvus import MilvusClient, connections

 from llama_stack.apis.vector_dbs import VectorDB
 from llama_stack.apis.vector_io import Chunk, ChunkMetadata
+from llama_stack.providers.inline.vector_io.chroma.config import ChromaVectorIOConfig
+from llama_stack.providers.inline.vector_io.faiss.config import FaissVectorIOConfig
+from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, FaissVectorIOAdapter
 from llama_stack.providers.inline.vector_io.milvus.config import MilvusVectorIOConfig, SqliteKVStoreConfig
+from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig
 from llama_stack.providers.inline.vector_io.sqlite_vec import SQLiteVectorIOConfig
 from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import SQLiteVecIndex, SQLiteVecVectorIOAdapter
+from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaIndex, ChromaVectorIOAdapter, maybe_await
 from llama_stack.providers.remote.vector_io.milvus.milvus import MilvusIndex, MilvusVectorIOAdapter
+from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter

 EMBEDDING_DIMENSION = 384
 COLLECTION_PREFIX = "test_collection"
 MILVUS_ALIAS = "test_milvus"


+@pytest.fixture(params=["milvus", "sqlite_vec", "faiss", "chroma"])
+def vector_provider(request):
+    return request.param
+
+
@pytest.fixture
 def vector_db_id() -> str:
    return f"test-vector-db-{random.randint(1, 100)}"
@ -90,11 +102,6 @@ def sample_embeddings_with_metadata(sample_chunks_with_metadata):
    return np.array([np.random.rand(EMBEDDING_DIMENSION).astype(np.float32) for _ in sample_chunks_with_metadata])


-@pytest.fixture(params=["milvus", "sqlite_vec"])
-def vector_provider(request):
-    return request.param
-
-
@pytest.fixture(scope="session")
 def mock_inference_api(embedding_dimension):
    class MockInferenceAPI:
@ -116,7 +123,7 @@ async def unique_kvstore_config(tmp_path_factory):

@pytest.fixture(scope="session")
 def sqlite_vec_db_path(tmp_path_factory):
-    db_path = str(tmp_path_factory.getbasetemp() / "test.db")
+    db_path = str(tmp_path_factory.getbasetemp() / "test_sqlite_vec.db")
    return db_path


@ -198,13 +205,145 @@ async def milvus_vec_adapter(milvus_vec_db_path, mock_inference_api):
    await adapter.shutdown()


+@pytest.fixture
+def faiss_vec_db_path(tmp_path_factory):
+    db_path = str(tmp_path_factory.getbasetemp() / "test_faiss.db")
+    return db_path
+
+
+@pytest.fixture
+async def faiss_vec_index(embedding_dimension):
+    index = FaissIndex(embedding_dimension)
+    yield index
+    await index.delete()
+
+
+@pytest.fixture
+async def faiss_vec_adapter(unique_kvstore_config, mock_inference_api, embedding_dimension):
+    config = FaissVectorIOConfig(
+        kvstore=unique_kvstore_config,
+    )
+    adapter = FaissVectorIOAdapter(
+        config=config,
+        inference_api=mock_inference_api,
+        files_api=None,
+    )
+    await adapter.initialize()
+    await adapter.register_vector_db(
+        VectorDB(
+            identifier=f"faiss_test_collection_{np.random.randint(1e6)}",
+            provider_id="test_provider",
+            embedding_model="test_model",
+            embedding_dimension=embedding_dimension,
+        )
+    )
+    yield adapter
+    await adapter.shutdown()
+
+
+@pytest.fixture
+def chroma_vec_db_path(tmp_path_factory):
+    persist_dir = tmp_path_factory.mktemp(f"chroma_{np.random.randint(1e6)}")
+    return str(persist_dir)
+
+
+@pytest.fixture
+async def chroma_vec_index(chroma_vec_db_path, embedding_dimension):
+    client = PersistentClient(path=chroma_vec_db_path)
+    name = f"{COLLECTION_PREFIX}_{np.random.randint(1e6)}"
+    collection = await maybe_await(client.get_or_create_collection(name))
+    index = ChromaIndex(client=client, collection=collection)
+    await index.initialize()
+    yield index
+    await index.delete()
+
+
+@pytest.fixture
+async def chroma_vec_adapter(chroma_vec_db_path, mock_inference_api, embedding_dimension):
+    config = ChromaVectorIOConfig(
+        db_path=chroma_vec_db_path,
+        kvstore=SqliteKVStoreConfig(),
+    )
+    adapter = ChromaVectorIOAdapter(
+        config=config,
+        inference_api=mock_inference_api,
+        files_api=None,
+    )
+    await adapter.initialize()
+    await adapter.register_vector_db(
+        VectorDB(
+            identifier=f"chroma_test_collection_{random.randint(1, 1_000_000)}",
+            provider_id="test_provider",
+            embedding_model="test_model",
+            embedding_dimension=embedding_dimension,
+        )
+    )
+    yield adapter
+    await adapter.shutdown()
+
+
+@pytest.fixture
+def qdrant_vec_db_path(tmp_path_factory):
+    import uuid
+
+    db_path = str(tmp_path_factory.getbasetemp() / f"test_qdrant_{uuid.uuid4()}.db")
+    return db_path
+
+
+@pytest.fixture
+async def qdrant_vec_adapter(qdrant_vec_db_path, mock_inference_api, embedding_dimension):
+    import uuid
+
+    config = QdrantVectorIOConfig(
+        db_path=qdrant_vec_db_path,
+        kvstore=SqliteKVStoreConfig(),
+    )
+    adapter = QdrantVectorIOAdapter(
+        config=config,
+        inference_api=mock_inference_api,
+        files_api=None,
+    )
+    collection_id = f"qdrant_test_collection_{uuid.uuid4()}"
+    await adapter.initialize()
+    await adapter.register_vector_db(
+        VectorDB(
+            identifier=collection_id,
+            provider_id="test_provider",
+            embedding_model="test_model",
+            embedding_dimension=embedding_dimension,
+        )
+    )
+    adapter.test_collection_id = collection_id
+    yield adapter
+    await adapter.shutdown()
+
+
+@pytest.fixture
+async def qdrant_vec_index(qdrant_vec_db_path, embedding_dimension):
+    import uuid
+
+    from qdrant_client import AsyncQdrantClient
+
+    from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantIndex
+
+    client = AsyncQdrantClient(path=qdrant_vec_db_path)
+    collection_name = f"qdrant_test_collection_{uuid.uuid4()}"
+    index = QdrantIndex(client, collection_name)
+    yield index
+    await index.delete()
+
+
@pytest.fixture
 def vector_io_adapter(vector_provider, request):
    """Returns the appropriate vector IO adapter based on the provider parameter."""
-    if vector_provider == "milvus":
-        return request.getfixturevalue("milvus_vec_adapter")
-    else:
-        return request.getfixturevalue("sqlite_vec_adapter")
+    vector_provider_dict = {
+        "milvus": "milvus_vec_adapter",
+        "faiss": "faiss_vec_adapter",
+        "sqlite_vec": "sqlite_vec_adapter",
+        "chroma": "chroma_vec_adapter",
+        "qdrant": "qdrant_vec_adapter",
+    }
+    return request.getfixturevalue(vector_provider_dict[vector_provider])


@pytest.fixture
--- a/tests/unit/providers/vector_io/remote/test_milvus.py
+++ b/tests/unit/providers/vector_io/remote/test_milvus.py
@ -0,0 +1,326 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from unittest.mock import MagicMock, patch
+
+import numpy as np
+import pytest
+
+from llama_stack.apis.vector_io import QueryChunksResponse
+
+# Mock the entire pymilvus module
+pymilvus_mock = MagicMock()
+pymilvus_mock.DataType = MagicMock()
+pymilvus_mock.MilvusClient = MagicMock
+pymilvus_mock.RRFRanker = MagicMock
+pymilvus_mock.WeightedRanker = MagicMock
+pymilvus_mock.AnnSearchRequest = MagicMock
+
+# Apply the mock before importing MilvusIndex
+with patch.dict("sys.modules", {"pymilvus": pymilvus_mock}):
+    from llama_stack.providers.remote.vector_io.milvus.milvus import MilvusIndex
+
+# These are unit tests for the MilvusIndex class. This file should only contain
+# tests which are specific to this class. More general (API-level) tests should be placed in
+# tests/integration/vector_io/
+#
+# How to run this test:
+#
+# pytest tests/unit/providers/vector_io/remote/test_milvus.py \
+# -v -s --tb=short --disable-warnings --asyncio-mode=auto
+
+MILVUS_PROVIDER = "milvus"
+
+
+@pytest.fixture
+async def mock_milvus_client() -> MagicMock:
+    """Create a mock Milvus client with common method behaviors."""
+    client = MagicMock()
+
+    # Mock collection operations
+    client.has_collection.return_value = False  # Initially no collection
+    client.create_collection.return_value = None
+    client.drop_collection.return_value = None
+
+    # Mock insert operation
+    client.insert.return_value = {"insert_count": 10}
+
+    # Mock search operation - return mock results (data should be dict, not JSON string)
+    client.search.return_value = [
+        [
+            {
+                "id": 0,
+                "distance": 0.1,
+                "entity": {"chunk_content": {"content": "mock chunk 1", "metadata": {"document_id": "doc1"}}},
+            },
+            {
+                "id": 1,
+                "distance": 0.2,
+                "entity": {"chunk_content": {"content": "mock chunk 2", "metadata": {"document_id": "doc2"}}},
+            },
+        ]
+    ]
+
+    # Mock query operation for keyword search (data should be dict, not JSON string)
+    client.query.return_value = [
+        {
+            "chunk_id": "chunk1",
+            "chunk_content": {"content": "mock chunk 1", "metadata": {"document_id": "doc1"}},
+            "score": 0.9,
+        },
+        {
+            "chunk_id": "chunk2",
+            "chunk_content": {"content": "mock chunk 2", "metadata": {"document_id": "doc2"}},
+            "score": 0.8,
+        },
+        {
+            "chunk_id": "chunk3",
+            "chunk_content": {"content": "mock chunk 3", "metadata": {"document_id": "doc3"}},
+            "score": 0.7,
+        },
+    ]
+
+    return client
+
+
+@pytest.fixture
+async def milvus_index(mock_milvus_client):
+    """Create a MilvusIndex with mocked client."""
+    index = MilvusIndex(client=mock_milvus_client, collection_name="test_collection")
+    yield index
+    # No real cleanup needed since we're using mocks
+
+
+async def test_add_chunks(milvus_index, sample_chunks, sample_embeddings, mock_milvus_client):
+    # Setup: collection doesn't exist initially, then exists after creation
+    mock_milvus_client.has_collection.side_effect = [False, True]
+
+    await milvus_index.add_chunks(sample_chunks, sample_embeddings)
+
+    # Verify collection was created and data was inserted
+    mock_milvus_client.create_collection.assert_called_once()
+    mock_milvus_client.insert.assert_called_once()
+
+    # Verify the insert call had the right number of chunks
+    insert_call = mock_milvus_client.insert.call_args
+    assert len(insert_call[1]["data"]) == len(sample_chunks)
+
+
+async def test_query_chunks_vector(
+    milvus_index, sample_chunks, sample_embeddings, embedding_dimension, mock_milvus_client
+):
+    # Setup: Add chunks first
+    mock_milvus_client.has_collection.return_value = True
+    await milvus_index.add_chunks(sample_chunks, sample_embeddings)
+
+    # Test vector search
+    query_embedding = np.random.rand(embedding_dimension).astype(np.float32)
+    response = await milvus_index.query_vector(query_embedding, k=2, score_threshold=0.0)
+
+    assert isinstance(response, QueryChunksResponse)
+    assert len(response.chunks) == 2
+    mock_milvus_client.search.assert_called_once()
+
+
+async def test_query_chunks_keyword_search(milvus_index, sample_chunks, sample_embeddings, mock_milvus_client):
+    mock_milvus_client.has_collection.return_value = True
+    await milvus_index.add_chunks(sample_chunks, sample_embeddings)
+
+    # Test keyword search
+    query_string = "Sentence 5"
+    response = await milvus_index.query_keyword(query_string=query_string, k=2, score_threshold=0.0)
+
+    assert isinstance(response, QueryChunksResponse)
+    assert len(response.chunks) == 2
+
+
+async def test_bm25_fallback_to_simple_search(milvus_index, sample_chunks, sample_embeddings, mock_milvus_client):
+    """Test that when BM25 search fails, the system falls back to simple text search."""
+    mock_milvus_client.has_collection.return_value = True
+    await milvus_index.add_chunks(sample_chunks, sample_embeddings)
+
+    # Force BM25 search to fail
+    mock_milvus_client.search.side_effect = Exception("BM25 search not available")
+
+    # Mock simple text search results
+    mock_milvus_client.query.return_value = [
+        {
+            "chunk_id": "chunk1",
+            "chunk_content": {"content": "Python programming language", "metadata": {"document_id": "doc1"}},
+        },
+        {
+            "chunk_id": "chunk2",
+            "chunk_content": {"content": "Machine learning algorithms", "metadata": {"document_id": "doc2"}},
+        },
+    ]
+
+    # Test keyword search that should fall back to simple text search
+    query_string = "Python"
+    response = await milvus_index.query_keyword(query_string=query_string, k=3, score_threshold=0.0)
+
+    # Verify response structure
+    assert isinstance(response, QueryChunksResponse)
+    assert len(response.chunks) > 0, "Fallback search should return results"
+
+    # Verify the fallback path: BM25 search was attempted and failed, then simple text search (query) was used
+    mock_milvus_client.query.assert_called_once()
+    mock_milvus_client.search.assert_called_once()  # Called once but failed
+
+    # Verify the query uses parameterized filter with filter_params
+    query_call_args = mock_milvus_client.query.call_args
+    assert "filter" in query_call_args[1], "Query should include filter for text search"
+    assert "filter_params" in query_call_args[1], "Query should use parameterized filter"
+    assert query_call_args[1]["filter_params"]["content"] == "Python", "Filter params should contain the search term"
+
+    # Verify all returned chunks have score 1.0 (simple binary scoring)
+    assert all(score == 1.0 for score in response.scores), "Simple text search should use binary scoring"
+
+
+async def test_delete_collection(milvus_index, mock_milvus_client):
+    # Test collection deletion
+    mock_milvus_client.has_collection.return_value = True
+
+    await milvus_index.delete()
+
+    mock_milvus_client.drop_collection.assert_called_once_with(collection_name=milvus_index.collection_name)
+
+
+async def test_query_hybrid_search_rrf(
+    milvus_index, sample_chunks, sample_embeddings, embedding_dimension, mock_milvus_client
+):
+    """Test hybrid search with RRF reranker."""
+    mock_milvus_client.has_collection.return_value = True
+    await milvus_index.add_chunks(sample_chunks, sample_embeddings)
+
+    # Mock hybrid search results
+    mock_milvus_client.hybrid_search.return_value = [
+        [
+            {
+                "id": 0,
+                "distance": 0.1,
+                "entity": {"chunk_content": {"content": "mock chunk 1", "metadata": {"document_id": "doc1"}}},
+            },
+            {
+                "id": 1,
+                "distance": 0.2,
+                "entity": {"chunk_content": {"content": "mock chunk 2", "metadata": {"document_id": "doc2"}}},
+            },
+        ]
+    ]
+
+    # Test hybrid search with RRF reranker
+    query_embedding = np.random.rand(embedding_dimension).astype(np.float32)
+    query_string = "test query"
+    response = await milvus_index.query_hybrid(
+        embedding=query_embedding,
+        query_string=query_string,
+        k=2,
+        score_threshold=0.0,
+        reranker_type="rrf",
+        reranker_params={"impact_factor": 60.0},
+    )
+
+    assert isinstance(response, QueryChunksResponse)
+    assert len(response.chunks) == 2
+    assert len(response.scores) == 2
+
+    # Verify hybrid search was called with correct parameters
+    mock_milvus_client.hybrid_search.assert_called_once()
+    call_args = mock_milvus_client.hybrid_search.call_args
+
+    # Check that the request contains both vector and BM25 search requests
+    reqs = call_args[1]["reqs"]
+    assert len(reqs) == 2
+    assert reqs[0].anns_field == "vector"
+    assert reqs[1].anns_field == "sparse"
+    ranker = call_args[1]["ranker"]
+    assert ranker is not None
+
+
+async def test_query_hybrid_search_weighted(
+    milvus_index, sample_chunks, sample_embeddings, embedding_dimension, mock_milvus_client
+):
+    """Test hybrid search with weighted reranker."""
+    mock_milvus_client.has_collection.return_value = True
+    await milvus_index.add_chunks(sample_chunks, sample_embeddings)
+
+    # Mock hybrid search results
+    mock_milvus_client.hybrid_search.return_value = [
+        [
+            {
+                "id": 0,
+                "distance": 0.1,
+                "entity": {"chunk_content": {"content": "mock chunk 1", "metadata": {"document_id": "doc1"}}},
+            },
+            {
+                "id": 1,
+                "distance": 0.2,
+                "entity": {"chunk_content": {"content": "mock chunk 2", "metadata": {"document_id": "doc2"}}},
+            },
+        ]
+    ]
+
+    # Test hybrid search with weighted reranker
+    query_embedding = np.random.rand(embedding_dimension).astype(np.float32)
+    query_string = "test query"
+    response = await milvus_index.query_hybrid(
+        embedding=query_embedding,
+        query_string=query_string,
+        k=2,
+        score_threshold=0.0,
+        reranker_type="weighted",
+        reranker_params={"alpha": 0.7},
+    )
+
+    assert isinstance(response, QueryChunksResponse)
+    assert len(response.chunks) == 2
+    assert len(response.scores) == 2
+
+    # Verify hybrid search was called with correct parameters
+    mock_milvus_client.hybrid_search.assert_called_once()
+    call_args = mock_milvus_client.hybrid_search.call_args
+    ranker = call_args[1]["ranker"]
+    assert ranker is not None
+
+
+async def test_query_hybrid_search_default_rrf(
+    milvus_index, sample_chunks, sample_embeddings, embedding_dimension, mock_milvus_client
+):
+    """Test hybrid search with an unrecognized reranker_type, which is expected to default to RRF."""
+    mock_milvus_client.has_collection.return_value = True
+    await milvus_index.add_chunks(sample_chunks, sample_embeddings)
+
+    # Mock hybrid search results
+    mock_milvus_client.hybrid_search.return_value = [
+        [
+            {
+                "id": 0,
+                "distance": 0.1,
+                "entity": {"chunk_content": {"content": "mock chunk 1", "metadata": {"document_id": "doc1"}}},
+            },
+        ]
+    ]
+
+    # Test hybrid search with default reranker (should be RRF)
+    query_embedding = np.random.rand(embedding_dimension).astype(np.float32)
+    query_string = "test query"
+    response = await milvus_index.query_hybrid(
+        embedding=query_embedding,
+        query_string=query_string,
+        k=1,
+        score_threshold=0.0,
+        reranker_type="unknown_type",  # Should default to RRF
+        reranker_params=None,  # Should use default impact_factor
+    )
+
+    assert isinstance(response, QueryChunksResponse)
+    assert len(response.chunks) == 1
+
+    # Verify hybrid search was called with a ranker (RRF is expected; only its presence is asserted)
+    mock_milvus_client.hybrid_search.assert_called_once()
+    call_args = mock_milvus_client.hybrid_search.call_args
+    ranker = call_args[1]["ranker"]
+    assert ranker is not None
--- a/tests/unit/providers/vector_io/test_faiss.py
+++ b/tests/unit/providers/vector_io/test_faiss.py
@ -9,7 +9,6 @@ from unittest.mock import AsyncMock, MagicMock, patch

 import numpy as np
 import pytest
-import pytest_asyncio

 from llama_stack.apis.files import Files
 from llama_stack.apis.inference import EmbeddingsResponse, Inference
@ -91,13 +90,13 @@ def faiss_config():
    return config


-@pytest_asyncio.fixture
+@pytest.fixture
 async def faiss_index(embedding_dimension):
    index = await FaissIndex.create(dimension=embedding_dimension)
    yield index


-@pytest_asyncio.fixture
+@pytest.fixture
 async def faiss_adapter(faiss_config, mock_inference_api, mock_files_api) -> FaissVectorIOAdapter:
    # Create the adapter
    adapter = FaissVectorIOAdapter(config=faiss_config, inference_api=mock_inference_api, files_api=mock_files_api)
@ -113,7 +112,6 @@ async def faiss_adapter(faiss_config, mock_inference_api, mock_files_api) -> Fai
        yield adapter


-@pytest.mark.asyncio
 async def test_faiss_query_vector_returns_infinity_when_query_and_embedding_are_identical(
    faiss_index, sample_chunks, sample_embeddings, embedding_dimension
 ):
@ -136,7 +134,6 @@ async def test_faiss_query_vector_returns_infinity_when_query_and_embedding_are_
        assert response.chunks[1] == sample_chunks[1]


-@pytest.mark.asyncio
 async def test_health_success():
    """Test that the health check returns OK status when faiss is working correctly."""
    # Create a fresh instance of FaissVectorIOAdapter for testing
@ -160,7 +157,6 @@ async def test_health_success():
        mock_index_flat.assert_called_once_with(128)  # VECTOR_DIMENSION is 128


-@pytest.mark.asyncio
 async def test_health_failure():
    """Test that the health check returns ERROR status when faiss encounters an error."""
    # Create a fresh instance of FaissVectorIOAdapter for testing
--- a/tests/unit/providers/vector_io/test_qdrant.py
+++ b/tests/unit/providers/vector_io/test_qdrant.py
@ -10,7 +10,6 @@ from typing import Any
 from unittest.mock import AsyncMock, MagicMock, patch

 import pytest
-import pytest_asyncio

 from llama_stack.apis.inference import EmbeddingsResponse, Inference
 from llama_stack.apis.vector_io import (
@ -24,6 +23,7 @@ from llama_stack.providers.inline.vector_io.qdrant.config import (
 from llama_stack.providers.remote.vector_io.qdrant.qdrant import (
    QdrantVectorIOAdapter,
 )
+from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig

 # This test is a unit test for the QdrantVectorIOAdapter class. This should only contain
 # tests which are specific to this class. More general (API-level) tests should be placed in
@ -37,7 +37,8 @@ from llama_stack.providers.remote.vector_io.qdrant.qdrant import (

@pytest.fixture
 def qdrant_config(tmp_path) -> InlineQdrantVectorIOConfig:
-    return InlineQdrantVectorIOConfig(path=os.path.join(tmp_path, "qdrant.db"))
+    kvstore_config = SqliteKVStoreConfig(db_name=os.path.join(tmp_path, "test_kvstore.db"))
+    return InlineQdrantVectorIOConfig(path=os.path.join(tmp_path, "qdrant.db"), kvstore=kvstore_config)


@pytest.fixture(scope="session")
@ -51,6 +52,9 @@ def mock_vector_db(vector_db_id) -> MagicMock:
    mock_vector_db.embedding_model = "embedding_model"
    mock_vector_db.identifier = vector_db_id
    mock_vector_db.embedding_dimension = 384
+    mock_vector_db.model_dump_json.return_value = (
+        '{"identifier": "' + vector_db_id + '", "embedding_model": "embedding_model", "embedding_dimension": 384}'
+    )
    return mock_vector_db


@ -68,9 +72,9 @@ def mock_api_service(sample_embeddings):
    return mock_api_service


-@pytest_asyncio.fixture
+@pytest.fixture
 async def qdrant_adapter(qdrant_config, mock_vector_db_store, mock_api_service, loop) -> QdrantVectorIOAdapter:
-    adapter = QdrantVectorIOAdapter(config=qdrant_config, inference_api=mock_api_service)
+    adapter = QdrantVectorIOAdapter(config=qdrant_config, inference_api=mock_api_service, files_api=None)
    adapter.vector_db_store = mock_vector_db_store
    await adapter.initialize()
    yield adapter
@ -80,7 +84,6 @@ async def qdrant_adapter(qdrant_config, mock_vector_db_store, mock_api_service,
 __QUERY = "Sample query"


-@pytest.mark.asyncio
@pytest.mark.parametrize("max_query_chunks, expected_chunks", [(2, 2), (100, 60)])
 async def test_qdrant_adapter_returns_expected_chunks(
    qdrant_adapter: QdrantVectorIOAdapter,
@ -111,7 +114,6 @@ def _prepare_for_json(value: Any) -> str:


@patch("llama_stack.providers.utils.telemetry.trace_protocol._prepare_for_json", new=_prepare_for_json)
-@pytest.mark.asyncio
 async def test_qdrant_register_and_unregister_vector_db(
    qdrant_adapter: QdrantVectorIOAdapter,
    mock_vector_db,
--- a/tests/unit/providers/vector_io/test_sqlite_vec.py
+++ b/tests/unit/providers/vector_io/test_sqlite_vec.py
@ -8,7 +8,6 @@ import asyncio

 import numpy as np
 import pytest
-import pytest_asyncio

 from llama_stack.apis.vector_io import Chunk, QueryChunksResponse
 from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import (
@ -34,23 +33,21 @@ def loop():
    return asyncio.new_event_loop()


-@pytest_asyncio.fixture
+@pytest.fixture
 async def sqlite_vec_index(embedding_dimension, tmp_path_factory):
    temp_dir = tmp_path_factory.getbasetemp()
    db_path = str(temp_dir / "test_sqlite.db")
-    index = await SQLiteVecIndex.create(dimension=embedding_dimension, db_path=db_path, bank_id="test_bank")
+    index = await SQLiteVecIndex.create(dimension=embedding_dimension, db_path=db_path, bank_id="test_bank.123")
    yield index
    await index.delete()


-@pytest.mark.asyncio
 async def test_query_chunk_metadata(sqlite_vec_index, sample_chunks_with_metadata, sample_embeddings_with_metadata):
    await sqlite_vec_index.add_chunks(sample_chunks_with_metadata, sample_embeddings_with_metadata)
    response = await sqlite_vec_index.query_vector(sample_embeddings_with_metadata[-1], k=2, score_threshold=0.0)
    assert response.chunks[0].chunk_metadata == sample_chunks_with_metadata[-1].chunk_metadata


-@pytest.mark.asyncio
 async def test_query_chunks_full_text_search(sqlite_vec_index, sample_chunks, sample_embeddings):
    await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
    query_string = "Sentence 5"
@ -68,7 +65,6 @@ async def test_query_chunks_full_text_search(sqlite_vec_index, sample_chunks, sa
    assert len(response_no_results.chunks) == 0, f"Expected 0 results, but got {len(response_no_results.chunks)}"


-@pytest.mark.asyncio
 async def test_query_chunks_hybrid(sqlite_vec_index, sample_chunks, sample_embeddings):
    await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)

@ -90,7 +86,6 @@ async def test_query_chunks_hybrid(sqlite_vec_index, sample_chunks, sample_embed
    assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1))


-@pytest.mark.asyncio
 async def test_query_chunks_full_text_search_k_greater_than_results(sqlite_vec_index, sample_chunks, sample_embeddings):
    # Re-initialize with a clean index
    await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
@ -103,7 +98,6 @@ async def test_query_chunks_full_text_search_k_greater_than_results(sqlite_vec_i
    assert any("Sentence 1 from document 0" in chunk.content for chunk in response.chunks), "Expected chunk not found"


-@pytest.mark.asyncio
 async def test_chunk_id_conflict(sqlite_vec_index, sample_chunks, embedding_dimension):
    """Test that chunk IDs do not conflict across batches when inserting chunks."""
    # Reduce batch size to force multiple batches for same document
@ -116,7 +110,7 @@ async def test_chunk_id_conflict(sqlite_vec_index, sample_chunks, embedding_dime
    cur = connection.cursor()

    # Retrieve all chunk IDs to check for duplicates
-    cur.execute(f"SELECT id FROM {sqlite_vec_index.metadata_table}")
+    cur.execute(f"SELECT id FROM [{sqlite_vec_index.metadata_table}]")
    chunk_ids = [row[0] for row in cur.fetchall()]
    cur.close()
    connection.close()
@ -134,7 +128,6 @@ async def sqlite_vec_adapter(sqlite_connection):
    await adapter.shutdown()


-@pytest.mark.asyncio
 async def test_query_chunks_hybrid_no_keyword_matches(sqlite_vec_index, sample_chunks, sample_embeddings):
    """Test hybrid search when keyword search returns no matches - should still return vector results."""
    await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
@ -163,7 +156,6 @@ async def test_query_chunks_hybrid_no_keyword_matches(sqlite_vec_index, sample_c
    assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1))


-@pytest.mark.asyncio
 async def test_query_chunks_hybrid_score_threshold(sqlite_vec_index, sample_chunks, sample_embeddings):
    """Test hybrid search with a high score threshold."""
    await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
@ -185,7 +177,6 @@ async def test_query_chunks_hybrid_score_threshold(sqlite_vec_index, sample_chun
    assert len(response.chunks) == 0


-@pytest.mark.asyncio
 async def test_query_chunks_hybrid_different_embedding(
    sqlite_vec_index, sample_chunks, sample_embeddings, embedding_dimension
 ):
@ -211,7 +202,6 @@ async def test_query_chunks_hybrid_different_embedding(
    assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1))


-@pytest.mark.asyncio
 async def test_query_chunks_hybrid_rrf_ranking(sqlite_vec_index, sample_chunks, sample_embeddings):
    """Test that RRF properly combines rankings when documents appear in both search methods."""
    await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
@ -236,7 +226,6 @@ async def test_query_chunks_hybrid_rrf_ranking(sqlite_vec_index, sample_chunks,
    assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1))


-@pytest.mark.asyncio
 async def test_query_chunks_hybrid_score_selection(sqlite_vec_index, sample_chunks, sample_embeddings):
    await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)

@ -284,7 +273,6 @@ async def test_query_chunks_hybrid_score_selection(sqlite_vec_index, sample_chun
    assert response.scores[0] == pytest.approx(2.0 / 61.0, rel=1e-6)  # Should behave like RRF


-@pytest.mark.asyncio
 async def test_query_chunks_hybrid_mixed_results(sqlite_vec_index, sample_chunks, sample_embeddings):
    """Test hybrid search with documents that appear in only one search method."""
    await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
@ -313,7 +301,6 @@ async def test_query_chunks_hybrid_mixed_results(sqlite_vec_index, sample_chunks
    assert "document-2" in doc_ids  # From keyword search


-@pytest.mark.asyncio
 async def test_query_chunks_hybrid_weighted_reranker_parametrization(
    sqlite_vec_index, sample_chunks, sample_embeddings
 ):
@ -369,7 +356,6 @@ async def test_query_chunks_hybrid_weighted_reranker_parametrization(
    )


-@pytest.mark.asyncio
 async def test_query_chunks_hybrid_rrf_impact_factor(sqlite_vec_index, sample_chunks, sample_embeddings):
    """Test RRFReRanker with different impact factors."""
    await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
@ -401,7 +387,6 @@ async def test_query_chunks_hybrid_rrf_impact_factor(sqlite_vec_index, sample_ch
    assert response.scores[0] == pytest.approx(2.0 / 101.0, rel=1e-6)


-@pytest.mark.asyncio
 async def test_query_chunks_hybrid_edge_cases(sqlite_vec_index, sample_chunks, sample_embeddings):
    await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)

@ -445,7 +430,6 @@ async def test_query_chunks_hybrid_edge_cases(sqlite_vec_index, sample_chunks, s
    assert len(response.chunks) <= 100


-@pytest.mark.asyncio
 async def test_query_chunks_hybrid_tie_breaking(
    sqlite_vec_index, sample_embeddings, embedding_dimension, tmp_path_factory
 ):
--- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
+++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
@ -25,12 +25,10 @@ from llama_stack.providers.remote.vector_io.milvus.milvus import VECTOR_DBS_PREF
 # -v -s --tb=short --disable-warnings --asyncio-mode=auto


-@pytest.mark.asyncio
 async def test_initialize_index(vector_index):
    await vector_index.initialize()


-@pytest.mark.asyncio
 async def test_add_chunks_query_vector(vector_index, sample_chunks, sample_embeddings):
    vector_index.delete()
    vector_index.initialize()
@ -40,7 +38,6 @@ async def test_add_chunks_query_vector(vector_index, sample_chunks, sample_embed
    vector_index.delete()


-@pytest.mark.asyncio
 async def test_chunk_id_conflict(vector_index, sample_chunks, embedding_dimension):
    embeddings = np.random.rand(len(sample_chunks), embedding_dimension).astype(np.float32)
    await vector_index.add_chunks(sample_chunks, embeddings)
@ -54,7 +51,6 @@ async def test_chunk_id_conflict(vector_index, sample_chunks, embedding_dimensio
    assert len(contents) == len(set(contents))


-@pytest.mark.asyncio
 async def test_initialize_adapter_with_existing_kvstore(vector_io_adapter):
    key = f"{VECTOR_DBS_PREFIX}db1"
    dummy = VectorDB(
@ -65,7 +61,6 @@ async def test_initialize_adapter_with_existing_kvstore(vector_io_adapter):
    await vector_io_adapter.initialize()


-@pytest.mark.asyncio
 async def test_persistence_across_adapter_restarts(vector_io_adapter):
    await vector_io_adapter.initialize()
    dummy = VectorDB(
@ -79,7 +74,6 @@ async def test_persistence_across_adapter_restarts(vector_io_adapter):
    await vector_io_adapter.shutdown()


-@pytest.mark.asyncio
 async def test_register_and_unregister_vector_db(vector_io_adapter):
    unique_id = f"foo_db_{np.random.randint(1e6)}"
    dummy = VectorDB(
@ -92,17 +86,19 @@ async def test_register_and_unregister_vector_db(vector_io_adapter):
    assert dummy.identifier not in vector_io_adapter.cache


-@pytest.mark.asyncio
-async def test_query_unregistered_raises(vector_io_adapter):
+async def test_query_unregistered_raises(vector_io_adapter, vector_provider):
    fake_emb = np.zeros(8, dtype=np.float32)
-    with pytest.raises(ValueError):
-        await vector_io_adapter.query_chunks("no_such_db", fake_emb)
+    if vector_provider == "chroma":
+        with pytest.raises(AttributeError):
+            await vector_io_adapter.query_chunks("no_such_db", fake_emb)
+    else:
+        with pytest.raises(ValueError):
+            await vector_io_adapter.query_chunks("no_such_db", fake_emb)


-@pytest.mark.asyncio
 async def test_insert_chunks_calls_underlying_index(vector_io_adapter):
    fake_index = AsyncMock()
-    vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=fake_index)
+    vector_io_adapter.cache["db1"] = fake_index

    chunks = ["chunk1", "chunk2"]
    await vector_io_adapter.insert_chunks("db1", chunks)
@ -110,7 +106,6 @@ async def test_insert_chunks_calls_underlying_index(vector_io_adapter):
    fake_index.insert_chunks.assert_awaited_once_with(chunks)


-@pytest.mark.asyncio
 async def test_insert_chunks_missing_db_raises(vector_io_adapter):
    vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=None)

@ -118,11 +113,10 @@ async def test_insert_chunks_missing_db_raises(vector_io_adapter):
        await vector_io_adapter.insert_chunks("db_not_exist", [])


-@pytest.mark.asyncio
 async def test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter):
    expected = QueryChunksResponse(chunks=[Chunk(content="c1")], scores=[0.1])
    fake_index = AsyncMock(query_chunks=AsyncMock(return_value=expected))
-    vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=fake_index)
+    vector_io_adapter.cache["db1"] = fake_index

    response = await vector_io_adapter.query_chunks("db1", "my_query", {"param": 1})

@ -130,7 +124,6 @@ async def test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter
    assert response is expected


-@pytest.mark.asyncio
 async def test_query_chunks_missing_db_raises(vector_io_adapter):
    vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=None)

@ -138,7 +131,6 @@ async def test_query_chunks_missing_db_raises(vector_io_adapter):
        await vector_io_adapter.query_chunks("db_missing", "q", None)


-@pytest.mark.asyncio
 async def test_save_openai_vector_store(vector_io_adapter):
    store_id = "vs_1234"
    openai_vector_store = {
@ -155,7 +147,6 @@ async def test_save_openai_vector_store(vector_io_adapter):
    assert vector_io_adapter.openai_vector_stores[openai_vector_store["id"]] == openai_vector_store


-@pytest.mark.asyncio
 async def test_update_openai_vector_store(vector_io_adapter):
    store_id = "vs_1234"
    openai_vector_store = {
@ -172,7 +163,6 @@ async def test_update_openai_vector_store(vector_io_adapter):
    assert vector_io_adapter.openai_vector_stores[openai_vector_store["id"]] == openai_vector_store


-@pytest.mark.asyncio
 async def test_delete_openai_vector_store(vector_io_adapter):
    store_id = "vs_1234"
    openai_vector_store = {
@ -188,7 +178,6 @@ async def test_delete_openai_vector_store(vector_io_adapter):
    assert openai_vector_store["id"] not in vector_io_adapter.openai_vector_stores


-@pytest.mark.asyncio
 async def test_load_openai_vector_stores(vector_io_adapter):
    store_id = "vs_1234"
    openai_vector_store = {
@ -204,7 +193,6 @@ async def test_load_openai_vector_stores(vector_io_adapter):
    assert loaded_stores[store_id] == openai_vector_store


-@pytest.mark.asyncio
 async def test_save_openai_vector_store_file(vector_io_adapter, tmp_path_factory):
    store_id = "vs_1234"
    file_id = "file_1234"
@ -226,7 +214,6 @@ async def test_save_openai_vector_store_file(vector_io_adapter, tmp_path_factory
    await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, file_contents)


-@pytest.mark.asyncio
 async def test_update_openai_vector_store_file(vector_io_adapter, tmp_path_factory):
    store_id = "vs_1234"
    file_id = "file_1234"
@ -260,7 +247,6 @@ async def test_update_openai_vector_store_file(vector_io_adapter, tmp_path_facto
    assert loaded_contents != file_info


-@pytest.mark.asyncio
 async def test_load_openai_vector_store_file_contents(vector_io_adapter, tmp_path_factory):
    store_id = "vs_1234"
    file_id = "file_1234"
@ -284,7 +270,6 @@ async def test_load_openai_vector_store_file_contents(vector_io_adapter, tmp_pat
    assert loaded_contents == file_contents


-@pytest.mark.asyncio
 async def test_delete_openai_vector_store_file_from_storage(vector_io_adapter, tmp_path_factory):
    store_id = "vs_1234"
    file_id = "file_1234"
@ -305,5 +290,7 @@ async def test_delete_openai_vector_store_file_from_storage(vector_io_adapter, t
    await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, file_contents)
    await vector_io_adapter._delete_openai_vector_store_file_from_storage(store_id, file_id)

+    loaded_file_info = await vector_io_adapter._load_openai_vector_store_file(store_id, file_id)
+    assert loaded_file_info == {}
    loaded_contents = await vector_io_adapter._load_openai_vector_store_file_contents(store_id, file_id)
    assert loaded_contents == []
--- a/tests/unit/providers/vector_io/test_vector_utils.py
+++ b/tests/unit/providers/vector_io/test_vector_utils.py
@ -5,7 +5,7 @@
 # the root directory of this source tree.

 from llama_stack.apis.vector_io import Chunk, ChunkMetadata
-from llama_stack.providers.utils.vector_io.chunk_utils import generate_chunk_id
+from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id

 # This test is a unit test for the chunk_utils.py helpers. This should only contain
 # tests which are specific to this file. More general (API-level) tests should be placed in