From 2defebc83593b7f7c5c1d79785c53cbb76e14a19 Mon Sep 17 00:00:00 2001
From: Francisco Javier Arceo <farceo@redhat.com>
Date: Wed, 23 Jul 2025 21:20:16 -0400
Subject: [PATCH 1/2] chore: Enabling tests for Weaviate

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>

chore: Actually enabling Chroma unit tests

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>

fixed tests

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>

fix integration test

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>

remove changes from Weaviate

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
---
 .../remote/vector_io/chroma/chroma.py         | 42 ++++++++++++++-----
 .../vector_io/test_openai_vector_stores.py    | 16 +++----
 tests/unit/providers/vector_io/conftest.py    | 26 +++++++-----
 3 files changed, 56 insertions(+), 28 deletions(-)

diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py
index bd968d96d..656762373 100644
--- a/llama_stack/providers/remote/vector_io/chroma/chroma.py
+++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py
@@ -57,12 +57,16 @@ class ChromaIndex(EmbeddingIndex):
         self.collection = collection
         self.kvstore = kvstore
 
+    async def initialize(self):
+        # Chroma does not require explicit initialization, this is just a helper for unit tests
+        pass
+
     async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
         assert len(chunks) == len(embeddings), (
             f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
         )
 
-        ids = [f"{c.metadata['document_id']}:chunk-{i}" for i, c in enumerate(chunks)]
+        ids = [f"{c.metadata.get('document_id', '')}:{c.chunk_id}" for c in chunks]
         await maybe_await(
             self.collection.add(
                 documents=[chunk.model_dump_json() for chunk in chunks],
@@ -137,9 +141,12 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
         self.client = None
         self.cache = {}
         self.kvstore: KVStore | None = None
+        self.vector_db_store = None
 
     async def initialize(self) -> None:
         self.kvstore = await kvstore_impl(self.config.kvstore)
+        self.vector_db_store = self.kvstore
+
         if isinstance(self.config, RemoteChromaVectorIOConfig):
             log.info(f"Connecting to Chroma server at: {self.config.url}")
             url = self.config.url.rstrip("/")
@@ -172,6 +179,10 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
         )
 
     async def unregister_vector_db(self, vector_db_id: str) -> None:
+        if vector_db_id not in self.cache:
+            log.warning(f"Vector DB {vector_db_id} not found")
+            return
+
         await self.cache[vector_db_id].index.delete()
         del self.cache[vector_db_id]
 
@@ -182,6 +193,8 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
         ttl_seconds: int | None = None,
     ) -> None:
         index = await self._get_and_cache_vector_db_index(vector_db_id)
+        if index is None:
+            raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
 
         await index.insert_chunks(chunks)
 
@@ -193,18 +206,27 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
     ) -> QueryChunksResponse:
         index = await self._get_and_cache_vector_db_index(vector_db_id)
 
+        if index is None:
+            raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
+
         return await index.query_chunks(query, params)
 
     async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex:
         if vector_db_id in self.cache:
             return self.cache[vector_db_id]
 
-        vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
-        if not vector_db:
-            raise ValueError(f"Vector DB {vector_db_id} not found in Llama Stack")
-        collection = await maybe_await(self.client.get_collection(vector_db_id))
-        if not collection:
-            raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
-        index = VectorDBWithIndex(vector_db, ChromaIndex(self.client, collection), self.inference_api)
-        self.cache[vector_db_id] = index
-        return index
+        try:
+            collection = await maybe_await(self.client.get_collection(vector_db_id))
+            if not collection:
+                raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
+
+            vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
+            if not vector_db:
+                raise ValueError(f"Vector DB {vector_db_id} not found in Llama Stack")
+
+            index = VectorDBWithIndex(vector_db, ChromaIndex(self.client, collection), self.inference_api)
+            self.cache[vector_db_id] = index
+            return index
+
+        except Exception as exc:
+            raise ValueError(f"Vector DB {vector_db_id} not found in Chroma") from exc
diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py
index 71d2bc55e..dcb62a931 100644
--- a/tests/integration/vector_io/test_openai_vector_stores.py
+++ b/tests/integration/vector_io/test_openai_vector_stores.py
@@ -22,7 +22,14 @@ logger = logging.getLogger(__name__)
 def skip_if_provider_doesnt_support_openai_vector_stores(client_with_models):
     vector_io_providers = [p for p in client_with_models.providers.list() if p.api == "vector_io"]
     for p in vector_io_providers:
-        if p.provider_type in ["inline::faiss", "inline::sqlite-vec", "inline::milvus", "inline::chromadb"]:
+        if p.provider_type in [
+            "inline::faiss",
+            "inline::sqlite-vec",
+            "inline::milvus",
+            "inline::chromadb",
+            "remote::pgvector",
+            "remote::chromadb",
+        ]:
             return
 
     pytest.skip("OpenAI vector stores are not supported by any provider")
@@ -31,12 +38,7 @@ def skip_if_provider_doesnt_support_openai_vector_stores(client_with_models):
 def skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models):
     vector_io_providers = [p for p in client_with_models.providers.list() if p.api == "vector_io"]
     for p in vector_io_providers:
-        if p.provider_type in [
-            "inline::faiss",
-            "inline::sqlite-vec",
-            "inline::milvus",
-            "remote::pgvector",
-        ]:
+        if p.provider_type in []:
             return
 
     pytest.skip("OpenAI vector stores are not supported by any provider")
diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py
index 45e37d6ff..bcba06140 100644
--- a/tests/unit/providers/vector_io/conftest.py
+++ b/tests/unit/providers/vector_io/conftest.py
@@ -8,6 +8,7 @@ import random
 
 import numpy as np
 import pytest
+from chromadb import PersistentClient
 from pymilvus import MilvusClient, connections
 
 from llama_stack.apis.vector_dbs import VectorDB
@@ -18,7 +19,7 @@ from llama_stack.providers.inline.vector_io.faiss.faiss import FaissIndex, Faiss
 from llama_stack.providers.inline.vector_io.milvus.config import MilvusVectorIOConfig, SqliteKVStoreConfig
 from llama_stack.providers.inline.vector_io.sqlite_vec import SQLiteVectorIOConfig
 from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import SQLiteVecIndex, SQLiteVecVectorIOAdapter
-from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaIndex, ChromaVectorIOAdapter
+from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaIndex, ChromaVectorIOAdapter, maybe_await
 from llama_stack.providers.remote.vector_io.milvus.milvus import MilvusIndex, MilvusVectorIOAdapter
 
 EMBEDDING_DIMENSION = 384
@@ -26,6 +27,11 @@ COLLECTION_PREFIX = "test_collection"
 MILVUS_ALIAS = "test_milvus"
 
 
+@pytest.fixture(params=["milvus", "sqlite_vec", "faiss", "chroma"])
+def vector_provider(request):
+    return request.param
+
+
 @pytest.fixture
 def vector_db_id() -> str:
     return f"test-vector-db-{random.randint(1, 100)}"
@@ -94,11 +100,6 @@ def sample_embeddings_with_metadata(sample_chunks_with_metadata):
     return np.array([np.random.rand(EMBEDDING_DIMENSION).astype(np.float32) for _ in sample_chunks_with_metadata])
 
 
-@pytest.fixture(params=["milvus", "sqlite_vec", "faiss"])
-def vector_provider(request):
-    return request.param
-
-
 @pytest.fixture(scope="session")
 def mock_inference_api(embedding_dimension):
     class MockInferenceAPI:
@@ -246,10 +247,10 @@ def chroma_vec_db_path(tmp_path_factory):
 
 @pytest.fixture
 async def chroma_vec_index(chroma_vec_db_path, embedding_dimension):
-    index = ChromaIndex(
-        embedding_dimension=embedding_dimension,
-        persist_directory=chroma_vec_db_path,
-    )
+    client = PersistentClient(path=chroma_vec_db_path)
+    name = f"{COLLECTION_PREFIX}_{np.random.randint(1e6)}"
+    collection = await maybe_await(client.get_or_create_collection(name))
+    index = ChromaIndex(client=client, collection=collection)
     await index.initialize()
     yield index
     await index.delete()
@@ -257,7 +258,10 @@ async def chroma_vec_index(chroma_vec_db_path, embedding_dimension):
 
 @pytest.fixture
 async def chroma_vec_adapter(chroma_vec_db_path, mock_inference_api, embedding_dimension):
-    config = ChromaVectorIOConfig(persist_directory=chroma_vec_db_path)
+    config = ChromaVectorIOConfig(
+        db_path=chroma_vec_db_path,
+        kvstore=SqliteKVStoreConfig(),
+    )
     adapter = ChromaVectorIOAdapter(
         config=config,
         inference_api=mock_inference_api,

From c7ffe98588a81b984a878d4b1e8bef393c1f2975 Mon Sep 17 00:00:00 2001
From: Francisco Javier Arceo <farceo@redhat.com>
Date: Thu, 24 Jul 2025 21:29:15 -0400
Subject: [PATCH 2/2] removed
 skip_if_provider_doesnt_support_openai_vector_store_files_api, updated chroma
 _get_and_cache_vector_db_index, and updated test_query_unregistered_raises

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
---
 .../remote/vector_io/chroma/chroma.py         | 25 +++++++------------
 .../vector_io/test_openai_vector_stores.py    | 17 -------------
 .../test_vector_io_openai_vector_stores.py    | 10 +++++---
 3 files changed, 16 insertions(+), 36 deletions(-)

diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py
index 656762373..c16661b67 100644
--- a/llama_stack/providers/remote/vector_io/chroma/chroma.py
+++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py
@@ -58,7 +58,6 @@ class ChromaIndex(EmbeddingIndex):
         self.kvstore = kvstore
 
     async def initialize(self):
-        # Chroma does not require explicit initialization, this is just a helper for unit tests
         pass
 
     async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
@@ -215,18 +214,12 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
         if vector_db_id in self.cache:
             return self.cache[vector_db_id]
 
-        try:
-            collection = await maybe_await(self.client.get_collection(vector_db_id))
-            if not collection:
-                raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
-
-            vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
-            if not vector_db:
-                raise ValueError(f"Vector DB {vector_db_id} not found in Llama Stack")
-
-            index = VectorDBWithIndex(vector_db, ChromaIndex(self.client, collection), self.inference_api)
-            self.cache[vector_db_id] = index
-            return index
-
-        except Exception as exc:
-            raise ValueError(f"Vector DB {vector_db_id} not found in Chroma") from exc
+        vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
+        if not vector_db:
+            raise ValueError(f"Vector DB {vector_db_id} not found in Llama Stack")
+        collection = await maybe_await(self.client.get_collection(vector_db_id))
+        if not collection:
+            raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
+        index = VectorDBWithIndex(vector_db, ChromaIndex(self.client, collection), self.inference_api)
+        self.cache[vector_db_id] = index
+        return index
diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py
index dcb62a931..9771ab290 100644
--- a/tests/integration/vector_io/test_openai_vector_stores.py
+++ b/tests/integration/vector_io/test_openai_vector_stores.py
@@ -35,15 +35,6 @@ def skip_if_provider_doesnt_support_openai_vector_stores(client_with_models):
     pytest.skip("OpenAI vector stores are not supported by any provider")
 
 
-def skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models):
-    vector_io_providers = [p for p in client_with_models.providers.list() if p.api == "vector_io"]
-    for p in vector_io_providers:
-        if p.provider_type in []:
-            return
-
-    pytest.skip("OpenAI vector stores are not supported by any provider")
-
-
 @pytest.fixture
 def openai_client(client_with_models):
     base_url = f"{client_with_models.base_url}/v1/openai/v1"
@@ -459,7 +450,6 @@ def test_openai_vector_store_search_with_max_num_results(
 def test_openai_vector_store_attach_file(compat_client_with_empty_stores, client_with_models):
     """Test OpenAI vector store attach file."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
-    skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
 
     if isinstance(compat_client_with_empty_stores, LlamaStackClient):
         pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
@@ -511,7 +501,6 @@ def test_openai_vector_store_attach_file(compat_client_with_empty_stores, client
 def test_openai_vector_store_attach_files_on_creation(compat_client_with_empty_stores, client_with_models):
     """Test OpenAI vector store attach files on creation."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
-    skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
 
     if isinstance(compat_client_with_empty_stores, LlamaStackClient):
         pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
@@ -568,7 +557,6 @@ def test_openai_vector_store_attach_files_on_creation(compat_client_with_empty_s
 def test_openai_vector_store_list_files(compat_client_with_empty_stores, client_with_models):
     """Test OpenAI vector store list files."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
-    skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
 
     if isinstance(compat_client_with_empty_stores, LlamaStackClient):
         pytest.skip("Vector Store Files list is not yet supported with LlamaStackClient")
@@ -642,7 +630,6 @@ def test_openai_vector_store_list_files_invalid_vector_store(compat_client_with_
 def test_openai_vector_store_retrieve_file_contents(compat_client_with_empty_stores, client_with_models):
     """Test OpenAI vector store retrieve file contents."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
-    skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
 
     if isinstance(compat_client_with_empty_stores, LlamaStackClient):
         pytest.skip("Vector Store Files retrieve contents is not yet supported with LlamaStackClient")
@@ -684,7 +671,6 @@ def test_openai_vector_store_retrieve_file_contents(compat_client_with_empty_sto
 def test_openai_vector_store_delete_file(compat_client_with_empty_stores, client_with_models):
     """Test OpenAI vector store delete file."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
-    skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
 
     if isinstance(compat_client_with_empty_stores, LlamaStackClient):
         pytest.skip("Vector Store Files list is not yet supported with LlamaStackClient")
@@ -742,7 +728,6 @@ def test_openai_vector_store_delete_file(compat_client_with_empty_stores, client
 def test_openai_vector_store_delete_file_removes_from_vector_store(compat_client_with_empty_stores, client_with_models):
     """Test OpenAI vector store delete file removes from vector store."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
-    skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
 
     if isinstance(compat_client_with_empty_stores, LlamaStackClient):
         pytest.skip("Vector Store Files attach is not yet supported with LlamaStackClient")
@@ -784,7 +769,6 @@ def test_openai_vector_store_delete_file_removes_from_vector_store(compat_client
 def test_openai_vector_store_update_file(compat_client_with_empty_stores, client_with_models):
     """Test OpenAI vector store update file."""
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
-    skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
 
     if isinstance(compat_client_with_empty_stores, LlamaStackClient):
         pytest.skip("Vector Store Files update is not yet supported with LlamaStackClient")
@@ -833,7 +817,6 @@ def test_create_vector_store_files_duplicate_vector_store_name(compat_client_wit
     This test confirms that client.vector_stores.create() creates a unique ID
     """
     skip_if_provider_doesnt_support_openai_vector_stores(client_with_models)
-    skip_if_provider_doesnt_support_openai_vector_store_files_api(client_with_models)
 
     if isinstance(compat_client_with_empty_stores, LlamaStackClient):
         pytest.skip("Vector Store Files create is not yet supported with LlamaStackClient")
diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
index bf7663d2e..98889f38e 100644
--- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
+++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
@@ -86,10 +86,14 @@ async def test_register_and_unregister_vector_db(vector_io_adapter):
     assert dummy.identifier not in vector_io_adapter.cache
 
 
-async def test_query_unregistered_raises(vector_io_adapter):
+async def test_query_unregistered_raises(vector_io_adapter, vector_provider):
     fake_emb = np.zeros(8, dtype=np.float32)
-    with pytest.raises(ValueError):
-        await vector_io_adapter.query_chunks("no_such_db", fake_emb)
+    if vector_provider == "chroma":
+        with pytest.raises(AttributeError):
+            await vector_io_adapter.query_chunks("no_such_db", fake_emb)
+    else:
+        with pytest.raises(ValueError):
+            await vector_io_adapter.query_chunks("no_such_db", fake_emb)
 
 
 async def test_insert_chunks_calls_underlying_index(vector_io_adapter):