chore: Enabling tests for Weaviate

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com> chore: Actually enabling Chroma unit tests Signed-off-by: Francisco Javier Arceo <farceo@redhat.com> fixed tests Signed-off-by: Francisco Javier Arceo <farceo@redhat.com> fix integration test Signed-off-by: Francisco Javier Arceo <farceo@redhat.com> remove changes from weaviate Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
2025-07-27 06:28:50 +00:00 · 2025-07-23 21:20:16 -04:00 · 2025-07-23 21:20:16 -04:00 · 2defebc835
commit 2defebc835
parent cd8715d327
3 changed files with 56 additions and 28 deletions
--- a/llama_stack/providers/remote/vector_io/chroma/chroma.py
+++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py
@@ -57,12 +57,16 @@ class ChromaIndex(EmbeddingIndex):
        self.collection = collection
        self.kvstore = kvstore

+    async def initialize(self):
+        # Chroma does not require explicit initialization, this is just a helper for unit tests
+        pass
+
    async def add_chunks(self, chunks: list[Chunk], embeddings: NDArray):
        assert len(chunks) == len(embeddings), (
            f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}"
        )

-        ids = [f"{c.metadata['document_id']}:chunk-{i}" for i, c in enumerate(chunks)]
+        ids = [f"{c.metadata.get('document_id', '')}:{c.chunk_id}" for c in chunks]
        await maybe_await(
            self.collection.add(
                documents=[chunk.model_dump_json() for chunk in chunks],
@@ -137,9 +141,12 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
        self.client = None
        self.cache = {}
        self.kvstore: KVStore | None = None
+        self.vector_db_store = None

    async def initialize(self) -> None:
        self.kvstore = await kvstore_impl(self.config.kvstore)
+        self.vector_db_store = self.kvstore
+
        if isinstance(self.config, RemoteChromaVectorIOConfig):
            log.info(f"Connecting to Chroma server at: {self.config.url}")
            url = self.config.url.rstrip("/")
@@ -172,6 +179,10 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
        )

    async def unregister_vector_db(self, vector_db_id: str) -> None:
+        if vector_db_id not in self.cache:
+            log.warning(f"Vector DB {vector_db_id} not found")
+            return
+
        await self.cache[vector_db_id].index.delete()
        del self.cache[vector_db_id]

@@ -182,6 +193,8 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
        ttl_seconds: int | None = None,
    ) -> None:
        index = await self._get_and_cache_vector_db_index(vector_db_id)
+        if index is None:
+            raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")

        await index.insert_chunks(chunks)

@@ -193,18 +206,27 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolP
    ) -> QueryChunksResponse:
        index = await self._get_and_cache_vector_db_index(vector_db_id)

+        if index is None:
+            raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
+
        return await index.query_chunks(query, params)

    async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> VectorDBWithIndex:
        if vector_db_id in self.cache:
            return self.cache[vector_db_id]

-        vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
-        if not vector_db:
-            raise ValueError(f"Vector DB {vector_db_id} not found in Llama Stack")
-        collection = await maybe_await(self.client.get_collection(vector_db_id))
-        if not collection:
-            raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
-        index = VectorDBWithIndex(vector_db, ChromaIndex(self.client, collection), self.inference_api)
-        self.cache[vector_db_id] = index
-        return index
+        try:
+            collection = await maybe_await(self.client.get_collection(vector_db_id))
+            if not collection:
+                raise ValueError(f"Vector DB {vector_db_id} not found in Chroma")
+
+            vector_db = await self.vector_db_store.get_vector_db(vector_db_id)
+            if not vector_db:
+                raise ValueError(f"Vector DB {vector_db_id} not found in Llama Stack")
+
+            index = VectorDBWithIndex(vector_db, ChromaIndex(self.client, collection), self.inference_api)
+            self.cache[vector_db_id] = index
+            return index
+
+        except Exception as exc:
+            raise ValueError(f"Vector DB {vector_db_id} not found in Chroma") from exc