From 554c78ba6620b922a5eb2b4eae81ae7c7cdb471e Mon Sep 17 00:00:00 2001 From: kimbwook Date: Wed, 6 Aug 2025 14:51:10 +0900 Subject: [PATCH 1/9] add delete_chunk feature to Chroma --- llama_stack/providers/remote/vector_io/chroma/chroma.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 26aeaedfb..442e64f5d 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -10,6 +10,7 @@ from typing import Any from urllib.parse import urlparse import chromadb +from chromadb.api.models.AsyncCollection import AsyncCollection from numpy.typing import NDArray from llama_stack.apis.files import Files @@ -116,7 +117,10 @@ class ChromaIndex(EmbeddingIndex): raise NotImplementedError("Keyword search is not supported in Chroma") async def delete_chunk(self, chunk_id: str) -> None: - raise NotImplementedError("delete_chunk is not supported in Chroma") + if isinstance(self.collection, AsyncCollection): + await self.collection.delete([chunk_id]) + else: + self.collection.delete([chunk_id]) async def query_hybrid( self, From 26fb2088771e9f88a61347ee2af9ee26a2b42b64 Mon Sep 17 00:00:00 2001 From: kimbwook Date: Thu, 7 Aug 2025 10:09:29 +0900 Subject: [PATCH 2/9] add query_keyword function --- .../remote/vector_io/chroma/chroma.py | 47 +++++++++++++++---- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 442e64f5d..954817837 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -10,7 +10,6 @@ from typing import Any from urllib.parse import urlparse import chromadb -from chromadb.api.models.AsyncCollection import AsyncCollection from numpy.typing import NDArray from llama_stack.apis.files import Files @@ -109,18 +108,46 @@ class ChromaIndex(EmbeddingIndex): await maybe_await(self.client.delete_collection(self.collection.name)) async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, + self, + query_string: str, + k: int, + score_threshold: float, ) -> QueryChunksResponse: - raise NotImplementedError("Keyword search is not supported in Chroma") + results = await maybe_await( + self.collection.query( + query_texts=[query_string], + where_document={"$contains": query_string}, + n_results=k, + include=["documents", "distances"], + ) + ) + + distances = results["distances"][0] if results["distances"] else [] + documents = results["documents"][0] if results["documents"] else [] + + chunks = [] + scores = [] + + for dist, doc in zip(distances, documents, strict=False): + try: + doc_data = json.loads(doc) + chunk = Chunk(**doc_data) + except Exception: + log.exception(f"Failed to parse document: {doc}") + continue + + score = 1.0 / (1.0 + float(dist)) if dist is not None else 1.0 + + if score < score_threshold: + continue + + chunks.append(chunk) + scores.append(score) + + return QueryChunksResponse(chunks=chunks, scores=scores) async def delete_chunk(self, chunk_id: str) -> None: - if isinstance(self.collection, AsyncCollection): - await self.collection.delete([chunk_id]) - else: - self.collection.delete([chunk_id]) + await maybe_await(self.collection.delete([chunk_id])) async def query_hybrid( self, From abd456232f6b0f730e05aa6074787828753beb79 Mon Sep 17 00:00:00
2001 From: kimbwook Date: Thu, 7 Aug 2025 10:19:44 +0900 Subject: [PATCH 3/9] apply pre-commit --- llama_stack/providers/remote/vector_io/chroma/chroma.py | 8 ++++---- tests/integration/vector_io/test_openai_vector_stores.py | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 954817837..75226a560 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -108,10 +108,10 @@ class ChromaIndex(EmbeddingIndex): await maybe_await(self.client.delete_collection(self.collection.name)) async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, + self, + query_string: str, + k: int, + score_threshold: float, ) -> QueryChunksResponse: results = await maybe_await( self.collection.query( diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index 1c9ef92b6..0a5409ad9 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -52,6 +52,7 @@ def skip_if_provider_doesnt_support_openai_vector_stores_search(client_with_mode ], "keyword": [ "inline::sqlite-vec", + "remote::chromadb", ], "hybrid": [ "inline::sqlite-vec", From 5f2de49912169e2af3cb9d16a5d1e4f7f6a1ea8b Mon Sep 17 00:00:00 2001 From: kimbwook Date: Thu, 7 Aug 2025 10:39:32 +0900 Subject: [PATCH 4/9] add test code --- .../vector_io/test_openai_vector_stores.py | 1 - .../providers/vector_io/remote/test_chroma.py | 124 ++++++++++++++++++ 2 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 tests/unit/providers/vector_io/remote/test_chroma.py diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index 0a5409ad9..1c9ef92b6 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -52,7 +52,6 @@ def skip_if_provider_doesnt_support_openai_vector_stores_search(client_with_mode ], "keyword": [ "inline::sqlite-vec", - "remote::chromadb", ], "hybrid": [ "inline::sqlite-vec", diff --git a/tests/unit/providers/vector_io/remote/test_chroma.py b/tests/unit/providers/vector_io/remote/test_chroma.py new file mode 100644 index 000000000..ea9134f99 --- /dev/null +++ b/tests/unit/providers/vector_io/remote/test_chroma.py @@ -0,0 +1,124 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import json +from unittest.mock import MagicMock, patch + +import numpy as np +import pytest + +from llama_stack.apis.vector_io import QueryChunksResponse + +# Mock the entire chromadb module +chromadb_mock = MagicMock() +chromadb_mock.AsyncHttpClient = MagicMock +chromadb_mock.PersistentClient = MagicMock + +# Apply the mock before importing ChromaIndex +with patch.dict("sys.modules", {"chromadb": chromadb_mock}): + from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaIndex + +# This test is a unit test for the ChromaVectorIOAdapter class. This should only contain +# tests which are specific to this class. 
More general (API-level) tests should be placed in # tests/integration/vector_io/ # # How to run this test: # # pytest tests/unit/providers/vector_io/remote/test_chroma.py \ # -v -s --tb=short --disable-warnings --asyncio-mode=auto CHROMA_PROVIDER = "chromadb" @pytest.fixture async def mock_chroma_collection() -> MagicMock: """Create a mock Chroma collection with common method behaviors.""" collection = MagicMock() collection.name = "test_collection" # Mock add operation collection.add.return_value = None # Mock query operation for vector search collection.query.return_value = { "distances": [[0.1, 0.2]], "documents": [ [ json.dumps({"content": "mock chunk 1", "metadata": {"document_id": "doc1"}}), json.dumps({"content": "mock chunk 2", "metadata": {"document_id": "doc2"}}), ] ], } # Mock delete operation collection.delete.return_value = None return collection @pytest.fixture async def mock_chroma_client(mock_chroma_collection): """Create a mock Chroma client with common method behaviors.""" client = MagicMock() # Mock collection operations client.get_or_create_collection.return_value = mock_chroma_collection client.get_collection.return_value = mock_chroma_collection client.delete_collection.return_value = None return client @pytest.fixture async def chroma_index(mock_chroma_client, mock_chroma_collection): """Create a ChromaIndex with mocked client and collection.""" index = ChromaIndex(client=mock_chroma_client, collection=mock_chroma_collection) yield index # No real cleanup needed since we're using mocks async def test_add_chunks(chroma_index, sample_chunks, sample_embeddings, mock_chroma_collection): await chroma_index.add_chunks(sample_chunks, sample_embeddings) # Verify data was inserted mock_chroma_collection.add.assert_called_once() # Verify the add call had the right number of chunks add_call = mock_chroma_collection.add.call_args assert len(add_call[1]["documents"]) == len(sample_chunks) async def test_query_chunks_vector( chroma_index, sample_chunks, sample_embeddings, embedding_dimension, mock_chroma_collection ): # Setup: Add chunks first await chroma_index.add_chunks(sample_chunks, sample_embeddings) # Test vector search query_embedding = np.random.rand(embedding_dimension).astype(np.float32) response = await chroma_index.query_vector(query_embedding, k=2, score_threshold=0.0) assert isinstance(response, QueryChunksResponse) assert len(response.chunks) == 2 mock_chroma_collection.query.assert_called_once() async def test_query_chunks_keyword_search(chroma_index, sample_chunks, sample_embeddings, mock_chroma_collection): await chroma_index.add_chunks(sample_chunks, sample_embeddings) # Test keyword search query_string = "Sentence 5" response = await chroma_index.query_keyword(query_string=query_string, k=2, score_threshold=0.0) assert isinstance(response, QueryChunksResponse) assert len(response.chunks) == 2 async def test_delete_collection(chroma_index, mock_chroma_client): # Test collection deletion await chroma_index.delete() mock_chroma_client.delete_collection.assert_called_once_with(chroma_index.collection.name) From db0ce0d7e275ef65f61fa1a25a95072cfa61f73c Mon Sep 17 00:00:00 2001 From: kimbwook Date: Mon, 18 Aug 2025 15:59:53 +0800 Subject: [PATCH 5/9] apply Reranker class to ChromaDB --- docs/source/providers/agents/index.md | 12 +- docs/source/providers/batches/index.md | 8 +-
docs/source/providers/inference/index.md | 6 +- .../remote/vector_io/chroma/chroma.py | 52 +++++++- .../providers/utils/vector_io/vector_utils.py | 111 ++++++++++++++++++ 5 files changed, 175 insertions(+), 14 deletions(-) diff --git a/docs/source/providers/agents/index.md b/docs/source/providers/agents/index.md index a2c48d4b9..046db6bff 100644 --- a/docs/source/providers/agents/index.md +++ b/docs/source/providers/agents/index.md @@ -4,12 +4,12 @@ Agents API for creating and interacting with agentic systems. - Main functionalities provided by this API: - - Create agents with specific instructions and ability to use tools. - - Interactions with agents are grouped into sessions ("threads"), and each interaction is called a "turn". - - Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details). - - Agents can be provided with various shields (see the Safety API for more details). - - Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details. +Main functionalities provided by this API: +- Create agents with specific instructions and ability to use tools. +- Interactions with agents are grouped into sessions ("threads"), and each interaction is called a "turn". +- Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details). +- Agents can be provided with various shields (see the Safety API for more details). +- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details. This section contains documentation for all available providers for the **agents** API. diff --git a/docs/source/providers/batches/index.md b/docs/source/providers/batches/index.md index 2a39a626c..f427a599b 100644 --- a/docs/source/providers/batches/index.md +++ b/docs/source/providers/batches/index.md @@ -4,11 +4,11 @@ Protocol for batch processing API operations. - The Batches API enables efficient processing of multiple requests in a single operation, - particularly useful for processing large datasets, batch evaluation workflows, and - cost-effective inference at scale. +The Batches API enables efficient processing of multiple requests in a single operation, +particularly useful for processing large datasets, batch evaluation workflows, and +cost-effective inference at scale. - Note: This API is currently under active development and may undergo changes. +Note: This API is currently under active development and may undergo changes. This section contains documentation for all available providers for the **batches** API. diff --git a/docs/source/providers/inference/index.md b/docs/source/providers/inference/index.md index b6d215474..291e8e525 100644 --- a/docs/source/providers/inference/index.md +++ b/docs/source/providers/inference/index.md @@ -4,9 +4,9 @@ Llama Stack Inference API for generating completions, chat completions, and embeddings. - This API provides the raw interface to the underlying models. Two kinds of models are supported: - - LLM models: these models generate "raw" and "chat" (conversational) completions. - - Embedding models: these models generate embeddings to be used for semantic search. +This API provides the raw interface to the underlying models. Two kinds of models are supported: +- LLM models: these models generate "raw" and "chat" (conversational) completions. +- Embedding models: these models generate embeddings to be used for semantic search. 
This section contains documentation for all available providers for the **inference** API. diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 9c7a7732a..98332b37e 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -4,6 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. import asyncio +import heapq import json import logging from typing import Any @@ -30,6 +31,7 @@ from llama_stack.providers.utils.memory.vector_store import ( EmbeddingIndex, VectorDBWithIndex, ) +from llama_stack.providers.utils.vector_io.vector_utils import Reranker from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig @@ -161,7 +163,55 @@ class ChromaIndex(EmbeddingIndex): reranker_type: str, reranker_params: dict[str, Any] | None = None, ) -> QueryChunksResponse: - raise NotImplementedError("Hybrid search is not supported in Chroma") + """ + Hybrid search combining vector similarity and keyword search using configurable reranking. + Args: + embedding: The query embedding vector + query_string: The text query for keyword search + k: Number of results to return + score_threshold: Minimum similarity score threshold + reranker_type: Type of reranker to use ("rrf" or "weighted") + reranker_params: Parameters for the reranker + Returns: + QueryChunksResponse with combined results + """ + if reranker_params is None: + reranker_params = {} + + # Get results from both search methods + vector_response = await self.query_vector(embedding, k, score_threshold) + keyword_response = await self.query_keyword(query_string, k, score_threshold) + + # Convert responses to score dictionaries using chunk_id + vector_scores = { + chunk.chunk_id: score for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False) + } + keyword_scores = { + chunk.chunk_id: score + for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False) + } + + # Combine scores using the reranking utility + combined_scores = Reranker.combine_search_results(vector_scores, keyword_scores, reranker_type, reranker_params) + + # Efficient top-k selection because it only tracks the k best candidates it's seen so far + top_k_items = heapq.nlargest(k, combined_scores.items(), key=lambda x: x[1]) + + # Filter by score threshold + filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= score_threshold] + + # Create a map of chunk_id to chunk for both responses + chunk_map = {c.chunk_id: c for c in vector_response.chunks + keyword_response.chunks} + + # Use the map to look up chunks by their IDs + chunks = [] + scores = [] + for doc_id, score in filtered_items: + if doc_id in chunk_map: + chunks.append(chunk_map[doc_id]) + scores.append(score) + + return QueryChunksResponse(chunks=chunks, scores=scores) class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): diff --git a/llama_stack/providers/utils/vector_io/vector_utils.py b/llama_stack/providers/utils/vector_io/vector_utils.py index f2888043e..e6dbcb2b5 100644 --- a/llama_stack/providers/utils/vector_io/vector_utils.py +++ b/llama_stack/providers/utils/vector_io/vector_utils.py @@ -37,3 +37,114 @@ def sanitize_collection_name(name: str, weaviate_format=False) -> str: else: s = proper_case(re.sub(r"[^a-zA-Z0-9]", "", name)) return s + + +class Reranker: + @staticmethod + 
def _normalize_scores(scores: dict[str, float]) -> dict[str, float]: + """ + Normalize scores to 0-1 range using min-max normalization. + Args: + scores: dictionary of scores with document IDs as keys and scores as values + Returns: + Normalized scores with document IDs as keys and normalized scores as values + """ + if not scores: + return {} + min_score, max_score = min(scores.values()), max(scores.values()) + score_range = max_score - min_score + if score_range > 0: + return {doc_id: (score - min_score) / score_range for doc_id, score in scores.items()} + return dict.fromkeys(scores, 1.0) + + @staticmethod + def weighted_rerank( + vector_scores: dict[str, float], + keyword_scores: dict[str, float], + alpha: float = 0.5, + ) -> dict[str, float]: + """ + Rerank via weighted average of scores. + Args: + vector_scores: scores from vector search + keyword_scores: scores from keyword search + alpha: weight factor between 0 and 1 (default: 0.5) + 0 = keyword only, 1 = vector only, 0.5 = equal weight + Returns: + All unique document IDs with weighted combined scores + """ + all_ids = set(vector_scores.keys()) | set(keyword_scores.keys()) + normalized_vector_scores = Reranker._normalize_scores(vector_scores) + normalized_keyword_scores = Reranker._normalize_scores(keyword_scores) + + # Weighted formula: score = (1-alpha) * keyword_score + alpha * vector_score + # alpha=0 means keyword only, alpha=1 means vector only + return { + doc_id: ((1 - alpha) * normalized_keyword_scores.get(doc_id, 0.0)) + + (alpha * normalized_vector_scores.get(doc_id, 0.0)) + for doc_id in all_ids + } + + @staticmethod + def rrf_rerank( + vector_scores: dict[str, float], + keyword_scores: dict[str, float], + impact_factor: float = 60.0, + ) -> dict[str, float]: + """ + Rerank via Reciprocal Rank Fusion. + Args: + vector_scores: scores from vector search + keyword_scores: scores from keyword search + impact_factor: impact factor for RRF (default: 60.0) + Returns: + All unique document IDs with RRF combined scores + """ + + # Convert scores to ranks + vector_ranks = { + doc_id: i + 1 + for i, (doc_id, _) in enumerate(sorted(vector_scores.items(), key=lambda x: x[1], reverse=True)) + } + keyword_ranks = { + doc_id: i + 1 + for i, (doc_id, _) in enumerate(sorted(keyword_scores.items(), key=lambda x: x[1], reverse=True)) + } + + all_ids = set(vector_scores.keys()) | set(keyword_scores.keys()) + rrf_scores = {} + for doc_id in all_ids: + vector_rank = vector_ranks.get(doc_id, float("inf")) + keyword_rank = keyword_ranks.get(doc_id, float("inf")) + + # RRF formula: score = 1/(k + r) where k is impact_factor (default: 60.0) and r is the rank + rrf_scores[doc_id] = (1.0 / (impact_factor + vector_rank)) + (1.0 / (impact_factor + keyword_rank)) + return rrf_scores + + @staticmethod + def combine_search_results( + vector_scores: dict[str, float], + keyword_scores: dict[str, float], + reranker_type: str = "rrf", + reranker_params: dict[str, float] | None = None, + ) -> dict[str, float]: + """ + Combine vector and keyword search results using specified reranking strategy. 
+ Args: + vector_scores: scores from vector search + keyword_scores: scores from keyword search + reranker_type: type of reranker to use (default: RERANKER_TYPE_RRF) + reranker_params: parameters for the reranker + Returns: + All unique document IDs with combined scores + """ + if reranker_params is None: + reranker_params = {} + + if reranker_type == "weighted": + alpha = reranker_params.get("alpha", 0.5) + return Reranker.weighted_rerank(vector_scores, keyword_scores, alpha) + else: + # Default to RRF for None, RRF, or any unknown types + impact_factor = reranker_params.get("impact_factor", 60.0) + return Reranker.rrf_rerank(vector_scores, keyword_scores, impact_factor) From 897be1376eb8bde8bff0e290d1502e76715fcecd Mon Sep 17 00:00:00 2001 From: kimbwook Date: Thu, 11 Sep 2025 21:40:21 +0900 Subject: [PATCH 6/9] change Reranker to WeightedInMemoryAggregator --- docs/source/providers/batches/index.md | 12 +- .../remote/vector_io/chroma/chroma.py | 6 +- .../providers/utils/vector_io/vector_utils.py | 1 - pyproject.toml | 17 +-- .../providers/vector_io/remote/test_chroma.py | 124 ------------------ uv.lock | 4 +- 6 files changed, 22 insertions(+), 142 deletions(-) delete mode 100644 tests/unit/providers/vector_io/remote/test_chroma.py diff --git a/docs/source/providers/batches/index.md b/docs/source/providers/batches/index.md index d6d2fa9a3..20fa19212 100644 --- a/docs/source/providers/batches/index.md +++ b/docs/source/providers/batches/index.md @@ -3,15 +3,15 @@ ## Overview The Batches API enables efficient processing of multiple requests in a single operation, - particularly useful for processing large datasets, batch evaluation workflows, and - cost-effective inference at scale. +particularly useful for processing large datasets, batch evaluation workflows, and +cost-effective inference at scale. - The API is designed to allow use of openai client libraries for seamless integration. +The API is designed to allow use of openai client libraries for seamless integration. - This API provides the following extensions: - - idempotent batch creation +This API provides the following extensions: + - idempotent batch creation - Note: This API is currently under active development and may undergo changes. +Note: This API is currently under active development and may undergo changes. This section contains documentation for all available providers for the **batches** API. 
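The reranking helpers introduced in patch 5 are pure functions over {doc_id: score} dictionaries, which makes their math easy to check in isolation before they are wired into query_hybrid. Below is a minimal standalone sketch of the two strategies on toy inputs; it deliberately reimplements the formulas rather than importing the class (patch 6 renames Reranker to WeightedInMemoryAggregator), and the document IDs and scores are invented for illustration.

def min_max(scores: dict[str, float]) -> dict[str, float]:
    # Min-max normalization as in _normalize_scores; a constant score list maps to all 1.0.
    if not scores:
        return {}
    lo, hi = min(scores.values()), max(scores.values())
    return {d: (s - lo) / (hi - lo) for d, s in scores.items()} if hi > lo else dict.fromkeys(scores, 1.0)


def weighted(vec: dict[str, float], kw: dict[str, float], alpha: float = 0.5) -> dict[str, float]:
    # Weighted blend: score = (1 - alpha) * keyword + alpha * vector, on normalized scores.
    nv, nk = min_max(vec), min_max(kw)
    return {d: (1 - alpha) * nk.get(d, 0.0) + alpha * nv.get(d, 0.0) for d in vec.keys() | kw.keys()}


def rrf(vec: dict[str, float], kw: dict[str, float], impact_factor: float = 60.0) -> dict[str, float]:
    # Reciprocal Rank Fusion: sum 1 / (impact_factor + rank) over both result lists;
    # a document missing from a list gets rank inf, i.e. contributes 0.0.
    def ranks(scores: dict[str, float]) -> dict[str, int]:
        return {d: i + 1 for i, (d, _) in enumerate(sorted(scores.items(), key=lambda x: x[1], reverse=True))}

    rv, rk = ranks(vec), ranks(kw)
    return {
        d: 1.0 / (impact_factor + rv.get(d, float("inf"))) + 1.0 / (impact_factor + rk.get(d, float("inf")))
        for d in vec.keys() | kw.keys()
    }


vec = {"doc1": 0.9, "doc2": 0.5}  # vector ranks: doc1 first, doc2 second
kw = {"doc2": 0.8, "doc3": 0.3}   # keyword ranks: doc2 first, doc3 second

# doc2 is the only document present in both lists, so RRF favors it:
# 1/62 + 1/61 ~ 0.0325, versus ~0.0164 for doc1 and ~0.0161 for doc3.
scores = rrf(vec, kw)
assert max(scores, key=scores.get) == "doc2"

# With alpha=1.0 the weighted blend degenerates to vector-only scores.
assert weighted(vec, kw, alpha=1.0)["doc1"] == 1.0

Both strategies return a full {doc_id: score} map over the union of IDs, which is exactly what combine_search_results hands to heapq.nlargest in the Chroma adapter.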
diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 426d62473..5aaf91ee7 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -31,7 +31,7 @@ from llama_stack.providers.utils.memory.vector_store import ( EmbeddingIndex, VectorDBWithIndex, ) -from llama_stack.providers.utils.vector_io.vector_utils import Reranker +from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig @@ -192,7 +192,9 @@ class ChromaIndex(EmbeddingIndex): } # Combine scores using the reranking utility - combined_scores = Reranker.combine_search_results(vector_scores, keyword_scores, reranker_type, reranker_params) + combined_scores = WeightedInMemoryAggregator.combine_search_results( + vector_scores, keyword_scores, reranker_type, reranker_params + ) # Efficient top-k selection because it only tracks the k best candidates it's seen so far top_k_items = heapq.nlargest(k, combined_scores.items(), key=lambda x: x[1]) diff --git a/llama_stack/providers/utils/vector_io/vector_utils.py b/llama_stack/providers/utils/vector_io/vector_utils.py index 61ebad18f..b0992f3c1 100644 --- a/llama_stack/providers/utils/vector_io/vector_utils.py +++ b/llama_stack/providers/utils/vector_io/vector_utils.py @@ -39,7 +39,6 @@ def sanitize_collection_name(name: str, weaviate_format=False) -> str: return s - class WeightedInMemoryAggregator: @staticmethod def _normalize_scores(scores: dict[str, float]) -> dict[str, float]: diff --git a/pyproject.toml b/pyproject.toml index 72c4f6f9e..5fb3d2c7e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,14 +25,14 @@ classifiers = [ ] dependencies = [ "aiohttp", - "fastapi>=0.115.0,<1.0", # server - "fire", # for MCP in LLS client + "fastapi>=0.115.0,<1.0", # server + "fire", # for MCP in LLS client "httpx", "huggingface-hub>=0.34.0,<1.0", "jinja2>=3.1.6", "jsonschema", "llama-stack-client>=0.2.21", - "openai>=1.100.0", # for expires_after support + "openai>=1.100.0", # for expires_after support "prompt-toolkit", "python-dotenv", "python-jose[cryptography]", @@ -43,12 +43,13 @@ dependencies = [ "tiktoken", "pillow", "h11>=0.16.0", - "python-multipart>=0.0.20", # For fastapi Form - "uvicorn>=0.34.0", # server - "opentelemetry-sdk>=1.30.0", # server + "python-multipart>=0.0.20", # For fastapi Form + "uvicorn>=0.34.0", # server + "opentelemetry-sdk>=1.30.0", # server "opentelemetry-exporter-otlp-proto-http>=1.30.0", # server - "aiosqlite>=0.21.0", # server - for metadata store - "asyncpg", # for metadata store + "aiosqlite>=0.21.0", # server - for metadata store + "asyncpg", # for metadata store + "pre-commit>=4.2.0", ] [project.optional-dependencies] diff --git a/tests/unit/providers/vector_io/remote/test_chroma.py b/tests/unit/providers/vector_io/remote/test_chroma.py deleted file mode 100644 index ea9134f99..000000000 --- a/tests/unit/providers/vector_io/remote/test_chroma.py +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import json -from unittest.mock import MagicMock, patch - -import numpy as np -import pytest - -from llama_stack.apis.vector_io import QueryChunksResponse - -# Mock the entire chromadb module -chromadb_mock = MagicMock() -chromadb_mock.AsyncHttpClient = MagicMock -chromadb_mock.PersistentClient = MagicMock - -# Apply the mock before importing ChromaIndex -with patch.dict("sys.modules", {"chromadb": chromadb_mock}): - from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaIndex - -# This test is a unit test for the ChromaVectorIOAdapter class. This should only contain -# tests which are specific to this class. More general (API-level) tests should be placed in -# tests/integration/vector_io/ -# -# How to run this test: -# -# pytest tests/unit/providers/vector_io/remote/test_chroma.py \ -# -v -s --tb=short --disable-warnings --asyncio-mode=auto - -CHROMA_PROVIDER = "chromadb" - - -@pytest.fixture -async def mock_chroma_collection() -> MagicMock: - """Create a mock Chroma collection with common method behaviors.""" - collection = MagicMock() - collection.name = "test_collection" - - # Mock add operation - collection.add.return_value = None - - # Mock query operation for vector search - collection.query.return_value = { - "distances": [[0.1, 0.2]], - "documents": [ - [ - json.dumps({"content": "mock chunk 1", "metadata": {"document_id": "doc1"}}), - json.dumps({"content": "mock chunk 2", "metadata": {"document_id": "doc2"}}), - ] - ], - } - - # Mock delete operation - collection.delete.return_value = None - - return collection - - -@pytest.fixture -async def mock_chroma_client(mock_chroma_collection): - """Create a mock Chroma client with common method behaviors.""" - client = MagicMock() - - # Mock collection operations - client.get_or_create_collection.return_value = mock_chroma_collection - client.get_collection.return_value = mock_chroma_collection - client.delete_collection.return_value = None - - return client - - -@pytest.fixture -async def chroma_index(mock_chroma_client, mock_chroma_collection): - """Create a ChromaIndex with mocked client and collection.""" - index = ChromaIndex(client=mock_chroma_client, collection=mock_chroma_collection) - yield index - # No real cleanup needed since we're using mocks - - -async def test_add_chunks(chroma_index, sample_chunks, sample_embeddings, mock_chroma_collection): - await chroma_index.add_chunks(sample_chunks, sample_embeddings) - - # Verify data was inserted - mock_chroma_collection.add.assert_called_once() - - # Verify the add call had the right number of chunks - add_call = mock_chroma_collection.add.call_args - assert len(add_call[1]["documents"]) == len(sample_chunks) - - -async def test_query_chunks_vector( - chroma_index, sample_chunks, sample_embeddings, embedding_dimension, mock_chroma_collection -): - # Setup: Add chunks first - await chroma_index.add_chunks(sample_chunks, sample_embeddings) - - # Test vector search - query_embedding = np.random.rand(embedding_dimension).astype(np.float32) - response = await chroma_index.query_vector(query_embedding, k=2, score_threshold=0.0) - - assert isinstance(response, QueryChunksResponse) - assert len(response.chunks) == 2 - mock_chroma_collection.query.assert_called_once() - - -async def test_query_chunks_keyword_search(chroma_index, sample_chunks, sample_embeddings, mock_chroma_collection): - await chroma_index.add_chunks(sample_chunks, sample_embeddings) - - # Test keyword search - query_string = "Sentence 5" - response = await
chroma_index.query_keyword(query_string=query_string, k=2, score_threshold=0.0) - - assert isinstance(response, QueryChunksResponse) - assert len(response.chunks) == 2 - - -async def test_delete_collection(chroma_index, mock_chroma_client): - # Test collection deletion - await chroma_index.delete() - - mock_chroma_client.delete_collection.assert_called_once_with(chroma_index.collection.name) diff --git a/uv.lock b/uv.lock index 065eb3876..2ca805065 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.12" resolution-markers = [ "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')", @@ -1767,6 +1767,7 @@ dependencies = [ { name = "opentelemetry-exporter-otlp-proto-http" }, { name = "opentelemetry-sdk" }, { name = "pillow" }, + { name = "pre-commit" }, { name = "prompt-toolkit" }, { name = "pydantic" }, { name = "python-dotenv" }, @@ -1892,6 +1893,7 @@ requires-dist = [ { name = "opentelemetry-sdk", specifier = ">=1.30.0" }, { name = "pandas", marker = "extra == 'ui'" }, { name = "pillow" }, + { name = "pre-commit", specifier = ">=4.2.0" }, { name = "prompt-toolkit" }, { name = "pydantic", specifier = ">=2" }, { name = "python-dotenv" }, From bfc8a3b99df51120036aa44bf1f5bfa6541cd0f3 Mon Sep 17 00:00:00 2001 From: kimbwook Date: Thu, 11 Sep 2025 23:09:31 +0900 Subject: [PATCH 7/9] change exception log message to "Failed to load chunk" --- llama_stack/providers/remote/vector_io/chroma/chroma.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 5aaf91ee7..2f4f94b53 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -136,7 +136,7 @@ class ChromaIndex(EmbeddingIndex): doc_data = json.loads(doc) chunk = Chunk(**doc_data) except Exception: - log.exception(f"Failed to parse document: {doc}") + log.exception(f"Failed to load chunk: {doc}") continue score = 1.0 / (1.0 + float(dist)) if dist is not None else 1.0 From f3bd532461ebcbb112becf80bbc5bab3bf43b549 Mon Sep 17 00:00:00 2001 From: kimbwook Date: Thu, 11 Sep 2025 23:11:24 +0900 Subject: [PATCH 8/9] delete blank lines in vector_utils.py --- llama_stack/providers/utils/vector_io/vector_utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/llama_stack/providers/utils/vector_io/vector_utils.py b/llama_stack/providers/utils/vector_io/vector_utils.py index b0992f3c1..e55ac75ae 100644 --- a/llama_stack/providers/utils/vector_io/vector_utils.py +++ b/llama_stack/providers/utils/vector_io/vector_utils.py @@ -78,7 +78,6 @@ class WeightedInMemoryAggregator: All unique document IDs with weighted combined scores """ all_ids = set(vector_scores.keys()) | set(keyword_scores.keys()) - normalized_vector_scores = WeightedInMemoryAggregator._normalize_scores(vector_scores) normalized_keyword_scores = WeightedInMemoryAggregator._normalize_scores(keyword_scores) @@ -152,7 +151,6 @@ class WeightedInMemoryAggregator: if reranker_type == "weighted": alpha = reranker_params.get("alpha", 0.5) - return WeightedInMemoryAggregator.weighted_rerank(vector_scores, keyword_scores, alpha) else: # Default to RRF for None, RRF, or any unknown types impact_factor = reranker_params.get("impact_factor", 60.0) return WeightedInMemoryAggregator.rrf_rerank(vector_scores, keyword_scores, impact_factor) From 571f998c78997887b0850503d37bb6c8746dcbad Mon Sep 17 00:00:00 2001 From: kimbwook Date: Thu, 11 Sep 2025 23:13:14 +0900 Subject: [PATCH 9/9]
delete pre-commit in pyproject.toml --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5fb3d2c7e..b4dd3ece9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,6 @@ dependencies = [ "opentelemetry-exporter-otlp-proto-http>=1.30.0", # server "aiosqlite>=0.21.0", # server - for metadata store "asyncpg", # for metadata store - "pre-commit>=4.2.0", ] [project.optional-dependencies]
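With the series applied, the Chroma provider supports vector, keyword, and hybrid search: keyword scores come from Chroma's $contains document filter with distances mapped through 1 / (1 + distance), and hybrid results are fused by WeightedInMemoryAggregator before heapq.nlargest picks the top k. A rough usage sketch of the final query_hybrid path follows; the ephemeral client, collection name, embedding dimension, and query text are illustrative assumptions, and the collection is presumed to already hold chunks written via add_chunks.

import asyncio

import chromadb
import numpy as np

from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaIndex


async def main() -> None:
    # Assumed local setup for illustration; the adapter normally builds these from its config.
    client = chromadb.EphemeralClient()
    collection = client.get_or_create_collection("demo_chunks")
    index = ChromaIndex(client=client, collection=collection)

    # Stand-in query embedding; 384 is a typical sentence-transformer dimension.
    query_embedding = np.random.rand(384).astype(np.float32)

    response = await index.query_hybrid(
        embedding=query_embedding,
        query_string="batch inference",
        k=5,
        score_threshold=0.0,
        reranker_type="rrf",                      # "weighted" selects the alpha-blended path
        reranker_params={"impact_factor": 60.0},  # or {"alpha": 0.5} for "weighted"
    )
    for chunk, score in zip(response.chunks, response.scores, strict=False):
        print(f"{score:.4f}", chunk.metadata.get("document_id"))


asyncio.run(main())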