From 554c78ba6620b922a5eb2b4eae81ae7c7cdb471e Mon Sep 17 00:00:00 2001 From: kimbwook Date: Wed, 6 Aug 2025 14:51:10 +0900 Subject: [PATCH 01/16] add delete_chunk feature at chroma --- llama_stack/providers/remote/vector_io/chroma/chroma.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 26aeaedfb..442e64f5d 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -10,6 +10,7 @@ from typing import Any from urllib.parse import urlparse import chromadb +from chromadb.api.models.AsyncCollection import AsyncCollection from numpy.typing import NDArray from llama_stack.apis.files import Files @@ -116,7 +117,10 @@ class ChromaIndex(EmbeddingIndex): raise NotImplementedError("Keyword search is not supported in Chroma") async def delete_chunk(self, chunk_id: str) -> None: - raise NotImplementedError("delete_chunk is not supported in Chroma") + if isinstance(self.collection, AsyncCollection): + await self.collection.delete([chunk_id]) + else: + self.collection.delete([chunk_id]) async def query_hybrid( self, From 26fb2088771e9f88a61347ee2af9ee26a2b42b64 Mon Sep 17 00:00:00 2001 From: kimbwook Date: Thu, 7 Aug 2025 10:09:29 +0900 Subject: [PATCH 02/16] add query_keyword function --- .../remote/vector_io/chroma/chroma.py | 47 +++++++++++++++---- 1 file changed, 37 insertions(+), 10 deletions(-) diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 442e64f5d..954817837 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -10,7 +10,6 @@ from typing import Any from urllib.parse import urlparse import chromadb -from chromadb.api.models.AsyncCollection import AsyncCollection from numpy.typing import NDArray from llama_stack.apis.files import Files @@ -109,18 +108,46 @@ class ChromaIndex(EmbeddingIndex): await maybe_await(self.client.delete_collection(self.collection.name)) async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, + self, + query_string: str, + k: int, + score_threshold: float, ) -> QueryChunksResponse: - raise NotImplementedError("Keyword search is not supported in Chroma") + results = await maybe_await( + self.collection.query( + query_texts=[query_string], + where_document={"$contains": query_string}, + n_results=k, + include=["documents", "distances"], + ) + ) + + distances = results["distances"][0] if results["distances"] else [] + documents = results["documents"][0] if results["documents"] else [] + + chunks = [] + scores = [] + + for dist, doc in zip(distances, documents, strict=False): + try: + doc_data = json.loads(doc) + chunk = Chunk(**doc_data) + except Exception: + log.exception(f"Failed to parse document: {doc}") + continue + + score = 1.0 / (1.0 + float(dist)) if dist is not None else 1.0 + + if score < score_threshold: + continue + + chunks.append(chunk) + scores.append(score) + + return QueryChunksResponse(chunks=chunks, scores=scores) async def delete_chunk(self, chunk_id: str) -> None: - if isinstance(self.collection, AsyncCollection): - await self.collection.delete([chunk_id]) - else: - self.collection.delete([chunk_id]) + await maybe_await(self.collection.delete([chunk_id])) async def query_hybrid( self, From abd456232f6b0f730e05aa6074787828753beb79 Mon Sep 17 
00:00:00 2001 From: kimbwook Date: Thu, 7 Aug 2025 10:19:44 +0900 Subject: [PATCH 03/16] apply pre-commit --- llama_stack/providers/remote/vector_io/chroma/chroma.py | 8 ++++---- tests/integration/vector_io/test_openai_vector_stores.py | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 954817837..75226a560 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -108,10 +108,10 @@ class ChromaIndex(EmbeddingIndex): await maybe_await(self.client.delete_collection(self.collection.name)) async def query_keyword( - self, - query_string: str, - k: int, - score_threshold: float, + self, + query_string: str, + k: int, + score_threshold: float, ) -> QueryChunksResponse: results = await maybe_await( self.collection.query( diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index 1c9ef92b6..0a5409ad9 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -52,6 +52,7 @@ def skip_if_provider_doesnt_support_openai_vector_stores_search(client_with_mode ], "keyword": [ "inline::sqlite-vec", + "remote::chromadb", ], "hybrid": [ "inline::sqlite-vec", From 5f2de49912169e2af3cb9d16a5d1e4f7f6a1ea8b Mon Sep 17 00:00:00 2001 From: kimbwook Date: Thu, 7 Aug 2025 10:39:32 +0900 Subject: [PATCH 04/16] add test code --- .../vector_io/test_openai_vector_stores.py | 1 - .../providers/vector_io/remote/test_chroma.py | 124 ++++++++++++++++++ 2 files changed, 124 insertions(+), 1 deletion(-) create mode 100644 tests/unit/providers/vector_io/remote/test_chroma.py diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index 0a5409ad9..1c9ef92b6 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -52,7 +52,6 @@ def skip_if_provider_doesnt_support_openai_vector_stores_search(client_with_mode ], "keyword": [ "inline::sqlite-vec", - "remote::chromadb", ], "hybrid": [ "inline::sqlite-vec", diff --git a/tests/unit/providers/vector_io/remote/test_chroma.py b/tests/unit/providers/vector_io/remote/test_chroma.py new file mode 100644 index 000000000..ea9134f99 --- /dev/null +++ b/tests/unit/providers/vector_io/remote/test_chroma.py @@ -0,0 +1,124 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import json +from unittest.mock import MagicMock, patch + +import numpy as np +import pytest + +from llama_stack.apis.vector_io import QueryChunksResponse + +# Mock the entire chromadb module +chromadb_mock = MagicMock() +chromadb_mock.AsyncHttpClient = MagicMock +chromadb_mock.PersistentClient = MagicMock + +# Apply the mock before importing ChromaIndex +with patch.dict("sys.modules", {"chromadb": chromadb_mock}): + from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaIndex + +# This test is a unit test for the ChromaVectorIOAdapter class. This should only contain +# tests which are specific to this class. 
More general (API-level) tests should be placed in +# tests/integration/vector_io/ +# +# How to run this test: +# +# pytest tests/unit/providers/vector_io/test_chroma.py \ +# -v -s --tb=short --disable-warnings --asyncio-mode=auto + +CHROMA_PROVIDER = "chromadb" + + +@pytest.fixture +async def mock_chroma_collection() -> MagicMock: + """Create a mock Chroma collection with common method behaviors.""" + collection = MagicMock() + collection.name = "test_collection" + + # Mock add operation + collection.add.return_value = None + + # Mock query operation for vector search + collection.query.return_value = { + "distances": [[0.1, 0.2]], + "documents": [ + [ + json.dumps({"content": "mock chunk 1", "metadata": {"document_id": "doc1"}}), + json.dumps({"content": "mock chunk 2", "metadata": {"document_id": "doc2"}}), + ] + ], + } + + # Mock delete operation + collection.delete.return_value = None + + return collection + + +@pytest.fixture +async def mock_chroma_client(mock_chroma_collection): + """Create a mock Chroma client with common method behaviors.""" + client = MagicMock() + + # Mock collection operations + client.get_or_create_collection.return_value = mock_chroma_collection + client.get_collection.return_value = mock_chroma_collection + client.delete_collection.return_value = None + + return client + + +@pytest.fixture +async def chroma_index(mock_chroma_client, mock_chroma_collection): + """Create a ChromaIndex with mocked client and collection.""" + index = ChromaIndex(client=mock_chroma_client, collection=mock_chroma_collection) + yield index + # No real cleanup needed since we're using mocks + + +async def test_add_chunks(chroma_index, sample_chunks, sample_embeddings, mock_chroma_collection): + await chroma_index.add_chunks(sample_chunks, sample_embeddings) + + # Verify data was inserted + mock_chroma_collection.add.assert_called_once() + + # Verify the add call had the right number of chunks + add_call = mock_chroma_collection.add.call_args + assert len(add_call[1]["documents"]) == len(sample_chunks) + + +async def test_query_chunks_vector( + chroma_index, sample_chunks, sample_embeddings, embedding_dimension, mock_chroma_collection +): + # Setup: Add chunks first + await chroma_index.add_chunks(sample_chunks, sample_embeddings) + + # Test vector search + query_embedding = np.random.rand(embedding_dimension).astype(np.float32) + response = await chroma_index.query_vector(query_embedding, k=2, score_threshold=0.0) + + assert isinstance(response, QueryChunksResponse) + assert len(response.chunks) == 2 + mock_chroma_collection.query.assert_called_once() + + +async def test_query_chunks_keyword_search(chroma_index, sample_chunks, sample_embeddings, mock_chroma_collection): + await chroma_index.add_chunks(sample_chunks, sample_embeddings) + + # Test keyword search + query_string = "Sentence 5" + response = await chroma_index.query_keyword(query_string=query_string, k=2, score_threshold=0.0) + + assert isinstance(response, QueryChunksResponse) + assert len(response.chunks) == 2 + + +async def test_delete_collection(chroma_index, mock_chroma_client): + # Test collection deletion + await chroma_index.delete() + + mock_chroma_client.delete_collection.assert_called_once_with(chroma_index.collection.name) From db0ce0d7e275ef65f61fa1a25a95072cfa61f73c Mon Sep 17 00:00:00 2001 From: kimbwook Date: Mon, 18 Aug 2025 15:59:53 +0800 Subject: [PATCH 05/16] apply Reranker class at chromaDB --- docs/source/providers/agents/index.md | 12 +- docs/source/providers/batches/index.md | 8 +- 
docs/source/providers/inference/index.md | 6 +- .../remote/vector_io/chroma/chroma.py | 52 +++++++- .../providers/utils/vector_io/vector_utils.py | 111 ++++++++++++++++++ 5 files changed, 175 insertions(+), 14 deletions(-) diff --git a/docs/source/providers/agents/index.md b/docs/source/providers/agents/index.md index a2c48d4b9..046db6bff 100644 --- a/docs/source/providers/agents/index.md +++ b/docs/source/providers/agents/index.md @@ -4,12 +4,12 @@ Agents API for creating and interacting with agentic systems. - Main functionalities provided by this API: - - Create agents with specific instructions and ability to use tools. - - Interactions with agents are grouped into sessions ("threads"), and each interaction is called a "turn". - - Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details). - - Agents can be provided with various shields (see the Safety API for more details). - - Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details. +Main functionalities provided by this API: +- Create agents with specific instructions and ability to use tools. +- Interactions with agents are grouped into sessions ("threads"), and each interaction is called a "turn". +- Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details). +- Agents can be provided with various shields (see the Safety API for more details). +- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details. This section contains documentation for all available providers for the **agents** API. diff --git a/docs/source/providers/batches/index.md b/docs/source/providers/batches/index.md index 2a39a626c..f427a599b 100644 --- a/docs/source/providers/batches/index.md +++ b/docs/source/providers/batches/index.md @@ -4,11 +4,11 @@ Protocol for batch processing API operations. - The Batches API enables efficient processing of multiple requests in a single operation, - particularly useful for processing large datasets, batch evaluation workflows, and - cost-effective inference at scale. +The Batches API enables efficient processing of multiple requests in a single operation, +particularly useful for processing large datasets, batch evaluation workflows, and +cost-effective inference at scale. - Note: This API is currently under active development and may undergo changes. +Note: This API is currently under active development and may undergo changes. This section contains documentation for all available providers for the **batches** API. diff --git a/docs/source/providers/inference/index.md b/docs/source/providers/inference/index.md index b6d215474..291e8e525 100644 --- a/docs/source/providers/inference/index.md +++ b/docs/source/providers/inference/index.md @@ -4,9 +4,9 @@ Llama Stack Inference API for generating completions, chat completions, and embeddings. - This API provides the raw interface to the underlying models. Two kinds of models are supported: - - LLM models: these models generate "raw" and "chat" (conversational) completions. - - Embedding models: these models generate embeddings to be used for semantic search. +This API provides the raw interface to the underlying models. Two kinds of models are supported: +- LLM models: these models generate "raw" and "chat" (conversational) completions. +- Embedding models: these models generate embeddings to be used for semantic search. 
This section contains documentation for all available providers for the **inference** API. diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 9c7a7732a..98332b37e 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -4,6 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. import asyncio +import heapq import json import logging from typing import Any @@ -30,6 +31,7 @@ from llama_stack.providers.utils.memory.vector_store import ( EmbeddingIndex, VectorDBWithIndex, ) +from llama_stack.providers.utils.vector_io.vector_utils import Reranker from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig @@ -161,7 +163,55 @@ class ChromaIndex(EmbeddingIndex): reranker_type: str, reranker_params: dict[str, Any] | None = None, ) -> QueryChunksResponse: - raise NotImplementedError("Hybrid search is not supported in Chroma") + """ + Hybrid search combining vector similarity and keyword search using configurable reranking. + Args: + embedding: The query embedding vector + query_string: The text query for keyword search + k: Number of results to return + score_threshold: Minimum similarity score threshold + reranker_type: Type of reranker to use ("rrf" or "weighted") + reranker_params: Parameters for the reranker + Returns: + QueryChunksResponse with combined results + """ + if reranker_params is None: + reranker_params = {} + + # Get results from both search methods + vector_response = await self.query_vector(embedding, k, score_threshold) + keyword_response = await self.query_keyword(query_string, k, score_threshold) + + # Convert responses to score dictionaries using chunk_id + vector_scores = { + chunk.chunk_id: score for chunk, score in zip(vector_response.chunks, vector_response.scores, strict=False) + } + keyword_scores = { + chunk.chunk_id: score + for chunk, score in zip(keyword_response.chunks, keyword_response.scores, strict=False) + } + + # Combine scores using the reranking utility + combined_scores = Reranker.combine_search_results(vector_scores, keyword_scores, reranker_type, reranker_params) + + # Efficient top-k selection because it only tracks the k best candidates it's seen so far + top_k_items = heapq.nlargest(k, combined_scores.items(), key=lambda x: x[1]) + + # Filter by score threshold + filtered_items = [(doc_id, score) for doc_id, score in top_k_items if score >= score_threshold] + + # Create a map of chunk_id to chunk for both responses + chunk_map = {c.chunk_id: c for c in vector_response.chunks + keyword_response.chunks} + + # Use the map to look up chunks by their IDs + chunks = [] + scores = [] + for doc_id, score in filtered_items: + if doc_id in chunk_map: + chunks.append(chunk_map[doc_id]) + scores.append(score) + + return QueryChunksResponse(chunks=chunks, scores=scores) class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorDBsProtocolPrivate): diff --git a/llama_stack/providers/utils/vector_io/vector_utils.py b/llama_stack/providers/utils/vector_io/vector_utils.py index f2888043e..e6dbcb2b5 100644 --- a/llama_stack/providers/utils/vector_io/vector_utils.py +++ b/llama_stack/providers/utils/vector_io/vector_utils.py @@ -37,3 +37,114 @@ def sanitize_collection_name(name: str, weaviate_format=False) -> str: else: s = proper_case(re.sub(r"[^a-zA-Z0-9]", "", name)) return s + + +class Reranker: + @staticmethod + 
def _normalize_scores(scores: dict[str, float]) -> dict[str, float]: + """ + Normalize scores to 0-1 range using min-max normalization. + Args: + scores: dictionary of scores with document IDs as keys and scores as values + Returns: + Normalized scores with document IDs as keys and normalized scores as values + """ + if not scores: + return {} + min_score, max_score = min(scores.values()), max(scores.values()) + score_range = max_score - min_score + if score_range > 0: + return {doc_id: (score - min_score) / score_range for doc_id, score in scores.items()} + return dict.fromkeys(scores, 1.0) + + @staticmethod + def weighted_rerank( + vector_scores: dict[str, float], + keyword_scores: dict[str, float], + alpha: float = 0.5, + ) -> dict[str, float]: + """ + Rerank via weighted average of scores. + Args: + vector_scores: scores from vector search + keyword_scores: scores from keyword search + alpha: weight factor between 0 and 1 (default: 0.5) + 0 = keyword only, 1 = vector only, 0.5 = equal weight + Returns: + All unique document IDs with weighted combined scores + """ + all_ids = set(vector_scores.keys()) | set(keyword_scores.keys()) + normalized_vector_scores = Reranker._normalize_scores(vector_scores) + normalized_keyword_scores = Reranker._normalize_scores(keyword_scores) + + # Weighted formula: score = (1-alpha) * keyword_score + alpha * vector_score + # alpha=0 means keyword only, alpha=1 means vector only + return { + doc_id: ((1 - alpha) * normalized_keyword_scores.get(doc_id, 0.0)) + + (alpha * normalized_vector_scores.get(doc_id, 0.0)) + for doc_id in all_ids + } + + @staticmethod + def rrf_rerank( + vector_scores: dict[str, float], + keyword_scores: dict[str, float], + impact_factor: float = 60.0, + ) -> dict[str, float]: + """ + Rerank via Reciprocal Rank Fusion. + Args: + vector_scores: scores from vector search + keyword_scores: scores from keyword search + impact_factor: impact factor for RRF (default: 60.0) + Returns: + All unique document IDs with RRF combined scores + """ + + # Convert scores to ranks + vector_ranks = { + doc_id: i + 1 + for i, (doc_id, _) in enumerate(sorted(vector_scores.items(), key=lambda x: x[1], reverse=True)) + } + keyword_ranks = { + doc_id: i + 1 + for i, (doc_id, _) in enumerate(sorted(keyword_scores.items(), key=lambda x: x[1], reverse=True)) + } + + all_ids = set(vector_scores.keys()) | set(keyword_scores.keys()) + rrf_scores = {} + for doc_id in all_ids: + vector_rank = vector_ranks.get(doc_id, float("inf")) + keyword_rank = keyword_ranks.get(doc_id, float("inf")) + + # RRF formula: score = 1/(k + r) where k is impact_factor (default: 60.0) and r is the rank + rrf_scores[doc_id] = (1.0 / (impact_factor + vector_rank)) + (1.0 / (impact_factor + keyword_rank)) + return rrf_scores + + @staticmethod + def combine_search_results( + vector_scores: dict[str, float], + keyword_scores: dict[str, float], + reranker_type: str = "rrf", + reranker_params: dict[str, float] | None = None, + ) -> dict[str, float]: + """ + Combine vector and keyword search results using specified reranking strategy. 
+ Args: + vector_scores: scores from vector search + keyword_scores: scores from keyword search + reranker_type: type of reranker to use (default: RERANKER_TYPE_RRF) + reranker_params: parameters for the reranker + Returns: + All unique document IDs with combined scores + """ + if reranker_params is None: + reranker_params = {} + + if reranker_type == "weighted": + alpha = reranker_params.get("alpha", 0.5) + return Reranker.weighted_rerank(vector_scores, keyword_scores, alpha) + else: + # Default to RRF for None, RRF, or any unknown types + impact_factor = reranker_params.get("impact_factor", 60.0) + return Reranker.rrf_rerank(vector_scores, keyword_scores, impact_factor) From 897be1376eb8bde8bff0e290d1502e76715fcecd Mon Sep 17 00:00:00 2001 From: kimbwook Date: Thu, 11 Sep 2025 21:40:21 +0900 Subject: [PATCH 06/16] change Reranker to WeightedInMemoryAggregator --- docs/source/providers/batches/index.md | 12 +- .../remote/vector_io/chroma/chroma.py | 6 +- .../providers/utils/vector_io/vector_utils.py | 1 - pyproject.toml | 17 +-- .../providers/vector_io/remote/test_chroma.py | 124 ------------------ uv.lock | 4 +- 6 files changed, 22 insertions(+), 142 deletions(-) delete mode 100644 tests/unit/providers/vector_io/remote/test_chroma.py diff --git a/docs/source/providers/batches/index.md b/docs/source/providers/batches/index.md index d6d2fa9a3..20fa19212 100644 --- a/docs/source/providers/batches/index.md +++ b/docs/source/providers/batches/index.md @@ -3,15 +3,15 @@ ## Overview The Batches API enables efficient processing of multiple requests in a single operation, - particularly useful for processing large datasets, batch evaluation workflows, and - cost-effective inference at scale. +particularly useful for processing large datasets, batch evaluation workflows, and +cost-effective inference at scale. - The API is designed to allow use of openai client libraries for seamless integration. +The API is designed to allow use of openai client libraries for seamless integration. - This API provides the following extensions: - - idempotent batch creation +This API provides the following extensions: + - idempotent batch creation - Note: This API is currently under active development and may undergo changes. +Note: This API is currently under active development and may undergo changes. This section contains documentation for all available providers for the **batches** API. 
diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 426d62473..5aaf91ee7 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -31,7 +31,7 @@ from llama_stack.providers.utils.memory.vector_store import ( EmbeddingIndex, VectorDBWithIndex, ) -from llama_stack.providers.utils.vector_io.vector_utils import Reranker +from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig @@ -192,7 +192,9 @@ class ChromaIndex(EmbeddingIndex): } # Combine scores using the reranking utility - combined_scores = Reranker.combine_search_results(vector_scores, keyword_scores, reranker_type, reranker_params) + combined_scores = WeightedInMemoryAggregator.combine_search_results( + vector_scores, keyword_scores, reranker_type, reranker_params + ) # Efficient top-k selection because it only tracks the k best candidates it's seen so far top_k_items = heapq.nlargest(k, combined_scores.items(), key=lambda x: x[1]) diff --git a/llama_stack/providers/utils/vector_io/vector_utils.py b/llama_stack/providers/utils/vector_io/vector_utils.py index 61ebad18f..b0992f3c1 100644 --- a/llama_stack/providers/utils/vector_io/vector_utils.py +++ b/llama_stack/providers/utils/vector_io/vector_utils.py @@ -39,7 +39,6 @@ def sanitize_collection_name(name: str, weaviate_format=False) -> str: return s - class WeightedInMemoryAggregator: @staticmethod def _normalize_scores(scores: dict[str, float]) -> dict[str, float]: diff --git a/pyproject.toml b/pyproject.toml index 72c4f6f9e..5fb3d2c7e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,14 +25,14 @@ classifiers = [ ] dependencies = [ "aiohttp", - "fastapi>=0.115.0,<1.0", # server - "fire", # for MCP in LLS client + "fastapi>=0.115.0,<1.0", # server + "fire", # for MCP in LLS client "httpx", "huggingface-hub>=0.34.0,<1.0", "jinja2>=3.1.6", "jsonschema", "llama-stack-client>=0.2.21", - "openai>=1.100.0", # for expires_after support + "openai>=1.100.0", # for expires_after support "prompt-toolkit", "python-dotenv", "python-jose[cryptography]", @@ -43,12 +43,13 @@ dependencies = [ "tiktoken", "pillow", "h11>=0.16.0", - "python-multipart>=0.0.20", # For fastapi Form - "uvicorn>=0.34.0", # server - "opentelemetry-sdk>=1.30.0", # server + "python-multipart>=0.0.20", # For fastapi Form + "uvicorn>=0.34.0", # server + "opentelemetry-sdk>=1.30.0", # server "opentelemetry-exporter-otlp-proto-http>=1.30.0", # server - "aiosqlite>=0.21.0", # server - for metadata store - "asyncpg", # for metadata store + "aiosqlite>=0.21.0", # server - for metadata store + "asyncpg", # for metadata store + "pre-commit>=4.2.0", ] [project.optional-dependencies] diff --git a/tests/unit/providers/vector_io/remote/test_chroma.py b/tests/unit/providers/vector_io/remote/test_chroma.py deleted file mode 100644 index ea9134f99..000000000 --- a/tests/unit/providers/vector_io/remote/test_chroma.py +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import json -from unittest.mock import MagicMock, patch - -import numpy as np -import pytest - -from llama_stack.apis.vector_io import QueryChunksResponse - -# Mock the entire chromadb module -chromadb_mock = MagicMock() -chromadb_mock.AsyncHttpClient = MagicMock -chromadb_mock.PersistentClient = MagicMock - -# Apply the mock before importing ChromaIndex -with patch.dict("sys.modules", {"chromadb": chromadb_mock}): - from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaIndex - -# This test is a unit test for the ChromaVectorIOAdapter class. This should only contain -# tests which are specific to this class. More general (API-level) tests should be placed in -# tests/integration/vector_io/ -# -# How to run this test: -# -# pytest tests/unit/providers/vector_io/test_chroma.py \ -# -v -s --tb=short --disable-warnings --asyncio-mode=auto - -CHROMA_PROVIDER = "chromadb" - - -@pytest.fixture -async def mock_chroma_collection() -> MagicMock: - """Create a mock Chroma collection with common method behaviors.""" - collection = MagicMock() - collection.name = "test_collection" - - # Mock add operation - collection.add.return_value = None - - # Mock query operation for vector search - collection.query.return_value = { - "distances": [[0.1, 0.2]], - "documents": [ - [ - json.dumps({"content": "mock chunk 1", "metadata": {"document_id": "doc1"}}), - json.dumps({"content": "mock chunk 2", "metadata": {"document_id": "doc2"}}), - ] - ], - } - - # Mock delete operation - collection.delete.return_value = None - - return collection - - -@pytest.fixture -async def mock_chroma_client(mock_chroma_collection): - """Create a mock Chroma client with common method behaviors.""" - client = MagicMock() - - # Mock collection operations - client.get_or_create_collection.return_value = mock_chroma_collection - client.get_collection.return_value = mock_chroma_collection - client.delete_collection.return_value = None - - return client - - -@pytest.fixture -async def chroma_index(mock_chroma_client, mock_chroma_collection): - """Create a ChromaIndex with mocked client and collection.""" - index = ChromaIndex(client=mock_chroma_client, collection=mock_chroma_collection) - yield index - # No real cleanup needed since we're using mocks - - -async def test_add_chunks(chroma_index, sample_chunks, sample_embeddings, mock_chroma_collection): - await chroma_index.add_chunks(sample_chunks, sample_embeddings) - - # Verify data was inserted - mock_chroma_collection.add.assert_called_once() - - # Verify the add call had the right number of chunks - add_call = mock_chroma_collection.add.call_args - assert len(add_call[1]["documents"]) == len(sample_chunks) - - -async def test_query_chunks_vector( - chroma_index, sample_chunks, sample_embeddings, embedding_dimension, mock_chroma_collection -): - # Setup: Add chunks first - await chroma_index.add_chunks(sample_chunks, sample_embeddings) - - # Test vector search - query_embedding = np.random.rand(embedding_dimension).astype(np.float32) - response = await chroma_index.query_vector(query_embedding, k=2, score_threshold=0.0) - - assert isinstance(response, QueryChunksResponse) - assert len(response.chunks) == 2 - mock_chroma_collection.query.assert_called_once() - - -async def test_query_chunks_keyword_search(chroma_index, sample_chunks, sample_embeddings, mock_chroma_collection): - await chroma_index.add_chunks(sample_chunks, sample_embeddings) - - # Test keyword search - query_string = "Sentence 5" - response = await 
chroma_index.query_keyword(query_string=query_string, k=2, score_threshold=0.0) - - assert isinstance(response, QueryChunksResponse) - assert len(response.chunks) == 2 - - -async def test_delete_collection(chroma_index, mock_chroma_client): - # Test collection deletion - await chroma_index.delete() - - mock_chroma_client.delete_collection.assert_called_once_with(chroma_index.collection.name) diff --git a/uv.lock b/uv.lock index 065eb3876..2ca805065 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.12" resolution-markers = [ "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')", @@ -1767,6 +1767,7 @@ dependencies = [ { name = "opentelemetry-exporter-otlp-proto-http" }, { name = "opentelemetry-sdk" }, { name = "pillow" }, + { name = "pre-commit" }, { name = "prompt-toolkit" }, { name = "pydantic" }, { name = "python-dotenv" }, @@ -1892,6 +1893,7 @@ requires-dist = [ { name = "opentelemetry-sdk", specifier = ">=1.30.0" }, { name = "pandas", marker = "extra == 'ui'" }, { name = "pillow" }, + { name = "pre-commit", specifier = ">=4.2.0" }, { name = "prompt-toolkit" }, { name = "pydantic", specifier = ">=2" }, { name = "python-dotenv" }, From bfc8a3b99df51120036aa44bf1f5bfa6541cd0f3 Mon Sep 17 00:00:00 2001 From: kimbwook Date: Thu, 11 Sep 2025 23:09:31 +0900 Subject: [PATCH 07/16] change exception log parse to chunk --- llama_stack/providers/remote/vector_io/chroma/chroma.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 5aaf91ee7..2f4f94b53 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -136,7 +136,7 @@ class ChromaIndex(EmbeddingIndex): doc_data = json.loads(doc) chunk = Chunk(**doc_data) except Exception: - log.exception(f"Failed to parse document: {doc}") + log.exception(f"Failed to load chunk: {doc}") continue score = 1.0 / (1.0 + float(dist)) if dist is not None else 1.0 From f3bd532461ebcbb112becf80bbc5bab3bf43b549 Mon Sep 17 00:00:00 2001 From: kimbwook Date: Thu, 11 Sep 2025 23:11:24 +0900 Subject: [PATCH 08/16] delete blank line in vector_utils.py --- llama_stack/providers/utils/vector_io/vector_utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/llama_stack/providers/utils/vector_io/vector_utils.py b/llama_stack/providers/utils/vector_io/vector_utils.py index b0992f3c1..e55ac75ae 100644 --- a/llama_stack/providers/utils/vector_io/vector_utils.py +++ b/llama_stack/providers/utils/vector_io/vector_utils.py @@ -78,7 +78,6 @@ class WeightedInMemoryAggregator: All unique document IDs with weighted combined scores """ all_ids = set(vector_scores.keys()) | set(keyword_scores.keys()) - normalized_vector_scores = WeightedInMemoryAggregator._normalize_scores(vector_scores) normalized_keyword_scores = WeightedInMemoryAggregator._normalize_scores(keyword_scores) @@ -152,7 +151,6 @@ class WeightedInMemoryAggregator: if reranker_type == "weighted": alpha = reranker_params.get("alpha", 0.5) - return WeightedInMemoryAggregator.weighted_rerank(vector_scores, keyword_scores, alpha) else: # Default to RRF for None, RRF, or any unknown types From 571f998c78997887b0850503d37bb6c8746dcbad Mon Sep 17 00:00:00 2001 From: kimbwook Date: Thu, 11 Sep 2025 23:13:14 +0900 Subject: [PATCH 
09/16] delete pre-commit in pyproject.toml --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5fb3d2c7e..b4dd3ece9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,6 @@ dependencies = [ "opentelemetry-exporter-otlp-proto-http>=1.30.0", # server "aiosqlite>=0.21.0", # server - for metadata store "asyncpg", # for metadata store - "pre-commit>=4.2.0", ] [project.optional-dependencies] From 1c7daec39666883a5a399dad35adb2db7be85b35 Mon Sep 17 00:00:00 2001 From: kimbwook Date: Tue, 14 Oct 2025 14:41:13 +0900 Subject: [PATCH 10/16] add "remote::chromadb" at test_openai_vector_stores.py --- tests/integration/vector_io/test_openai_vector_stores.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/vector_io/test_openai_vector_stores.py b/tests/integration/vector_io/test_openai_vector_stores.py index 347b43145..5fdfacad2 100644 --- a/tests/integration/vector_io/test_openai_vector_stores.py +++ b/tests/integration/vector_io/test_openai_vector_stores.py @@ -60,6 +60,7 @@ def skip_if_provider_doesnt_support_openai_vector_stores_search(client_with_mode "remote::milvus", "remote::pgvector", "remote::weaviate", + "remote::chromadb", ], "hybrid": [ "inline::milvus", @@ -67,6 +68,7 @@ def skip_if_provider_doesnt_support_openai_vector_stores_search(client_with_mode "remote::milvus", "remote::pgvector", "remote::weaviate", + "remote::chromadb", ], } supported_providers = search_mode_support.get(search_mode, []) From 6b9024422af0db4d474359d77fa7650bae453ef4 Mon Sep 17 00:00:00 2001 From: kimbwook Date: Tue, 14 Oct 2025 23:19:58 +0900 Subject: [PATCH 11/16] apply pre-commit --- uv.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uv.lock b/uv.lock index cf738be58..0fcb02768 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.12" resolution-markers = [ "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')", From b52299d789296ab0106dc42c1cb850cca1fe42b8 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 14 Oct 2025 15:06:31 +0000 Subject: [PATCH 12/16] style: apply pre-commit fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Applied by @github-actions bot via pre-commit workflow --- docs/docs/providers/agents/index.mdx | 4 ++-- docs/docs/providers/batches/index.mdx | 24 ++++++++++++------------ docs/docs/providers/files/index.mdx | 4 ++-- docs/docs/providers/inference/index.mdx | 16 ++++++++-------- docs/docs/providers/safety/index.mdx | 4 ++-- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/docs/docs/providers/agents/index.mdx b/docs/docs/providers/agents/index.mdx index 52b92734e..06eb104af 100644 --- a/docs/docs/providers/agents/index.mdx +++ b/docs/docs/providers/agents/index.mdx @@ -1,7 +1,7 @@ --- description: "Agents -APIs for creating and interacting with agentic systems." + APIs for creating and interacting with agentic systems." sidebar_label: Agents title: Agents --- @@ -12,6 +12,6 @@ title: Agents Agents -APIs for creating and interacting with agentic systems. + APIs for creating and interacting with agentic systems. This section contains documentation for all available providers for the **agents** API. 
diff --git a/docs/docs/providers/batches/index.mdx b/docs/docs/providers/batches/index.mdx index 18e5e314d..2c64b277f 100644 --- a/docs/docs/providers/batches/index.mdx +++ b/docs/docs/providers/batches/index.mdx @@ -1,14 +1,14 @@ --- description: "The Batches API enables efficient processing of multiple requests in a single operation, -particularly useful for processing large datasets, batch evaluation workflows, and -cost-effective inference at scale. + particularly useful for processing large datasets, batch evaluation workflows, and + cost-effective inference at scale. -The API is designed to allow use of openai client libraries for seamless integration. + The API is designed to allow use of openai client libraries for seamless integration. -This API provides the following extensions: - - idempotent batch creation + This API provides the following extensions: + - idempotent batch creation -Note: This API is currently under active development and may undergo changes." + Note: This API is currently under active development and may undergo changes." sidebar_label: Batches title: Batches --- @@ -18,14 +18,14 @@ title: Batches ## Overview The Batches API enables efficient processing of multiple requests in a single operation, -particularly useful for processing large datasets, batch evaluation workflows, and -cost-effective inference at scale. + particularly useful for processing large datasets, batch evaluation workflows, and + cost-effective inference at scale. -The API is designed to allow use of openai client libraries for seamless integration. + The API is designed to allow use of openai client libraries for seamless integration. -This API provides the following extensions: - - idempotent batch creation + This API provides the following extensions: + - idempotent batch creation -Note: This API is currently under active development and may undergo changes. + Note: This API is currently under active development and may undergo changes. This section contains documentation for all available providers for the **batches** API. diff --git a/docs/docs/providers/files/index.mdx b/docs/docs/providers/files/index.mdx index c61c4f1b6..19e338035 100644 --- a/docs/docs/providers/files/index.mdx +++ b/docs/docs/providers/files/index.mdx @@ -1,7 +1,7 @@ --- description: "Files -This API is used to upload documents that can be used with other Llama Stack APIs." + This API is used to upload documents that can be used with other Llama Stack APIs." sidebar_label: Files title: Files --- @@ -12,6 +12,6 @@ title: Files Files -This API is used to upload documents that can be used with other Llama Stack APIs. + This API is used to upload documents that can be used with other Llama Stack APIs. This section contains documentation for all available providers for the **files** API. diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index 322c95ee7..c2bf69962 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -1,11 +1,11 @@ --- description: "Inference -Llama Stack Inference API for generating completions, chat completions, and embeddings. + Llama Stack Inference API for generating completions, chat completions, and embeddings. -This API provides the raw interface to the underlying models. Two kinds of models are supported: -- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. -- Embedding models: these models generate embeddings to be used for semantic search." 
+ This API provides the raw interface to the underlying models. Two kinds of models are supported: + - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search." sidebar_label: Inference title: Inference --- @@ -16,10 +16,10 @@ title: Inference Inference -Llama Stack Inference API for generating completions, chat completions, and embeddings. + Llama Stack Inference API for generating completions, chat completions, and embeddings. -This API provides the raw interface to the underlying models. Two kinds of models are supported: -- LLM models: these models generate "raw" and "chat" (conversational) completions. -- Embedding models: these models generate embeddings to be used for semantic search. + This API provides the raw interface to the underlying models. Two kinds of models are supported: + - LLM models: these models generate "raw" and "chat" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search. This section contains documentation for all available providers for the **inference** API. diff --git a/docs/docs/providers/safety/index.mdx b/docs/docs/providers/safety/index.mdx index 038565475..4e2de4f33 100644 --- a/docs/docs/providers/safety/index.mdx +++ b/docs/docs/providers/safety/index.mdx @@ -1,7 +1,7 @@ --- description: "Safety -OpenAI-compatible Moderations API." + OpenAI-compatible Moderations API." sidebar_label: Safety title: Safety --- @@ -12,6 +12,6 @@ title: Safety Safety -OpenAI-compatible Moderations API. + OpenAI-compatible Moderations API. This section contains documentation for all available providers for the **safety** API. From 53f09a7a65cb9d14ae078d331efd18b5529a62d5 Mon Sep 17 00:00:00 2001 From: kimbwook Date: Wed, 15 Oct 2025 11:10:06 +0900 Subject: [PATCH 13/16] change multi-line import to single-line --- docs/docs/providers/agents/index.mdx | 4 ++-- docs/docs/providers/batches/index.mdx | 24 +++++++++---------- docs/docs/providers/files/index.mdx | 4 ++-- docs/docs/providers/inference/index.mdx | 16 ++++++------- docs/docs/providers/safety/index.mdx | 4 ++-- .../remote/vector_io/chroma/chroma.py | 12 +++------- 6 files changed, 29 insertions(+), 35 deletions(-) diff --git a/docs/docs/providers/agents/index.mdx b/docs/docs/providers/agents/index.mdx index 06eb104af..52b92734e 100644 --- a/docs/docs/providers/agents/index.mdx +++ b/docs/docs/providers/agents/index.mdx @@ -1,7 +1,7 @@ --- description: "Agents - APIs for creating and interacting with agentic systems." +APIs for creating and interacting with agentic systems." sidebar_label: Agents title: Agents --- @@ -12,6 +12,6 @@ title: Agents Agents - APIs for creating and interacting with agentic systems. +APIs for creating and interacting with agentic systems. This section contains documentation for all available providers for the **agents** API. diff --git a/docs/docs/providers/batches/index.mdx b/docs/docs/providers/batches/index.mdx index 2c64b277f..18e5e314d 100644 --- a/docs/docs/providers/batches/index.mdx +++ b/docs/docs/providers/batches/index.mdx @@ -1,14 +1,14 @@ --- description: "The Batches API enables efficient processing of multiple requests in a single operation, - particularly useful for processing large datasets, batch evaluation workflows, and - cost-effective inference at scale. +particularly useful for processing large datasets, batch evaluation workflows, and +cost-effective inference at scale. 
- The API is designed to allow use of openai client libraries for seamless integration. +The API is designed to allow use of openai client libraries for seamless integration. - This API provides the following extensions: - - idempotent batch creation +This API provides the following extensions: + - idempotent batch creation - Note: This API is currently under active development and may undergo changes." +Note: This API is currently under active development and may undergo changes." sidebar_label: Batches title: Batches --- @@ -18,14 +18,14 @@ title: Batches ## Overview The Batches API enables efficient processing of multiple requests in a single operation, - particularly useful for processing large datasets, batch evaluation workflows, and - cost-effective inference at scale. +particularly useful for processing large datasets, batch evaluation workflows, and +cost-effective inference at scale. - The API is designed to allow use of openai client libraries for seamless integration. +The API is designed to allow use of openai client libraries for seamless integration. - This API provides the following extensions: - - idempotent batch creation +This API provides the following extensions: + - idempotent batch creation - Note: This API is currently under active development and may undergo changes. +Note: This API is currently under active development and may undergo changes. This section contains documentation for all available providers for the **batches** API. diff --git a/docs/docs/providers/files/index.mdx b/docs/docs/providers/files/index.mdx index 19e338035..c61c4f1b6 100644 --- a/docs/docs/providers/files/index.mdx +++ b/docs/docs/providers/files/index.mdx @@ -1,7 +1,7 @@ --- description: "Files - This API is used to upload documents that can be used with other Llama Stack APIs." +This API is used to upload documents that can be used with other Llama Stack APIs." sidebar_label: Files title: Files --- @@ -12,6 +12,6 @@ title: Files Files - This API is used to upload documents that can be used with other Llama Stack APIs. +This API is used to upload documents that can be used with other Llama Stack APIs. This section contains documentation for all available providers for the **files** API. diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index c2bf69962..322c95ee7 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -1,11 +1,11 @@ --- description: "Inference - Llama Stack Inference API for generating completions, chat completions, and embeddings. +Llama Stack Inference API for generating completions, chat completions, and embeddings. - This API provides the raw interface to the underlying models. Two kinds of models are supported: - - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. - - Embedding models: these models generate embeddings to be used for semantic search." +This API provides the raw interface to the underlying models. Two kinds of models are supported: +- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. +- Embedding models: these models generate embeddings to be used for semantic search." sidebar_label: Inference title: Inference --- @@ -16,10 +16,10 @@ title: Inference Inference - Llama Stack Inference API for generating completions, chat completions, and embeddings. +Llama Stack Inference API for generating completions, chat completions, and embeddings. 
- This API provides the raw interface to the underlying models. Two kinds of models are supported: - - LLM models: these models generate "raw" and "chat" (conversational) completions. - - Embedding models: these models generate embeddings to be used for semantic search. +This API provides the raw interface to the underlying models. Two kinds of models are supported: +- LLM models: these models generate "raw" and "chat" (conversational) completions. +- Embedding models: these models generate embeddings to be used for semantic search. This section contains documentation for all available providers for the **inference** API. diff --git a/docs/docs/providers/safety/index.mdx b/docs/docs/providers/safety/index.mdx index 4e2de4f33..038565475 100644 --- a/docs/docs/providers/safety/index.mdx +++ b/docs/docs/providers/safety/index.mdx @@ -1,7 +1,7 @@ --- description: "Safety - OpenAI-compatible Moderations API." +OpenAI-compatible Moderations API." sidebar_label: Safety title: Safety --- @@ -12,6 +12,6 @@ title: Safety Safety - OpenAI-compatible Moderations API. +OpenAI-compatible Moderations API. This section contains documentation for all available providers for the **safety** API. diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 89a528890..10857a773 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -22,22 +22,16 @@ from llama_stack.apis.vector_io import ( ) from llama_stack.log import get_logger from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate -from llama_stack.providers.inline.vector_io.chroma import ( - ChromaVectorIOConfig as InlineChromaVectorIOConfig, -) +from llama_stack.providers.inline.vector_io.chroma import ChromaVectorIOConfig as InlineChromaVectorIOConfig from llama_stack.providers.utils.kvstore import kvstore_impl from llama_stack.providers.utils.kvstore.api import KVStore -from llama_stack.providers.utils.memory.openai_vector_store_mixin import ( - OpenAIVectorStoreMixin, -) +from llama_stack.providers.utils.memory.openai_vector_store_mixin import OpenAIVectorStoreMixin from llama_stack.providers.utils.memory.vector_store import ( ChunkForDeletion, EmbeddingIndex, VectorDBWithIndex, ) -from llama_stack.providers.utils.vector_io.vector_utils import ( - WeightedInMemoryAggregator, -) +from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig From 1b82c3c97ea96e95781edb939e98db72eea130c7 Mon Sep 17 00:00:00 2001 From: kimbwook Date: Wed, 15 Oct 2025 11:47:56 +0900 Subject: [PATCH 14/16] removed error handling for Chunk and add error handling for maybe_await --- .../remote/vector_io/chroma/chroma.py | 37 ++++++++++++------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 10857a773..03c407122 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -116,14 +116,29 @@ class ChromaIndex(EmbeddingIndex): k: int, score_threshold: float, ) -> QueryChunksResponse: - results = await maybe_await( - self.collection.query( - query_texts=[query_string], - where_document={"$contains": query_string}, - n_results=k, - include=["documents", "distances"], + """ + Perform keyword search using Chroma's 
built-in where_document feature. + + Args: + query_string: The text query for keyword search + k: Number of results to return + score_threshold: Minimum similarity score threshold + + Returns: + QueryChunksResponse with combined results + """ + try: + results = await maybe_await( + self.collection.query( + query_texts=[query_string], + where_document={"$contains": query_string}, + n_results=k, + include=["documents", "distances"], + ) ) - ) + except Exception as e: + log.error(f"Chroma client keyword search failed: {e}") + raise distances = results["distances"][0] if results["distances"] else [] documents = results["documents"][0] if results["documents"] else [] @@ -132,12 +147,8 @@ class ChromaIndex(EmbeddingIndex): scores = [] for dist, doc in zip(distances, documents, strict=False): - try: - doc_data = json.loads(doc) - chunk = Chunk(**doc_data) - except Exception: - log.exception(f"Failed to load chunk: {doc}") - continue + doc_data = json.loads(doc) + chunk = Chunk(**doc_data) score = 1.0 / (1.0 + float(dist)) if dist is not None else 1.0 From 85a42cfe1a5ef08843f6b2776dcdb3b2988110a1 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 15 Oct 2025 10:12:57 +0000 Subject: [PATCH 15/16] style: apply pre-commit fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Applied by @github-actions bot via pre-commit workflow --- docs/docs/providers/agents/index.mdx | 4 ++-- docs/docs/providers/batches/index.mdx | 24 ++++++++++++------------ docs/docs/providers/files/index.mdx | 4 ++-- docs/docs/providers/inference/index.mdx | 16 ++++++++-------- docs/docs/providers/safety/index.mdx | 4 ++-- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/docs/docs/providers/agents/index.mdx b/docs/docs/providers/agents/index.mdx index 52b92734e..06eb104af 100644 --- a/docs/docs/providers/agents/index.mdx +++ b/docs/docs/providers/agents/index.mdx @@ -1,7 +1,7 @@ --- description: "Agents -APIs for creating and interacting with agentic systems." + APIs for creating and interacting with agentic systems." sidebar_label: Agents title: Agents --- @@ -12,6 +12,6 @@ title: Agents Agents -APIs for creating and interacting with agentic systems. + APIs for creating and interacting with agentic systems. This section contains documentation for all available providers for the **agents** API. diff --git a/docs/docs/providers/batches/index.mdx b/docs/docs/providers/batches/index.mdx index 18e5e314d..2c64b277f 100644 --- a/docs/docs/providers/batches/index.mdx +++ b/docs/docs/providers/batches/index.mdx @@ -1,14 +1,14 @@ --- description: "The Batches API enables efficient processing of multiple requests in a single operation, -particularly useful for processing large datasets, batch evaluation workflows, and -cost-effective inference at scale. + particularly useful for processing large datasets, batch evaluation workflows, and + cost-effective inference at scale. -The API is designed to allow use of openai client libraries for seamless integration. + The API is designed to allow use of openai client libraries for seamless integration. -This API provides the following extensions: - - idempotent batch creation + This API provides the following extensions: + - idempotent batch creation -Note: This API is currently under active development and may undergo changes." + Note: This API is currently under active development and may undergo changes." 
sidebar_label: Batches title: Batches --- @@ -18,14 +18,14 @@ title: Batches ## Overview The Batches API enables efficient processing of multiple requests in a single operation, -particularly useful for processing large datasets, batch evaluation workflows, and -cost-effective inference at scale. + particularly useful for processing large datasets, batch evaluation workflows, and + cost-effective inference at scale. -The API is designed to allow use of openai client libraries for seamless integration. + The API is designed to allow use of openai client libraries for seamless integration. -This API provides the following extensions: - - idempotent batch creation + This API provides the following extensions: + - idempotent batch creation -Note: This API is currently under active development and may undergo changes. + Note: This API is currently under active development and may undergo changes. This section contains documentation for all available providers for the **batches** API. diff --git a/docs/docs/providers/files/index.mdx b/docs/docs/providers/files/index.mdx index c61c4f1b6..19e338035 100644 --- a/docs/docs/providers/files/index.mdx +++ b/docs/docs/providers/files/index.mdx @@ -1,7 +1,7 @@ --- description: "Files -This API is used to upload documents that can be used with other Llama Stack APIs." + This API is used to upload documents that can be used with other Llama Stack APIs." sidebar_label: Files title: Files --- @@ -12,6 +12,6 @@ title: Files Files -This API is used to upload documents that can be used with other Llama Stack APIs. + This API is used to upload documents that can be used with other Llama Stack APIs. This section contains documentation for all available providers for the **files** API. diff --git a/docs/docs/providers/inference/index.mdx b/docs/docs/providers/inference/index.mdx index 322c95ee7..c2bf69962 100644 --- a/docs/docs/providers/inference/index.mdx +++ b/docs/docs/providers/inference/index.mdx @@ -1,11 +1,11 @@ --- description: "Inference -Llama Stack Inference API for generating completions, chat completions, and embeddings. + Llama Stack Inference API for generating completions, chat completions, and embeddings. -This API provides the raw interface to the underlying models. Two kinds of models are supported: -- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. -- Embedding models: these models generate embeddings to be used for semantic search." + This API provides the raw interface to the underlying models. Two kinds of models are supported: + - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search." sidebar_label: Inference title: Inference --- @@ -16,10 +16,10 @@ title: Inference Inference -Llama Stack Inference API for generating completions, chat completions, and embeddings. + Llama Stack Inference API for generating completions, chat completions, and embeddings. -This API provides the raw interface to the underlying models. Two kinds of models are supported: -- LLM models: these models generate "raw" and "chat" (conversational) completions. -- Embedding models: these models generate embeddings to be used for semantic search. + This API provides the raw interface to the underlying models. Two kinds of models are supported: + - LLM models: these models generate "raw" and "chat" (conversational) completions. + - Embedding models: these models generate embeddings to be used for semantic search. 
This section contains documentation for all available providers for the **inference** API. diff --git a/docs/docs/providers/safety/index.mdx b/docs/docs/providers/safety/index.mdx index 038565475..4e2de4f33 100644 --- a/docs/docs/providers/safety/index.mdx +++ b/docs/docs/providers/safety/index.mdx @@ -1,7 +1,7 @@ --- description: "Safety -OpenAI-compatible Moderations API." + OpenAI-compatible Moderations API." sidebar_label: Safety title: Safety --- @@ -12,6 +12,6 @@ title: Safety Safety -OpenAI-compatible Moderations API. + OpenAI-compatible Moderations API. This section contains documentation for all available providers for the **safety** API. From b3403cdd84a8ecd6f5af7c3b1385f0f5f0f93e9b Mon Sep 17 00:00:00 2001 From: kimbwook Date: Tue, 2 Dec 2025 20:17:19 +0900 Subject: [PATCH 16/16] race condition in concurrent file attachment to vector stores --- .../utils/memory/openai_vector_store_mixin.py | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py index bbfd60e25..dcf1286c0 100644 --- a/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py +++ b/src/llama_stack/providers/utils/memory/openai_vector_store_mixin.py @@ -92,6 +92,13 @@ class OpenAIVectorStoreMixin(ABC): self.kvstore = kvstore self._last_file_batch_cleanup_time = 0 self._file_batch_tasks: dict[str, asyncio.Task[None]] = {} + self._vector_store_locks: dict[str, asyncio.Lock] = {} + + def _get_vector_store_lock(self, vector_store_id: str) -> asyncio.Lock: + """Get or create a lock for a specific vector store.""" + if vector_store_id not in self._vector_store_locks: + self._vector_store_locks[vector_store_id] = asyncio.Lock() + return self._vector_store_locks[vector_store_id] async def _save_openai_vector_store(self, store_id: str, store_info: dict[str, Any]) -> None: """Save vector store metadata to persistent storage.""" @@ -831,16 +838,18 @@ class OpenAIVectorStoreMixin(ABC): await self._save_openai_vector_store_file(vector_store_id, file_id, file_info, dict_chunks) # Update file_ids and file_counts in vector store metadata - store_info = self.openai_vector_stores[vector_store_id].copy() - store_info["file_ids"].append(file_id) - store_info["file_counts"]["total"] += 1 - store_info["file_counts"][vector_store_file_object.status] += 1 + # Use lock to prevent race condition when multiple files are attached concurrently + async with self._get_vector_store_lock(vector_store_id): + store_info = self.openai_vector_stores[vector_store_id].copy() + # Deep copy file_counts to avoid mutating shared dict + store_info["file_counts"] = store_info["file_counts"].copy() + store_info["file_ids"] = store_info["file_ids"].copy() + store_info["file_ids"].append(file_id) + store_info["file_counts"]["total"] += 1 + store_info["file_counts"][vector_store_file_object.status] += 1 - # Save updated vector store to persistent storage - await self._save_openai_vector_store(vector_store_id, store_info) - - # Update vector store in-memory cache - self.openai_vector_stores[vector_store_id] = store_info + # Save updated vector store to persistent storage + await self._save_openai_vector_store(vector_store_id, store_info) return vector_store_file_object
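
The hybrid-search reranking introduced in PATCH 05 (as `Reranker`) and renamed in PATCH 06 (to `WeightedInMemoryAggregator`) can be exercised in isolation. Below is a minimal standalone sketch of that scoring path, assuming the semantics shown in the diffs above (min-max normalization, the weighted formula `(1 - alpha) * keyword + alpha * vector`, and Reciprocal Rank Fusion with a configurable impact factor); the sample score dictionaries and document IDs are hypothetical.

import heapq


def normalize_scores(scores: dict[str, float]) -> dict[str, float]:
    # Min-max normalize to [0, 1]; a constant score map normalizes to all 1.0,
    # matching _normalize_scores in vector_utils.py.
    if not scores:
        return {}
    lo, hi = min(scores.values()), max(scores.values())
    if hi > lo:
        return {doc_id: (s - lo) / (hi - lo) for doc_id, s in scores.items()}
    return dict.fromkeys(scores, 1.0)


def weighted_rerank(
    vector_scores: dict[str, float],
    keyword_scores: dict[str, float],
    alpha: float = 0.5,
) -> dict[str, float]:
    # score = (1 - alpha) * keyword + alpha * vector over normalized scores;
    # alpha=0 is keyword-only, alpha=1 is vector-only.
    v = normalize_scores(vector_scores)
    k = normalize_scores(keyword_scores)
    all_ids = set(vector_scores) | set(keyword_scores)
    return {d: (1 - alpha) * k.get(d, 0.0) + alpha * v.get(d, 0.0) for d in all_ids}


def rrf_rerank(
    vector_scores: dict[str, float],
    keyword_scores: dict[str, float],
    impact_factor: float = 60.0,
) -> dict[str, float]:
    # Reciprocal Rank Fusion: score = sum over lists of 1 / (impact_factor + rank).
    # A document missing from a list gets rank infinity, contributing 0.0.
    def ranks(scores: dict[str, float]) -> dict[str, int]:
        ordered = sorted(scores.items(), key=lambda x: x[1], reverse=True)
        return {doc_id: i + 1 for i, (doc_id, _) in enumerate(ordered)}

    vr, kr = ranks(vector_scores), ranks(keyword_scores)
    all_ids = set(vector_scores) | set(keyword_scores)
    return {
        d: 1.0 / (impact_factor + vr.get(d, float("inf")))
        + 1.0 / (impact_factor + kr.get(d, float("inf")))
        for d in all_ids
    }


if __name__ == "__main__":
    vector = {"doc1": 0.92, "doc2": 0.85, "doc3": 0.40}  # hypothetical similarities
    keyword = {"doc2": 3.1, "doc4": 2.7}  # hypothetical keyword scores
    combined = rrf_rerank(vector, keyword)
    # Top-k selection mirrors the heapq.nlargest call in ChromaIndex.query_hybrid.
    print(heapq.nlargest(2, combined.items(), key=lambda x: x[1]))

With RRF, documents that rank well in both result lists dominate the combined ordering, and the impact factor (default 60.0) damps how much any single list can contribute, which is why the series keeps it configurable through `reranker_params` rather than hard-coding it.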