diff --git a/docs/source/providers/batches/index.md b/docs/source/providers/batches/index.md index d6d2fa9a3..20fa19212 100644 --- a/docs/source/providers/batches/index.md +++ b/docs/source/providers/batches/index.md @@ -3,15 +3,15 @@ ## Overview The Batches API enables efficient processing of multiple requests in a single operation, - particularly useful for processing large datasets, batch evaluation workflows, and - cost-effective inference at scale. +particularly useful for processing large datasets, batch evaluation workflows, and +cost-effective inference at scale. - The API is designed to allow use of openai client libraries for seamless integration. +The API is designed to allow use of openai client libraries for seamless integration. - This API provides the following extensions: - - idempotent batch creation +This API provides the following extensions: + - idempotent batch creation - Note: This API is currently under active development and may undergo changes. +Note: This API is currently under active development and may undergo changes. This section contains documentation for all available providers for the **batches** API. diff --git a/llama_stack/providers/remote/vector_io/chroma/chroma.py b/llama_stack/providers/remote/vector_io/chroma/chroma.py index 426d62473..5aaf91ee7 100644 --- a/llama_stack/providers/remote/vector_io/chroma/chroma.py +++ b/llama_stack/providers/remote/vector_io/chroma/chroma.py @@ -31,7 +31,7 @@ from llama_stack.providers.utils.memory.vector_store import ( EmbeddingIndex, VectorDBWithIndex, ) -from llama_stack.providers.utils.vector_io.vector_utils import Reranker +from llama_stack.providers.utils.vector_io.vector_utils import WeightedInMemoryAggregator from .config import ChromaVectorIOConfig as RemoteChromaVectorIOConfig @@ -192,7 +192,9 @@ class ChromaIndex(EmbeddingIndex): } # Combine scores using the reranking utility - combined_scores = Reranker.combine_search_results(vector_scores, keyword_scores, reranker_type, reranker_params) + combined_scores = WeightedInMemoryAggregator.combine_search_results( + vector_scores, keyword_scores, reranker_type, reranker_params + ) # Efficient top-k selection because it only tracks the k best candidates it's seen so far top_k_items = heapq.nlargest(k, combined_scores.items(), key=lambda x: x[1]) diff --git a/llama_stack/providers/utils/vector_io/vector_utils.py b/llama_stack/providers/utils/vector_io/vector_utils.py index 61ebad18f..b0992f3c1 100644 --- a/llama_stack/providers/utils/vector_io/vector_utils.py +++ b/llama_stack/providers/utils/vector_io/vector_utils.py @@ -39,7 +39,6 @@ def sanitize_collection_name(name: str, weaviate_format=False) -> str: return s - class WeightedInMemoryAggregator: @staticmethod def _normalize_scores(scores: dict[str, float]) -> dict[str, float]: diff --git a/pyproject.toml b/pyproject.toml index 72c4f6f9e..5fb3d2c7e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,14 +25,14 @@ classifiers = [ ] dependencies = [ "aiohttp", - "fastapi>=0.115.0,<1.0", # server - "fire", # for MCP in LLS client + "fastapi>=0.115.0,<1.0", # server + "fire", # for MCP in LLS client "httpx", "huggingface-hub>=0.34.0,<1.0", "jinja2>=3.1.6", "jsonschema", "llama-stack-client>=0.2.21", - "openai>=1.100.0", # for expires_after support + "openai>=1.100.0", # for expires_after support "prompt-toolkit", "python-dotenv", "python-jose[cryptography]", @@ -43,12 +43,13 @@ dependencies = [ "tiktoken", "pillow", "h11>=0.16.0", - "python-multipart>=0.0.20", # For fastapi Form - "uvicorn>=0.34.0", # server - "opentelemetry-sdk>=1.30.0", # server + "python-multipart>=0.0.20", # For fastapi Form + "uvicorn>=0.34.0", # server + "opentelemetry-sdk>=1.30.0", # server "opentelemetry-exporter-otlp-proto-http>=1.30.0", # server - "aiosqlite>=0.21.0", # server - for metadata store - "asyncpg", # for metadata store + "aiosqlite>=0.21.0", # server - for metadata store + "asyncpg", # for metadata store + "pre-commit>=4.2.0", ] [project.optional-dependencies] diff --git a/tests/unit/providers/vector_io/remote/test_chroma.py b/tests/unit/providers/vector_io/remote/test_chroma.py deleted file mode 100644 index ea9134f99..000000000 --- a/tests/unit/providers/vector_io/remote/test_chroma.py +++ /dev/null @@ -1,124 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import json -from unittest.mock import MagicMock, patch - -import numpy as np -import pytest - -from llama_stack.apis.vector_io import QueryChunksResponse - -# Mock the entire chromadb module -chromadb_mock = MagicMock() -chromadb_mock.AsyncHttpClient = MagicMock -chromadb_mock.PersistentClient = MagicMock - -# Apply the mock before importing ChromaIndex -with patch.dict("sys.modules", {"chromadb": chromadb_mock}): - from llama_stack.providers.remote.vector_io.chroma.chroma import ChromaIndex - -# This test is a unit test for the ChromaVectorIOAdapter class. This should only contain -# tests which are specific to this class. More general (API-level) tests should be placed in -# tests/integration/vector_io/ -# -# How to run this test: -# -# pytest tests/unit/providers/vector_io/test_chroma.py \ -# -v -s --tb=short --disable-warnings --asyncio-mode=auto - -CHROMA_PROVIDER = "chromadb" - - -@pytest.fixture -async def mock_chroma_collection() -> MagicMock: - """Create a mock Chroma collection with common method behaviors.""" - collection = MagicMock() - collection.name = "test_collection" - - # Mock add operation - collection.add.return_value = None - - # Mock query operation for vector search - collection.query.return_value = { - "distances": [[0.1, 0.2]], - "documents": [ - [ - json.dumps({"content": "mock chunk 1", "metadata": {"document_id": "doc1"}}), - json.dumps({"content": "mock chunk 2", "metadata": {"document_id": "doc2"}}), - ] - ], - } - - # Mock delete operation - collection.delete.return_value = None - - return collection - - -@pytest.fixture -async def mock_chroma_client(mock_chroma_collection): - """Create a mock Chroma client with common method behaviors.""" - client = MagicMock() - - # Mock collection operations - client.get_or_create_collection.return_value = mock_chroma_collection - client.get_collection.return_value = mock_chroma_collection - client.delete_collection.return_value = None - - return client - - -@pytest.fixture -async def chroma_index(mock_chroma_client, mock_chroma_collection): - """Create a ChromaIndex with mocked client and collection.""" - index = ChromaIndex(client=mock_chroma_client, collection=mock_chroma_collection) - yield index - # No real cleanup needed since we're using mocks - - -async def test_add_chunks(chroma_index, sample_chunks, sample_embeddings, mock_chroma_collection): - await chroma_index.add_chunks(sample_chunks, sample_embeddings) - - # Verify data was inserted - mock_chroma_collection.add.assert_called_once() - - # Verify the add call had the right number of chunks - add_call = mock_chroma_collection.add.call_args - assert len(add_call[1]["documents"]) == len(sample_chunks) - - -async def test_query_chunks_vector( - chroma_index, sample_chunks, sample_embeddings, embedding_dimension, mock_chroma_collection -): - # Setup: Add chunks first - await chroma_index.add_chunks(sample_chunks, sample_embeddings) - - # Test vector search - query_embedding = np.random.rand(embedding_dimension).astype(np.float32) - response = await chroma_index.query_vector(query_embedding, k=2, score_threshold=0.0) - - assert isinstance(response, QueryChunksResponse) - assert len(response.chunks) == 2 - mock_chroma_collection.query.assert_called_once() - - -async def test_query_chunks_keyword_search(chroma_index, sample_chunks, sample_embeddings, mock_chroma_collection): - await chroma_index.add_chunks(sample_chunks, sample_embeddings) - - # Test keyword search - query_string = "Sentence 5" - response = await chroma_index.query_keyword(query_string=query_string, k=2, score_threshold=0.0) - - assert isinstance(response, QueryChunksResponse) - assert len(response.chunks) == 2 - - -async def test_delete_collection(chroma_index, mock_chroma_client): - # Test collection deletion - await chroma_index.delete() - - mock_chroma_client.delete_collection.assert_called_once_with(chroma_index.collection.name) diff --git a/uv.lock b/uv.lock index 065eb3876..2ca805065 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.12" resolution-markers = [ "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')", @@ -1767,6 +1767,7 @@ dependencies = [ { name = "opentelemetry-exporter-otlp-proto-http" }, { name = "opentelemetry-sdk" }, { name = "pillow" }, + { name = "pre-commit" }, { name = "prompt-toolkit" }, { name = "pydantic" }, { name = "python-dotenv" }, @@ -1892,6 +1893,7 @@ requires-dist = [ { name = "opentelemetry-sdk", specifier = ">=1.30.0" }, { name = "pandas", marker = "extra == 'ui'" }, { name = "pillow" }, + { name = "pre-commit", specifier = ">=4.2.0" }, { name = "prompt-toolkit" }, { name = "pydantic", specifier = ">=2" }, { name = "python-dotenv" },