diff --git a/.cursor/rules/general.mdc b/.cursor/rules/general.mdc deleted file mode 100644 index 24daef2ba..000000000 --- a/.cursor/rules/general.mdc +++ /dev/null @@ -1,9 +0,0 @@ ---- -description: General rules always applicable across the project -globs: -alwaysApply: true ---- -# Style - -- Comments must add value to code. Don't write filler comments explaining what you are doing next; they just add noise. -- Add a comment to clarify surprising behavior which would not be obvious. Good variable naming and clear code organization is more important. diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 4aba604dd..d68af5615 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -5,4 +5,19 @@ updates: - package-ecosystem: "github-actions" directory: "/" # Will use the default workflow location of `.github/workflows` schedule: - interval: "daily" + interval: "weekly" + day: "saturday" + commit-message: + prefix: chore(github-deps) + - package-ecosystem: "uv" + directory: "/" + schedule: + interval: "weekly" + day: "saturday" + # ignore all non-security updates: https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#open-pull-requests-limit + open-pull-requests-limit: 0 + labels: + - type/dependencies + - python + commit-message: + prefix: chore(python-deps) diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index ec782c331..0af46e1f0 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -1,4 +1,4 @@ -name: Integration tests +name: Integration Tests on: push: @@ -15,8 +15,14 @@ on: - '.github/workflows/integration-tests.yml' # This workflow jobs: - ollama: + test-matrix: runs-on: ubuntu-latest + strategy: + matrix: + # Listing tests manually since some of them currently fail + # TODO: generate matrix list from tests/integration when fixed + test-type: [inference, datasets, inspect, scoring, post_training, providers] + fail-fast: false # we want to run all tests regardless of failure steps: - name: Checkout repository @@ -43,6 +49,8 @@ jobs: run: | uv sync --extra dev --extra test uv pip install ollama faiss-cpu + # always test against the latest version of the client + uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main uv pip install -e . - name: Wait for Ollama to start @@ -72,17 +80,17 @@ jobs: echo "Waiting for Llama Stack server..." for i in {1..30}; do if curl -s http://localhost:8321/v1/health | grep -q "OK"; then - echo " Llama Stack server is up!" + echo "Llama Stack server is up!" 
exit 0 fi sleep 1 done - echo " Llama Stack server failed to start" + echo "Llama Stack server failed to start" cat server.log exit 1 - - name: Run Inference Integration Tests + - name: Run Integration Tests env: INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" run: | - uv run pytest -v tests/integration/inference --stack-config=ollama --text-model="meta-llama/Llama-3.2-3B-Instruct" --embedding-model=all-MiniLM-L6-v2 + uv run pytest -v tests/integration/${{ matrix.test-type }} --stack-config=ollama --text-model="meta-llama/Llama-3.2-3B-Instruct" --embedding-model=all-MiniLM-L6-v2 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e458fec0a..505d6b162 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -86,7 +86,7 @@ LLAMA_STACK_CONFIG= And then use this dotenv file when running client SDK tests via the following: ```bash -uv run --env-file .env -- pytest -v tests/api/inference/test_text_inference.py +uv run --env-file .env -- pytest -v tests/integration/inference/test_text_inference.py ``` ## Pre-commit Hooks diff --git a/README.md b/README.md index aade9c15f..d2adc3376 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,8 @@ [![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-stack)](https://pypi.org/project/llama-stack/) [![License](https://img.shields.io/pypi/l/llama_stack.svg)](https://github.com/meta-llama/llama-stack/blob/main/LICENSE) [![Discord](https://img.shields.io/discord/1257833999603335178)](https://discord.gg/llama-stack) -![Unit](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml/badge.svg?branch=main) +[![Unit Tests](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml/badge.svg?branch=main)](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain) +[![Integration Tests](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml/badge.svg?branch=main)](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain) [**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 6d545e7c1..ddfba8974 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -1126,6 +1126,7 @@ "application/json": { "schema": { "$ref": "#/components/schemas/Benchmark" + "$ref": "#/components/schemas/Benchmark" } } } @@ -1168,14 +1169,7 @@ "content": { "application/json": { "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/Dataset" - }, - { - "type": "null" - } - ] + "$ref": "#/components/schemas/Dataset" } } } @@ -1250,14 +1244,7 @@ "content": { "application/json": { "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/Model" - }, - { - "type": "null" - } - ] + "$ref": "#/components/schemas/Model" } } } @@ -1332,14 +1319,7 @@ "content": { "application/json": { "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/Shield" - }, - { - "type": "null" - } - ] + "$ref": "#/components/schemas/Shield" } } } @@ -1642,14 +1622,7 @@ "content": { "application/json": { "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/PostTrainingJobArtifactsResponse" - }, - { - "type": "null" - } - ] + "$ref": "#/components/schemas/PostTrainingJobArtifactsResponse" } } } @@ -1691,14 +1664,7 @@ "content": { "application/json": { "schema": { - "oneOf": [ 
- { - "$ref": "#/components/schemas/PostTrainingJobStatusResponse" - }, - { - "type": "null" - } - ] + "$ref": "#/components/schemas/PostTrainingJobStatusResponse" } } } @@ -1773,14 +1739,7 @@ "content": { "application/json": { "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/FileUploadResponse" - }, - { - "type": "null" - } - ] + "$ref": "#/components/schemas/FileUploadResponse" } } } @@ -1882,14 +1841,7 @@ "content": { "application/json": { "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/VectorDB" - }, - { - "type": "null" - } - ] + "$ref": "#/components/schemas/VectorDB" } } } diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index 41b11d9e0..275c02f43 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -774,6 +774,7 @@ paths: application/json: schema: $ref: '#/components/schemas/Benchmark' + $ref: '#/components/schemas/Benchmark' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -802,9 +803,7 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/Dataset' - - type: 'null' + $ref: '#/components/schemas/Dataset' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -855,9 +854,7 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/Model' - - type: 'null' + $ref: '#/components/schemas/Model' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -908,9 +905,7 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/Shield' - - type: 'null' + $ref: '#/components/schemas/Shield' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -1112,9 +1107,7 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/PostTrainingJobArtifactsResponse' - - type: 'null' + $ref: '#/components/schemas/PostTrainingJobArtifactsResponse' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -1142,9 +1135,7 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/PostTrainingJobStatusResponse' - - type: 'null' + $ref: '#/components/schemas/PostTrainingJobStatusResponse' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -1195,9 +1186,7 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/FileUploadResponse' - - type: 'null' + $ref: '#/components/schemas/FileUploadResponse' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -1266,9 +1255,7 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/VectorDB' - - type: 'null' + $ref: '#/components/schemas/VectorDB' '400': $ref: '#/components/responses/BadRequest400' '429': diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py index a2553f905..879ac95e2 100644 --- a/docs/openapi_generator/generate.py +++ b/docs/openapi_generator/generate.py @@ -12,7 +12,7 @@ from datetime import datetime from pathlib import Path - +import sys import fire import ruamel.yaml as yaml @@ -21,7 +21,7 @@ from llama_stack.distribution.stack import LlamaStack # noqa: E402 from .pyopenapi.options import Options # noqa: E402 from .pyopenapi.specification import Info, Server # noqa: E402 -from .pyopenapi.utility import Specification # noqa: E402 +from .pyopenapi.utility import Specification, validate_api_method_return_types # noqa: E402 def str_presenter(dumper, data): @@ -39,6 +39,14 @@ def main(output_dir: str): if not output_dir.exists(): raise ValueError(f"Directory {output_dir} does not 
exist") + # Validate API protocols before generating spec + print("Validating API method return types...") + return_type_errors = validate_api_method_return_types() + if return_type_errors: + print("\nAPI Method Return Type Validation Errors:\n") + for error in return_type_errors: + print(error) + sys.exit(1) now = str(datetime.now()) print( "Converting the spec to YAML (openapi.yaml) and HTML (openapi.html) at " + now diff --git a/docs/openapi_generator/pyopenapi/utility.py b/docs/openapi_generator/pyopenapi/utility.py index f134aab4b..f60a33bb7 100644 --- a/docs/openapi_generator/pyopenapi/utility.py +++ b/docs/openapi_generator/pyopenapi/utility.py @@ -6,16 +6,19 @@ import json import typing +import inspect +import os from pathlib import Path from typing import TextIO +from typing import Any, Dict, List, Optional, Protocol, Type, Union, get_type_hints, get_origin, get_args from llama_stack.strong_typing.schema import object_to_json, StrictJsonType +from llama_stack.distribution.resolver import api_protocol_map from .generator import Generator from .options import Options from .specification import Document - THIS_DIR = Path(__file__).parent @@ -114,3 +117,37 @@ class Specification: ) f.write(html) + +def is_optional_type(type_: Any) -> bool: + """Check if a type is Optional.""" + origin = get_origin(type_) + args = get_args(type_) + return origin is Optional or (origin is Union and type(None) in args) + + +def validate_api_method_return_types() -> List[str]: + """Validate that all API methods have proper return types.""" + errors = [] + protocols = api_protocol_map() + + for protocol_name, protocol in protocols.items(): + methods = inspect.getmembers(protocol, predicate=inspect.isfunction) + + for method_name, method in methods: + if not hasattr(method, '__webmethod__'): + continue + + # Only check GET methods + if method.__webmethod__.method != "GET": + continue + + hints = get_type_hints(method) + + if 'return' not in hints: + errors.append(f"Method {protocol_name}.{method_name} has no return type annotation") + else: + return_type = hints['return'] + if is_optional_type(return_type): + errors.append(f"Method {protocol_name}.{method_name} returns Optional type") + + return errors diff --git a/docs/source/distributions/building_distro.md b/docs/source/distributions/building_distro.md index 37a7e7974..9b8c1b9ad 100644 --- a/docs/source/distributions/building_distro.md +++ b/docs/source/distributions/building_distro.md @@ -185,8 +185,12 @@ llama stack build --config llama_stack/templates/ollama/build.yaml ::: :::{tab-item} Building Container -> [!TIP] -> Podman is supported as an alternative to Docker. Set `CONTAINER_BINARY` to `podman` in your environment to use Podman. + +```{admonition} Podman Alternative +:class: tip + +Podman is supported as an alternative to Docker. Set `CONTAINER_BINARY` to `podman` in your environment to use Podman. +``` To build a container image, you may start off from a template and use the `--image-type container` flag to specify `container` as the build image type. diff --git a/docs/source/playground/index.md b/docs/source/playground/index.md index 9691609ab..1d52de73f 100644 --- a/docs/source/playground/index.md +++ b/docs/source/playground/index.md @@ -92,6 +92,8 @@ Interactive pages for users to play with and explore Llama Stack API capabilitie ## Starting the Llama Stack Playground +### Llama CLI + To start the Llama Stack Playground, run the following commands: 1. 
Start up the Llama Stack API server @@ -107,3 +109,26 @@ cd llama_stack/distribution/ui pip install -r requirements.txt streamlit run app.py ``` + +### Docker + +The Playground can also be started in a Docker container: + +```sh +export LLAMA_STACK_URL=http://localhost:11434 + +docker run \ + -p 8501:8501 \ + -e LLAMA_STACK_ENDPOINT=$LLAMA_STACK_URL \ + quay.io/jland/llama-stack-playground +``` + +## Environment Variables + +| Environment Variable | Description | Default Value | +|----------------------------|------------------------------------|---------------------------| +| LLAMA_STACK_ENDPOINT | The endpoint for the Llama Stack | http://localhost:8321 | +| FIREWORKS_API_KEY | API key for Fireworks provider | (empty string) | +| TOGETHER_API_KEY | API key for Together provider | (empty string) | +| SAMBANOVA_API_KEY | API key for SambaNova provider | (empty string) | +| OPENAI_API_KEY | API key for OpenAI provider | (empty string) | diff --git a/docs/source/providers/vector_io/qdrant.md b/docs/source/providers/vector_io/qdrant.md index a0de0be98..8b0cbeef8 100644 --- a/docs/source/providers/vector_io/qdrant.md +++ b/docs/source/providers/vector_io/qdrant.md @@ -3,21 +3,36 @@ orphan: true --- # Qdrant -[Qdrant](https://qdrant.tech/documentation/) is a remote vector database provider for Llama Stack. It +[Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It allows you to store and query vectors directly in memory. That means you'll get fast and efficient vector retrieval. +> By default, Qdrant stores vectors in RAM, delivering incredibly fast access for datasets that fit comfortably in +> memory. But when your dataset exceeds RAM capacity, Qdrant offers Memmap as an alternative. +> +> \[[An Introduction to Vector Databases](https://qdrant.tech/articles/what-is-a-vector-database/)\] + + + ## Features -- Easy to use +- Lightweight and easy to use - Fully integrated with Llama Stack +- Apache 2.0 license terms +- Store embeddings and their metadata +- Supports + [Keyword](https://qdrant.tech/articles/qdrant-introduces-full-text-filters-and-indexes/) + and [Hybrid](https://qdrant.tech/articles/hybrid-search/#building-a-hybrid-search-system-in-qdrant) search +- [Multilingual and Multimodal retrieval](https://qdrant.tech/documentation/multimodal-search/) +- [Metadata filtering](https://qdrant.tech/articles/vector-search-filtering/) +- [GPU support](https://qdrant.tech/documentation/guides/running-with-gpu/) ## Usage To use Qdrant in your Llama Stack project, follow these steps: 1. Install the necessary dependencies. -2. Configure your Llama Stack project to use Faiss. +2. Configure your Llama Stack project to use Qdrant. 3. Start storing and querying vectors. ## Installation diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py index d033d0b70..616371c7d 100644 --- a/llama_stack/apis/datasets/datasets.py +++ b/llama_stack/apis/datasets/datasets.py @@ -201,7 +201,7 @@ class Datasets(Protocol): async def get_dataset( self, dataset_id: str, - ) -> Optional[Dataset]: ... + ) -> Dataset: ... @webmethod(route="/datasets", method="GET") async def list_datasets(self) -> ListDatasetsResponse: ...
diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py index e215a41c1..2cb1390dc 100644 --- a/llama_stack/apis/eval/eval.py +++ b/llama_stack/apis/eval/eval.py @@ -117,7 +117,7 @@ class Eval(Protocol): """ @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET") - async def job_status(self, benchmark_id: str, job_id: str) -> Optional[JobStatus]: + async def job_status(self, benchmark_id: str, job_id: str) -> JobStatus: """Get the status of a job. :param benchmark_id: The ID of the benchmark to run the evaluation on. diff --git a/llama_stack/apis/files/files.py b/llama_stack/apis/files/files.py index f17fadc8c..65c1ead6a 100644 --- a/llama_stack/apis/files/files.py +++ b/llama_stack/apis/files/files.py @@ -115,7 +115,7 @@ class Files(Protocol): async def get_upload_session_info( self, upload_id: str, - ) -> Optional[FileUploadResponse]: + ) -> FileUploadResponse: """ Returns information about an existsing upload session diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py index 64b9510ea..893ebc179 100644 --- a/llama_stack/apis/models/models.py +++ b/llama_stack/apis/models/models.py @@ -66,7 +66,7 @@ class Models(Protocol): async def get_model( self, model_id: str, - ) -> Optional[Model]: ... + ) -> Model: ... @webmethod(route="/models", method="POST") async def register_model( diff --git a/llama_stack/apis/post_training/post_training.py b/llama_stack/apis/post_training/post_training.py index ed15c6de4..636eb7e7b 100644 --- a/llama_stack/apis/post_training/post_training.py +++ b/llama_stack/apis/post_training/post_training.py @@ -202,10 +202,10 @@ class PostTraining(Protocol): async def get_training_jobs(self) -> ListPostTrainingJobsResponse: ... @webmethod(route="/post-training/job/status", method="GET") - async def get_training_job_status(self, job_uuid: str) -> Optional[PostTrainingJobStatusResponse]: ... + async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse: ... @webmethod(route="/post-training/job/cancel", method="POST") async def cancel_training_job(self, job_uuid: str) -> None: ... @webmethod(route="/post-training/job/artifacts", method="GET") - async def get_training_job_artifacts(self, job_uuid: str) -> Optional[PostTrainingJobArtifactsResponse]: ... + async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse: ... diff --git a/llama_stack/apis/scoring_functions/scoring_functions.py b/llama_stack/apis/scoring_functions/scoring_functions.py index 52508d2ec..b02a7a0c4 100644 --- a/llama_stack/apis/scoring_functions/scoring_functions.py +++ b/llama_stack/apis/scoring_functions/scoring_functions.py @@ -135,7 +135,7 @@ class ScoringFunctions(Protocol): async def list_scoring_functions(self) -> ListScoringFunctionsResponse: ... @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET") - async def get_scoring_function(self, scoring_fn_id: str, /) -> Optional[ScoringFn]: ... + async def get_scoring_function(self, scoring_fn_id: str, /) -> ScoringFn: ... @webmethod(route="/scoring-functions", method="POST") async def register_scoring_function( diff --git a/llama_stack/apis/shields/shields.py b/llama_stack/apis/shields/shields.py index ec1179ac4..67f3bd27b 100644 --- a/llama_stack/apis/shields/shields.py +++ b/llama_stack/apis/shields/shields.py @@ -49,7 +49,7 @@ class Shields(Protocol): async def list_shields(self) -> ListShieldsResponse: ... 
@webmethod(route="/shields/{identifier:path}", method="GET") - async def get_shield(self, identifier: str) -> Optional[Shield]: ... + async def get_shield(self, identifier: str) -> Shield: ... @webmethod(route="/shields", method="POST") async def register_shield( diff --git a/llama_stack/apis/vector_dbs/vector_dbs.py b/llama_stack/apis/vector_dbs/vector_dbs.py index 9a4aa322f..fe6c33919 100644 --- a/llama_stack/apis/vector_dbs/vector_dbs.py +++ b/llama_stack/apis/vector_dbs/vector_dbs.py @@ -50,7 +50,7 @@ class VectorDBs(Protocol): async def get_vector_db( self, vector_db_id: str, - ) -> Optional[VectorDB]: ... + ) -> VectorDB: ... @webmethod(route="/vector-dbs", method="POST") async def register_vector_db( diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py index 533993421..5dea942f7 100644 --- a/llama_stack/distribution/routers/routing_tables.py +++ b/llama_stack/distribution/routers/routing_tables.py @@ -219,8 +219,11 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models): async def list_models(self) -> ListModelsResponse: return ListModelsResponse(data=await self.get_all_with_type("model")) - async def get_model(self, model_id: str) -> Optional[Model]: - return await self.get_object_by_identifier("model", model_id) + async def get_model(self, model_id: str) -> Model: + model = await self.get_object_by_identifier("model", model_id) + if model is None: + raise ValueError(f"Model '{model_id}' not found") + return model async def register_model( self, @@ -267,8 +270,11 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields): async def list_shields(self) -> ListShieldsResponse: return ListShieldsResponse(data=await self.get_all_with_type(ResourceType.shield.value)) - async def get_shield(self, identifier: str) -> Optional[Shield]: - return await self.get_object_by_identifier("shield", identifier) + async def get_shield(self, identifier: str) -> Shield: + shield = await self.get_object_by_identifier("shield", identifier) + if shield is None: + raise ValueError(f"Shield '{identifier}' not found") + return shield async def register_shield( self, @@ -303,8 +309,11 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs): async def list_vector_dbs(self) -> ListVectorDBsResponse: return ListVectorDBsResponse(data=await self.get_all_with_type("vector_db")) - async def get_vector_db(self, vector_db_id: str) -> Optional[VectorDB]: - return await self.get_object_by_identifier("vector_db", vector_db_id) + async def get_vector_db(self, vector_db_id: str) -> VectorDB: + vector_db = await self.get_object_by_identifier("vector_db", vector_db_id) + if vector_db is None: + raise ValueError(f"Vector DB '{vector_db_id}' not found") + return vector_db async def register_vector_db( self, @@ -355,8 +364,11 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets): async def list_datasets(self) -> ListDatasetsResponse: return ListDatasetsResponse(data=await self.get_all_with_type(ResourceType.dataset.value)) - async def get_dataset(self, dataset_id: str) -> Optional[Dataset]: - return await self.get_object_by_identifier("dataset", dataset_id) + async def get_dataset(self, dataset_id: str) -> Dataset: + dataset = await self.get_object_by_identifier("dataset", dataset_id) + if dataset is None: + raise ValueError(f"Dataset '{dataset_id}' not found") + return dataset async def register_dataset( self, @@ -408,8 +420,11 @@ class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, ScoringFunctions): async def 
list_scoring_functions(self) -> ListScoringFunctionsResponse: return ListScoringFunctionsResponse(data=await self.get_all_with_type(ResourceType.scoring_function.value)) - async def get_scoring_function(self, scoring_fn_id: str) -> Optional[ScoringFn]: - return await self.get_object_by_identifier("scoring_function", scoring_fn_id) + async def get_scoring_function(self, scoring_fn_id: str) -> ScoringFn: + scoring_fn = await self.get_object_by_identifier("scoring_function", scoring_fn_id) + if scoring_fn is None: + raise ValueError(f"Scoring function '{scoring_fn_id}' not found") + return scoring_fn async def register_scoring_function( self, @@ -445,8 +460,11 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks): async def list_benchmarks(self) -> ListBenchmarksResponse: return ListBenchmarksResponse(data=await self.get_all_with_type("benchmark")) - async def get_benchmark(self, benchmark_id: str) -> Optional[Benchmark]: - return await self.get_object_by_identifier("benchmark", benchmark_id) + async def get_benchmark(self, benchmark_id: str) -> Benchmark: + benchmark = await self.get_object_by_identifier("benchmark", benchmark_id) + if benchmark is None: + raise ValueError(f"Benchmark '{benchmark_id}' not found") + return benchmark async def register_benchmark( self, @@ -490,7 +508,10 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups): return ListToolGroupsResponse(data=await self.get_all_with_type("tool_group")) async def get_tool_group(self, toolgroup_id: str) -> ToolGroup: - return await self.get_object_by_identifier("tool_group", toolgroup_id) + tool_group = await self.get_object_by_identifier("tool_group", toolgroup_id) + if tool_group is None: + raise ValueError(f"Tool group '{toolgroup_id}' not found") + return tool_group async def get_tool(self, tool_name: str) -> Tool: return await self.get_object_by_identifier("tool", tool_name) diff --git a/llama_stack/distribution/ui/Containerfile b/llama_stack/distribution/ui/Containerfile new file mode 100644 index 000000000..a97f25753 --- /dev/null +++ b/llama_stack/distribution/ui/Containerfile @@ -0,0 +1,11 @@ +# More info on playground configuration can be found here: +# https://llama-stack.readthedocs.io/en/latest/playground + +FROM python:3.9-slim +WORKDIR /app +COPY . 
/app/ +RUN /usr/local/bin/python -m pip install --upgrade pip && \ + /usr/local/bin/pip3 install -r requirements.txt +EXPOSE 8501 + +ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"] diff --git a/llama_stack/distribution/ui/README.md b/llama_stack/distribution/ui/README.md index f3df3f07a..fe660544f 100644 --- a/llama_stack/distribution/ui/README.md +++ b/llama_stack/distribution/ui/README.md @@ -40,3 +40,13 @@ cd llama_stack/distribution/ui pip install -r requirements.txt streamlit run app.py ``` + +## Environment Variables + +| Environment Variable | Description | Default Value | +|----------------------------|------------------------------------|---------------------------| +| LLAMA_STACK_ENDPOINT | The endpoint for the Llama Stack | http://localhost:8321 | +| FIREWORKS_API_KEY | API key for Fireworks provider | (empty string) | +| TOGETHER_API_KEY | API key for Together provider | (empty string) | +| SAMBANOVA_API_KEY | API key for SambaNova provider | (empty string) | +| OPENAI_API_KEY | API key for OpenAI provider | (empty string) | diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py index 03692bcc7..88b6e9697 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py @@ -614,118 +614,133 @@ class ChatAgent(ShieldRunnerMixin): logger.debug(f"completion message with EOM (iter: {n_iter}): {str(message)}") input_messages = input_messages + [message] else: - logger.debug(f"completion message (iter: {n_iter}) from the model: {str(message)}") - # 1. Start the tool execution step and progress - step_id = str(uuid.uuid4()) - yield AgentTurnResponseStreamChunk( - event=AgentTurnResponseEvent( - payload=AgentTurnResponseStepStartPayload( - step_type=StepType.tool_execution.value, - step_id=step_id, - ) - ) - ) - tool_call = message.tool_calls[0] - yield AgentTurnResponseStreamChunk( - event=AgentTurnResponseEvent( - payload=AgentTurnResponseStepProgressPayload( - step_type=StepType.tool_execution.value, - step_id=step_id, - tool_call=tool_call, - delta=ToolCallDelta( - parse_status=ToolCallParseStatus.in_progress, - tool_call=tool_call, - ), - ) - ) - ) + input_messages = input_messages + [message] - # If tool is a client tool, yield CompletionMessage and return - if tool_call.tool_name in client_tools: - # NOTE: mark end_of_message to indicate to client that it may - # call the tool and continue the conversation with the tool's response. 
- message.stop_reason = StopReason.end_of_message + # Process tool calls in the message + client_tool_calls = [] + non_client_tool_calls = [] + + # Separate client and non-client tool calls + for tool_call in message.tool_calls: + if tool_call.tool_name in client_tools: + client_tool_calls.append(tool_call) + else: + non_client_tool_calls.append(tool_call) + + # Process non-client tool calls first + for tool_call in non_client_tool_calls: + step_id = str(uuid.uuid4()) + yield AgentTurnResponseStreamChunk( + event=AgentTurnResponseEvent( + payload=AgentTurnResponseStepStartPayload( + step_type=StepType.tool_execution.value, + step_id=step_id, + ) + ) + ) + + yield AgentTurnResponseStreamChunk( + event=AgentTurnResponseEvent( + payload=AgentTurnResponseStepProgressPayload( + step_type=StepType.tool_execution.value, + step_id=step_id, + delta=ToolCallDelta( + parse_status=ToolCallParseStatus.in_progress, + tool_call=tool_call, + ), + ) + ) + ) + + # Execute the tool call + async with tracing.span( + "tool_execution", + { + "tool_name": tool_call.tool_name, + "input": message.model_dump_json(), + }, + ) as span: + tool_execution_start_time = datetime.now(timezone.utc).isoformat() + tool_result = await self.execute_tool_call_maybe( + session_id, + tool_call, + ) + if tool_result.content is None: + raise ValueError( + f"Tool call result (id: {tool_call.call_id}, name: {tool_call.tool_name}) does not have any content" + ) + result_message = ToolResponseMessage( + call_id=tool_call.call_id, + content=tool_result.content, + ) + span.set_attribute("output", result_message.model_dump_json()) + + # Store tool execution step + tool_execution_step = ToolExecutionStep( + step_id=step_id, + turn_id=turn_id, + tool_calls=[tool_call], + tool_responses=[ + ToolResponse( + call_id=tool_call.call_id, + tool_name=tool_call.tool_name, + content=tool_result.content, + metadata=tool_result.metadata, + ) + ], + started_at=tool_execution_start_time, + completed_at=datetime.now(timezone.utc).isoformat(), + ) + + # Yield the step completion event + yield AgentTurnResponseStreamChunk( + event=AgentTurnResponseEvent( + payload=AgentTurnResponseStepCompletePayload( + step_type=StepType.tool_execution.value, + step_id=step_id, + step_details=tool_execution_step, + ) + ) + ) + + # Add the result message to input_messages for the next iteration + input_messages.append(result_message) + + # TODO: add tool-input touchpoint and a "start" event for this step also + # but that needs a lot more refactoring of Tool code potentially + if (type(result_message.content) is str) and ( + out_attachment := _interpret_content_as_attachment(result_message.content) + ): + # NOTE: when we push this message back to the model, the model may ignore the + # attached file path etc. since the model is trained to only provide a user message + # with the summary. 
We keep all generated attachments and then attach them to final message + output_attachments.append(out_attachment) + + # If there are client tool calls, yield a message with only those tool calls + if client_tool_calls: await self.storage.set_in_progress_tool_call_step( session_id, turn_id, ToolExecutionStep( step_id=step_id, turn_id=turn_id, - tool_calls=[tool_call], + tool_calls=client_tool_calls, tool_responses=[], started_at=datetime.now(timezone.utc).isoformat(), ), ) - yield message + + # Create a copy of the message with only client tool calls + client_message = message.model_copy(deep=True) + client_message.tool_calls = client_tool_calls + # NOTE: mark end_of_message to indicate to client that it may + # call the tool and continue the conversation with the tool's response. + client_message.stop_reason = StopReason.end_of_message + + # Yield the message with client tool calls + yield client_message return - # If tool is a builtin server tool, execute it - tool_name = tool_call.tool_name - if isinstance(tool_name, BuiltinTool): - tool_name = tool_name.value - async with tracing.span( - "tool_execution", - { - "tool_name": tool_name, - "input": message.model_dump_json(), - }, - ) as span: - tool_execution_start_time = datetime.now(timezone.utc).isoformat() - tool_call = message.tool_calls[0] - tool_result = await self.execute_tool_call_maybe( - session_id, - tool_call, - ) - if tool_result.content is None: - raise ValueError( - f"Tool call result (id: {tool_call.call_id}, name: {tool_call.tool_name}) does not have any content" - ) - result_messages = [ - ToolResponseMessage( - call_id=tool_call.call_id, - content=tool_result.content, - ) - ] - assert len(result_messages) == 1, "Currently not supporting multiple messages" - result_message = result_messages[0] - span.set_attribute("output", result_message.model_dump_json()) - - yield AgentTurnResponseStreamChunk( - event=AgentTurnResponseEvent( - payload=AgentTurnResponseStepCompletePayload( - step_type=StepType.tool_execution.value, - step_id=step_id, - step_details=ToolExecutionStep( - step_id=step_id, - turn_id=turn_id, - tool_calls=[tool_call], - tool_responses=[ - ToolResponse( - call_id=result_message.call_id, - tool_name=tool_call.tool_name, - content=result_message.content, - metadata=tool_result.metadata, - ) - ], - started_at=tool_execution_start_time, - completed_at=datetime.now(timezone.utc).isoformat(), - ), - ) - ) - ) - - # TODO: add tool-input touchpoint and a "start" event for this step also - # but that needs a lot more refactoring of Tool code potentially - if (type(result_message.content) is str) and ( - out_attachment := _interpret_content_as_attachment(result_message.content) - ): - # NOTE: when we push this message back to the model, the model may ignore the - # attached file path etc. since the model is trained to only provide a user message - # with the summary. We keep all generated attachments and then attach them to final message - output_attachments.append(out_attachment) - - input_messages = input_messages + [message, result_message] - async def _initialize_tools( self, toolgroups_for_turn: Optional[List[AgentToolGroup]] = None, @@ -891,16 +906,14 @@ class ChatAgent(ShieldRunnerMixin): if memory_tool and code_interpreter_tool: # if both memory and code_interpreter are available, we download the URLs # and attach the data to the last message. 
- msg = await attachment_message(self.tempdir, url_items) - input_messages.append(msg) + await attachment_message(self.tempdir, url_items, input_messages[-1]) # Since memory is present, add all the data to the memory bank await self.add_to_session_vector_db(session_id, documents) elif code_interpreter_tool: # if only code_interpreter is available, we download the URLs to a tempdir # and attach the path to them as a message to inference with the # assumption that the model invokes the code_interpreter tool with the path - msg = await attachment_message(self.tempdir, url_items) - input_messages.append(msg) + await attachment_message(self.tempdir, url_items, input_messages[-1]) elif memory_tool: # if only memory is available, we load the data from the URLs and content items to the memory bank await self.add_to_session_vector_db(session_id, documents) @@ -967,8 +980,8 @@ async def load_data_from_urls(urls: List[URL]) -> List[str]: return data -async def attachment_message(tempdir: str, urls: List[URL]) -> ToolResponseMessage: - content = [] +async def attachment_message(tempdir: str, urls: List[URL], message: UserMessage) -> None: + contents = [] for url in urls: uri = url.uri @@ -988,16 +1001,19 @@ async def attachment_message(tempdir: str, urls: List[URL]) -> ToolResponseMessa else: raise ValueError(f"Unsupported URL {url}") - content.append( + contents.append( TextContentItem( text=f'# User provided a file accessible to you at "{filepath}"\nYou can use code_interpreter to load and inspect it.' ) ) - return ToolResponseMessage( - call_id="", - content=content, - ) + if isinstance(message.content, list): + message.content.extend(contents) + else: + if isinstance(message.content, str): + message.content = [TextContentItem(text=message.content)] + contents + else: + message.content = [message.content] + contents def _interpret_content_as_attachment( diff --git a/llama_stack/providers/inline/safety/llama_guard/llama_guard.py b/llama_stack/providers/inline/safety/llama_guard/llama_guard.py index af0987fa8..e514e3781 100644 --- a/llama_stack/providers/inline/safety/llama_guard/llama_guard.py +++ b/llama_stack/providers/inline/safety/llama_guard/llama_guard.py @@ -227,13 +227,6 @@ class LlamaGuardShield: if len(messages) >= 2 and (messages[0].role == Role.user.value and messages[1].role == Role.user.value): messages = messages[1:] - for i in range(1, len(messages)): - if messages[i].role == messages[i - 1].role: - for i, m in enumerate(messages): - print(f"{i}: {m.role}: {m.content}") - raise ValueError( - f"Messages must alternate between user and assistant. Message {i} has the same role as message {i - 1}" - ) return messages async def run(self, messages: List[Message]) -> RunShieldResponse: diff --git a/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py b/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py index 4b97914c5..9610b9b46 100644 --- a/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py +++ b/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py @@ -5,6 +5,7 @@ # the root directory of this source tree. 
+import asyncio import logging import os import tempfile @@ -37,7 +38,7 @@ class CodeInterpreterToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime): async def initialize(self): pass - async def register_tool(self, tool: Tool): + async def register_tool(self, tool: Tool) -> None: pass async def unregister_tool(self, tool_id: str) -> None: @@ -65,7 +66,7 @@ class CodeInterpreterToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime): # Use environment variable to control bwrap usage force_disable_bwrap = os.environ.get("DISABLE_CODE_SANDBOX", "").lower() in ("1", "true", "yes") req = CodeExecutionRequest(scripts=[script], use_bwrap=not force_disable_bwrap) - res = self.code_executor.execute(req) + res = await asyncio.to_thread(self.code_executor.execute, req) pieces = [res["process_status"]] for out_type in ["stdout", "stderr"]: res_out = res[out_type] diff --git a/llama_stack/providers/inline/vector_io/qdrant/__init__.py b/llama_stack/providers/inline/vector_io/qdrant/__init__.py new file mode 100644 index 000000000..8f0b91c61 --- /dev/null +++ b/llama_stack/providers/inline/vector_io/qdrant/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Dict + +from llama_stack.providers.datatypes import Api, ProviderSpec + +from .config import QdrantVectorIOConfig + + +async def get_adapter_impl(config: QdrantVectorIOConfig, deps: Dict[Api, ProviderSpec]): + from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter + + impl = QdrantVectorIOAdapter(config, deps[Api.inference]) + await impl.initialize() + return impl diff --git a/llama_stack/providers/inline/vector_io/qdrant/config.py b/llama_stack/providers/inline/vector_io/qdrant/config.py new file mode 100644 index 000000000..282e951b0 --- /dev/null +++ b/llama_stack/providers/inline/vector_io/qdrant/config.py @@ -0,0 +1,23 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ + +from typing import Any, Dict + +from pydantic import BaseModel + +from llama_stack.schema_utils import json_schema_type + + +@json_schema_type +class QdrantVectorIOConfig(BaseModel): + path: str + + @classmethod + def sample_run_config(cls, __distro_dir__: str) -> Dict[str, Any]: + return { + "path": "${env.QDRANT_PATH:~/.llama/" + __distro_dir__ + "}/" + "qdrant.db", + } diff --git a/llama_stack/providers/registry/vector_io.py b/llama_stack/providers/registry/vector_io.py index fbc495d83..93031763d 100644 --- a/llama_stack/providers/registry/vector_io.py +++ b/llama_stack/providers/registry/vector_io.py @@ -92,6 +92,14 @@ def available_providers() -> List[ProviderSpec]: ), api_dependencies=[Api.inference], ), + InlineProviderSpec( + api=Api.vector_io, + provider_type="inline::qdrant", + pip_packages=["qdrant-client"], + module="llama_stack.providers.inline.vector_io.qdrant", + config_class="llama_stack.providers.inline.vector_io.qdrant.QdrantVectorIOConfig", + api_dependencies=[Api.inference], + ), remote_provider_spec( Api.vector_io, AdapterSpec( diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index b59da79eb..69e6335c6 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -6,6 +6,7 @@ import logging import warnings +from functools import lru_cache from typing import AsyncIterator, List, Optional, Union from openai import APIConnectionError, AsyncOpenAI, BadRequestError @@ -82,12 +83,42 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): # ) self._config = config - # make sure the client lives longer than any async calls - self._client = AsyncOpenAI( - base_url=f"{self._config.url}/v1", - api_key=(self._config.api_key.get_secret_value() if self._config.api_key else "NO KEY"), - timeout=self._config.timeout, - ) + + @lru_cache # noqa: B019 + def _get_client(self, provider_model_id: str) -> AsyncOpenAI: + """ + For hosted models, https://integrate.api.nvidia.com/v1 is the primary base_url. However, + some models are hosted on different URLs. This function returns the appropriate client + for the given provider_model_id. + + This relies on lru_cache and self._default_client to avoid creating a new client for each request + or for each model that is hosted on https://integrate.api.nvidia.com/v1. + + :param provider_model_id: The provider model ID + :return: An OpenAI client + """ + + @lru_cache # noqa: B019 + def _get_client_for_base_url(base_url: str) -> AsyncOpenAI: + """ + Maintain a single OpenAI client per base_url. 
+ """ + return AsyncOpenAI( + base_url=base_url, + api_key=(self._config.api_key.get_secret_value() if self._config.api_key else "NO KEY"), + timeout=self._config.timeout, + ) + + special_model_urls = { + "meta/llama-3.2-11b-vision-instruct": "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-11b-vision-instruct", + "meta/llama-3.2-90b-vision-instruct": "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-90b-vision-instruct", + } + + base_url = f"{self._config.url}/v1" + if _is_nvidia_hosted(self._config) and provider_model_id in special_model_urls: + base_url = special_model_urls[provider_model_id] + + return _get_client_for_base_url(base_url) async def completion( self, @@ -105,9 +136,10 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): await check_health(self._config) # this raises errors + provider_model_id = self.get_provider_model_id(model_id) request = convert_completion_request( request=CompletionRequest( - model=self.get_provider_model_id(model_id), + model=provider_model_id, content=content, sampling_params=sampling_params, response_format=response_format, @@ -118,7 +150,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): ) try: - response = await self._client.completions.create(**request) + response = await self._get_client(provider_model_id).completions.create(**request) except APIConnectionError as e: raise ConnectionError(f"Failed to connect to NVIDIA NIM at {self._config.url}: {e}") from e @@ -206,6 +238,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): await check_health(self._config) # this raises errors + provider_model_id = self.get_provider_model_id(model_id) request = await convert_chat_completion_request( request=ChatCompletionRequest( model=self.get_provider_model_id(model_id), @@ -221,7 +254,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): ) try: - response = await self._client.chat.completions.create(**request) + response = await self._get_client(provider_model_id).chat.completions.create(**request) except APIConnectionError as e: raise ConnectionError(f"Failed to connect to NVIDIA NIM at {self._config.url}: {e}") from e diff --git a/llama_stack/providers/remote/inference/vllm/config.py b/llama_stack/providers/remote/inference/vllm/config.py index c75cc8926..762cffde3 100644 --- a/llama_stack/providers/remote/inference/vllm/config.py +++ b/llama_stack/providers/remote/inference/vllm/config.py @@ -25,6 +25,10 @@ class VLLMInferenceAdapterConfig(BaseModel): default="fake", description="The API token", ) + tls_verify: bool = Field( + default=True, + description="Whether to verify TLS certificates", + ) @classmethod def sample_run_config( @@ -36,4 +40,5 @@ class VLLMInferenceAdapterConfig(BaseModel): "url": url, "max_tokens": "${env.VLLM_MAX_TOKENS:4096}", "api_token": "${env.VLLM_API_TOKEN:fake}", + "tls_verify": "${env.VLLM_TLS_VERIFY:true}", } diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index 4d7e66d78..f940de7ba 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -7,6 +7,7 @@ import json import logging from typing import AsyncGenerator, List, Optional, Union +import httpx from openai import AsyncOpenAI from openai.types.chat.chat_completion_chunk import ( ChatCompletionChunk as OpenAIChatCompletionChunk, @@ -229,7 +230,11 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): async def initialize(self) -> None: log.info(f"Initializing VLLM client with 
base_url={self.config.url}") - self.client = AsyncOpenAI(base_url=self.config.url, api_key=self.config.api_token) + self.client = AsyncOpenAI( + base_url=self.config.url, + api_key=self.config.api_token, + http_client=None if self.config.tls_verify else httpx.AsyncClient(verify=False), + ) async def shutdown(self) -> None: pass diff --git a/llama_stack/providers/remote/vector_io/qdrant/config.py b/llama_stack/providers/remote/vector_io/qdrant/config.py index ce68aa492..6d7eebe23 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/config.py +++ b/llama_stack/providers/remote/vector_io/qdrant/config.py @@ -23,7 +23,6 @@ class QdrantVectorIOConfig(BaseModel): prefix: Optional[str] = None timeout: Optional[int] = None host: Optional[str] = None - path: Optional[str] = None @classmethod def sample_run_config(cls, **kwargs: Any) -> Dict[str, Any]: diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 586b8ca95..9e7788dc0 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -6,7 +6,7 @@ import logging import uuid -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Union from numpy.typing import NDArray from qdrant_client import AsyncQdrantClient, models @@ -16,12 +16,13 @@ from llama_stack.apis.inference import InterleavedContent from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate +from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig from llama_stack.providers.utils.memory.vector_store import ( EmbeddingIndex, VectorDBWithIndex, ) -from .config import QdrantVectorIOConfig +from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig log = logging.getLogger(__name__) CHUNK_ID_KEY = "_chunk_id" @@ -99,17 +100,19 @@ class QdrantIndex(EmbeddingIndex): class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): - def __init__(self, config: QdrantVectorIOConfig, inference_api: Api.inference) -> None: + def __init__( + self, config: Union[RemoteQdrantVectorIOConfig, InlineQdrantVectorIOConfig], inference_api: Api.inference + ) -> None: self.config = config - self.client = AsyncQdrantClient(**self.config.model_dump(exclude_none=True)) + self.client: AsyncQdrantClient = None self.cache = {} self.inference_api = inference_api async def initialize(self) -> None: - pass + self.client = AsyncQdrantClient(**self.config.model_dump(exclude_none=True)) async def shutdown(self) -> None: - self.client.close() + await self.client.close() async def register_vector_db( self, @@ -123,6 +126,11 @@ class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): self.cache[vector_db.identifier] = index + async def unregister_vector_db(self, vector_db_id: str) -> None: + if vector_db_id in self.cache: + await self.cache[vector_db_id].index.delete() + del self.cache[vector_db_id] + async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> Optional[VectorDBWithIndex]: if vector_db_id in self.cache: return self.cache[vector_db_id] diff --git a/llama_stack/providers/utils/inference/litellm_openai_mixin.py b/llama_stack/providers/utils/inference/litellm_openai_mixin.py index d88dc5a9e..f99883990 100644 --- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py +++ 
b/llama_stack/providers/utils/inference/litellm_openai_mixin.py @@ -192,7 +192,11 @@ class LiteLLMOpenAIMixin( if request.tools: input_dict["tools"] = [convert_tooldef_to_openai_tool(tool) for tool in request.tools] if request.tool_config.tool_choice: - input_dict["tool_choice"] = request.tool_config.tool_choice.value + input_dict["tool_choice"] = ( + request.tool_config.tool_choice.value + if isinstance(request.tool_config.tool_choice, ToolChoice) + else request.tool_config.tool_choice + ) provider_data = self.get_request_provider_data() key_field = self.provider_data_api_key_field diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index ac37171c9..2a362f8cb 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -527,26 +527,30 @@ async def convert_message_to_openai_dict_new( async def _convert_message_content( content: InterleavedContent, ) -> Union[str, Iterable[OpenAIChatCompletionContentPartParam]]: - async def impl(): + async def impl( + content_: InterleavedContent, + ) -> Union[str, OpenAIChatCompletionContentPartParam, List[OpenAIChatCompletionContentPartParam]]: # Llama Stack and OpenAI spec match for str and text input - if isinstance(content, str): - return content - elif isinstance(content, TextContentItem): + if isinstance(content_, str): + return content_ + elif isinstance(content_, TextContentItem): return OpenAIChatCompletionContentPartTextParam( type="text", - text=content.text, + text=content_.text, ) - elif isinstance(content, ImageContentItem): + elif isinstance(content_, ImageContentItem): return OpenAIChatCompletionContentPartImageParam( type="image_url", - image_url=OpenAIImageURL(url=await convert_image_content_to_url(content)), + image_url=OpenAIImageURL(url=await convert_image_content_to_url(content_)), ) - elif isinstance(content, list): - return [await _convert_message_content(item) for item in content] + elif isinstance(content_, list): + return [await impl(item) for item in content_] else: - raise ValueError(f"Unsupported content type: {type(content)}") + raise ValueError(f"Unsupported content type: {type(content_)}") - ret = await impl() + ret = await impl(content) + + # OpenAI*Message expects a str or list if isinstance(ret, str) or isinstance(ret, list): return ret else: @@ -566,13 +570,14 @@ async def convert_message_to_openai_dict_new( OpenAIChatCompletionMessageToolCall( id=tool.call_id, function=OpenAIFunction( - name=tool.tool_name, + name=tool.tool_name if not isinstance(tool.tool_name, BuiltinTool) else tool.tool_name.value, arguments=json.dumps(tool.arguments), ), type="function", ) for tool in message.tool_calls - ], + ] + or None, ) elif isinstance(message, ToolResponseMessage): out = OpenAIChatCompletionToolMessage( @@ -858,7 +863,8 @@ async def convert_openai_chat_completion_stream( event_type = ChatCompletionResponseEventType.progress stop_reason = None - toolcall_buffer = {} + tool_call_idx_to_buffer = {} + async for chunk in stream: choice = chunk.choices[0] # assuming only one choice per chunk @@ -868,7 +874,6 @@ async def convert_openai_chat_completion_stream( # if there's a tool call, emit an event for each tool in the list # if tool call and content, emit both separately - if choice.delta.tool_calls: # the call may have content and a tool call. 
ChatCompletionResponseEvent # does not support both, so we emit the content first @@ -889,44 +894,53 @@ async def convert_openai_chat_completion_stream( ) if not enable_incremental_tool_calls: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=next(event_type), - delta=ToolCallDelta( - tool_call=_convert_openai_tool_calls(choice.delta.tool_calls)[0], - parse_status=ToolCallParseStatus.succeeded, - ), - logprobs=_convert_openai_logprobs(logprobs), + for tool_call in choice.delta.tool_calls: + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=event_type, + delta=ToolCallDelta( + tool_call=_convert_openai_tool_calls([tool_call])[0], + parse_status=ToolCallParseStatus.succeeded, + ), + logprobs=_convert_openai_logprobs(logprobs), + ) ) - ) else: - tool_call = choice.delta.tool_calls[0] - if "name" not in toolcall_buffer: - toolcall_buffer["call_id"] = tool_call.id - toolcall_buffer["name"] = None - toolcall_buffer["content"] = "" - if "arguments" not in toolcall_buffer: - toolcall_buffer["arguments"] = "" + for tool_call in choice.delta.tool_calls: + idx = tool_call.index if hasattr(tool_call, "index") else 0 - if tool_call.function.name: - toolcall_buffer["name"] = tool_call.function.name - delta = f"{toolcall_buffer['name']}(" - if tool_call.function.arguments: - toolcall_buffer["arguments"] += tool_call.function.arguments - delta = toolcall_buffer["arguments"] + if idx not in tool_call_idx_to_buffer: + tool_call_idx_to_buffer[idx] = { + "call_id": tool_call.id, + "name": None, + "arguments": "", + "content": "", + } - toolcall_buffer["content"] += delta - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=event_type, - delta=ToolCallDelta( - tool_call=delta, - parse_status=ToolCallParseStatus.in_progress, - ), - logprobs=_convert_openai_logprobs(logprobs), - ) - ) - else: + buffer = tool_call_idx_to_buffer[idx] + + if tool_call.function: + if tool_call.function.name: + buffer["name"] = tool_call.function.name + delta = f"{buffer['name']}(" + buffer["content"] += delta + + if tool_call.function.arguments: + delta = tool_call.function.arguments + buffer["arguments"] += delta + buffer["content"] += delta + + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=event_type, + delta=ToolCallDelta( + tool_call=delta, + parse_status=ToolCallParseStatus.in_progress, + ), + logprobs=_convert_openai_logprobs(logprobs), + ) + ) + elif choice.delta.content: yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( event_type=event_type, @@ -935,47 +949,51 @@ async def convert_openai_chat_completion_stream( ) ) - if toolcall_buffer: - delta = ")" - toolcall_buffer["content"] += delta - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=event_type, - delta=ToolCallDelta( - tool_call=delta, - parse_status=ToolCallParseStatus.in_progress, - ), - logprobs=_convert_openai_logprobs(logprobs), - ) - ) - try: - arguments = json.loads(toolcall_buffer["arguments"]) - tool_call = ToolCall( - call_id=toolcall_buffer["call_id"], - tool_name=toolcall_buffer["name"], - arguments=arguments, - ) + for idx, buffer in tool_call_idx_to_buffer.items(): + logger.debug(f"toolcall_buffer[{idx}]: {buffer}") + if buffer["name"]: + delta = ")" + buffer["content"] += delta yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, + 
event_type=event_type, delta=ToolCallDelta( - tool_call=tool_call, - parse_status=ToolCallParseStatus.succeeded, + tool_call=delta, + parse_status=ToolCallParseStatus.in_progress, ), - stop_reason=stop_reason, + logprobs=None, ) ) - except json.JSONDecodeError: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.complete, - delta=ToolCallDelta( - tool_call=toolcall_buffer["content"], - parse_status=ToolCallParseStatus.failed, - ), - stop_reason=stop_reason, + + try: + arguments = json.loads(buffer["arguments"]) + tool_call = ToolCall( + call_id=buffer["call_id"], + tool_name=buffer["name"], + arguments=arguments, + ) + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=ToolCallDelta( + tool_call=tool_call, + parse_status=ToolCallParseStatus.succeeded, + ), + stop_reason=stop_reason, + ) + ) + except json.JSONDecodeError as e: + print(f"Failed to parse arguments: {e}") + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=ToolCallDelta( + tool_call=buffer["content"], + parse_status=ToolCallParseStatus.failed, + ), + stop_reason=stop_reason, + ) ) - ) yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml index 9741f5302..3830ffcdb 100644 --- a/llama_stack/templates/remote-vllm/run-with-safety.yaml +++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml @@ -18,12 +18,14 @@ providers: url: ${env.VLLM_URL} max_tokens: ${env.VLLM_MAX_TOKENS:4096} api_token: ${env.VLLM_API_TOKEN:fake} + tls_verify: ${env.VLLM_TLS_VERIFY:true} - provider_id: vllm-safety provider_type: remote::vllm config: url: ${env.SAFETY_VLLM_URL} max_tokens: ${env.VLLM_MAX_TOKENS:4096} api_token: ${env.VLLM_API_TOKEN:fake} + tls_verify: ${env.VLLM_TLS_VERIFY:true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml index e26b20e88..b6bba1252 100644 --- a/llama_stack/templates/remote-vllm/run.yaml +++ b/llama_stack/templates/remote-vllm/run.yaml @@ -18,6 +18,7 @@ providers: url: ${env.VLLM_URL} max_tokens: ${env.VLLM_MAX_TOKENS:4096} api_token: ${env.VLLM_API_TOKEN:fake} + tls_verify: ${env.VLLM_TLS_VERIFY:true} - provider_id: sentence-transformers provider_type: inline::sentence-transformers config: {} diff --git a/pyproject.toml b/pyproject.toml index a006d69f9..f57b91462 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,7 +56,7 @@ dev = [ "ruamel.yaml", # needed for openapi generator ] # These are the dependencies required for running unit tests. -unit = ["sqlite-vec", "openai", "aiosqlite", "pypdf", "chardet"] +unit = ["sqlite-vec", "openai", "aiosqlite", "pypdf", "chardet", "qdrant-client"] # These are the core dependencies required for running integration tests. They are shared across all # providers. If a provider requires additional dependencies, please add them to your environment # separately. 
If you are using "uv" to execute your tests, you can use the "--with" flag to specify extra @@ -114,7 +114,6 @@ exclude = [ "./.git", "./docs/*", "./build", - "./scripts", "./venv", "*.pyi", ".pre-commit-config.yaml", @@ -248,6 +247,7 @@ exclude = [ "^llama_stack/providers/inline/vector_io/chroma/", "^llama_stack/providers/inline/vector_io/faiss/", "^llama_stack/providers/inline/vector_io/milvus/", + "^llama_stack/providers/inline/vector_io/qdrant/", "^llama_stack/providers/inline/vector_io/sqlite_vec/", "^llama_stack/providers/remote/agents/sample/", "^llama_stack/providers/remote/datasetio/huggingface/", diff --git a/scripts/gen-changelog.py b/scripts/gen-changelog.py index ac4053339..3df2af06b 100755 --- a/scripts/gen-changelog.py +++ b/scripts/gen-changelog.py @@ -11,7 +11,7 @@ import requests def get_all_releases(token): - url = f"https://api.github.com/repos/meta-llama/llama-stack/releases" + url = "https://api.github.com/repos/meta-llama/llama-stack/releases" headers = {"Accept": "application/vnd.github.v3+json"} if token: @@ -22,9 +22,7 @@ def get_all_releases(token): if response.status_code == 200: return response.json() else: - raise Exception( - f"Error fetching releases: {response.status_code}, {response.text}" - ) + raise Exception(f"Error fetching releases: {response.status_code}, {response.text}") def clean_release_body(body): @@ -55,7 +53,7 @@ def merge_release_notes(output_file, token=None): releases = get_all_releases(token) with open(output_file, "w", encoding="utf-8") as md_file: - md_file.write(f"# Changelog\n\n") + md_file.write("# Changelog\n\n") for release in releases: md_file.write(f"# {release['tag_name']}\n") diff --git a/tests/integration/agents/test_agents.py b/tests/integration/agents/test_agents.py index 61249ad17..581cc9f45 100644 --- a/tests/integration/agents/test_agents.py +++ b/tests/integration/agents/test_agents.py @@ -271,7 +271,7 @@ def test_custom_tool(llama_stack_client_with_mocked_inference, agent_config): client_tool = get_boiling_point agent_config = { **agent_config, - "tools": ["builtin::websearch", client_tool], + "tools": [client_tool], } agent = Agent(llama_stack_client_with_mocked_inference, **agent_config) @@ -320,42 +320,55 @@ def test_custom_tool_infinite_loop(llama_stack_client_with_mocked_inference, age assert num_tool_calls <= 5 -def test_tool_choice(llama_stack_client_with_mocked_inference, agent_config): - def run_agent(tool_choice): - client_tool = get_boiling_point - - test_agent_config = { - **agent_config, - "tool_config": {"tool_choice": tool_choice}, - "tools": [client_tool], - } - - agent = Agent(llama_stack_client_with_mocked_inference, **test_agent_config) - session_id = agent.create_session(f"test-session-{uuid4()}") - - response = agent.create_turn( - messages=[ - { - "role": "user", - "content": "What is the boiling point of polyjuice?", - }, - ], - session_id=session_id, - stream=False, - ) - - return [step for step in response.steps if step.step_type == "tool_execution"] - - tool_execution_steps = run_agent("required") +def test_tool_choice_required(llama_stack_client_with_mocked_inference, agent_config): + tool_execution_steps = run_agent_with_tool_choice( + llama_stack_client_with_mocked_inference, agent_config, "required" + ) assert len(tool_execution_steps) > 0 - tool_execution_steps = run_agent("none") + +def test_tool_choice_none(llama_stack_client_with_mocked_inference, agent_config): + tool_execution_steps = run_agent_with_tool_choice(llama_stack_client_with_mocked_inference, agent_config, "none") assert 
len(tool_execution_steps) == 0 - tool_execution_steps = run_agent("get_boiling_point") + +def test_tool_choice_get_boiling_point(llama_stack_client_with_mocked_inference, agent_config): + if "llama" not in agent_config["model"].lower(): + pytest.xfail("NotImplemented for non-llama models") + + tool_execution_steps = run_agent_with_tool_choice( + llama_stack_client_with_mocked_inference, agent_config, "get_boiling_point" + ) assert len(tool_execution_steps) >= 1 and tool_execution_steps[0].tool_calls[0].tool_name == "get_boiling_point" +def run_agent_with_tool_choice(client, agent_config, tool_choice): + client_tool = get_boiling_point + + test_agent_config = { + **agent_config, + "tool_config": {"tool_choice": tool_choice}, + "tools": [client_tool], + "max_infer_iters": 2, + } + + agent = Agent(client, **test_agent_config) + session_id = agent.create_session(f"test-session-{uuid4()}") + + response = agent.create_turn( + messages=[ + { + "role": "user", + "content": "What is the boiling point of polyjuice?", + }, + ], + session_id=session_id, + stream=False, + ) + + return [step for step in response.steps if step.step_type == "tool_execution"] + + @pytest.mark.parametrize("rag_tool_name", ["builtin::rag/knowledge_search", "builtin::rag"]) def test_rag_agent(llama_stack_client_with_mocked_inference, agent_config, rag_tool_name): urls = ["chat.rst", "llama3.rst", "memory_optimizations.rst", "lora_finetune.rst"] @@ -571,7 +584,7 @@ def test_rag_and_code_agent(llama_stack_client_with_mocked_inference, agent_conf [(get_boiling_point, False), (get_boiling_point_with_metadata, True)], ) def test_create_turn_response(llama_stack_client_with_mocked_inference, agent_config, client_tools): - client_tool, expectes_metadata = client_tools + client_tool, expects_metadata = client_tools agent_config = { **agent_config, "input_shields": [], @@ -597,7 +610,7 @@ def test_create_turn_response(llama_stack_client_with_mocked_inference, agent_co assert steps[0].step_type == "inference" assert steps[1].step_type == "tool_execution" assert steps[1].tool_calls[0].tool_name.startswith("get_boiling_point") - if expectes_metadata: + if expects_metadata: assert steps[1].tool_responses[0].metadata["source"] == "https://www.google.com" assert steps[2].step_type == "inference" @@ -609,3 +622,44 @@ def test_create_turn_response(llama_stack_client_with_mocked_inference, agent_co assert last_step_completed_at < step.started_at assert step.started_at < step.completed_at last_step_completed_at = step.completed_at + + +def test_multi_tool_calls(llama_stack_client_with_mocked_inference, agent_config): + if "gpt" not in agent_config["model"]: + pytest.xfail("Only tested on GPT models") + + agent_config = { + **agent_config, + "tools": [get_boiling_point], + } + + agent = Agent(llama_stack_client_with_mocked_inference, **agent_config) + session_id = agent.create_session(f"test-session-{uuid4()}") + + response = agent.create_turn( + messages=[ + { + "role": "user", + "content": "Call get_boiling_point twice to answer: What is the boiling point of polyjuice in both celsius and fahrenheit?", + }, + ], + session_id=session_id, + stream=False, + ) + steps = response.steps + assert len(steps) == 7 + assert steps[0].step_type == "shield_call" + assert steps[1].step_type == "inference" + assert steps[2].step_type == "shield_call" + assert steps[3].step_type == "tool_execution" + assert steps[4].step_type == "shield_call" + assert steps[5].step_type == "inference" + assert steps[6].step_type == "shield_call" + + tool_execution_step = 
steps[3] + assert len(tool_execution_step.tool_calls) == 2 + assert tool_execution_step.tool_calls[0].tool_name.startswith("get_boiling_point") + assert tool_execution_step.tool_calls[1].tool_name.startswith("get_boiling_point") + + output = response.output_message.content.lower() + assert "-100" in output and "-212" in output diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py index 3afe1389e..cb0997e1a 100644 --- a/tests/unit/providers/inference/test_remote_vllm.py +++ b/tests/unit/providers/inference/test_remote_vllm.py @@ -187,8 +187,8 @@ def test_chat_completion_doesnt_block_event_loop(caplog): loop.set_debug(True) caplog.set_level(logging.WARNING) - # Log when event loop is blocked for more than 100ms - loop.slow_callback_duration = 0.1 + # Log when event loop is blocked for more than 200ms + loop.slow_callback_duration = 0.2 # Sleep for 500ms in our delayed http response sleep_time = 0.5 diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py new file mode 100644 index 000000000..3bcd0613f --- /dev/null +++ b/tests/unit/providers/vector_io/conftest.py @@ -0,0 +1,42 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import random + +import numpy as np +import pytest + +from llama_stack.apis.vector_io import Chunk + +EMBEDDING_DIMENSION = 384 + + +@pytest.fixture +def vector_db_id() -> str: + return f"test-vector-db-{random.randint(1, 100)}" + + +@pytest.fixture(scope="session") +def embedding_dimension() -> int: + return EMBEDDING_DIMENSION + + +@pytest.fixture(scope="session") +def sample_chunks(): + """Generates chunks that force multiple batches for a single document to expose ID conflicts.""" + n, k = 10, 3 + sample = [ + Chunk(content=f"Sentence {i} from document {j}", metadata={"document_id": f"document-{j}"}) + for j in range(k) + for i in range(n) + ] + return sample + + +@pytest.fixture(scope="session") +def sample_embeddings(sample_chunks): + np.random.seed(42) + return np.array([np.random.rand(EMBEDDING_DIMENSION).astype(np.float32) for _ in sample_chunks]) diff --git a/tests/unit/providers/vector_io/test_qdrant.py b/tests/unit/providers/vector_io/test_qdrant.py new file mode 100644 index 000000000..bc97719c0 --- /dev/null +++ b/tests/unit/providers/vector_io/test_qdrant.py @@ -0,0 +1,135 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import asyncio +import os +from typing import Any +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +import pytest_asyncio + +from llama_stack.apis.inference import EmbeddingsResponse, Inference +from llama_stack.apis.vector_io import ( + QueryChunksResponse, + VectorDB, + VectorDBStore, +) +from llama_stack.providers.inline.vector_io.qdrant.config import ( + QdrantVectorIOConfig as InlineQdrantVectorIOConfig, +) +from llama_stack.providers.remote.vector_io.qdrant.qdrant import ( + QdrantVectorIOAdapter, +) + +# This test is a unit test for the QdrantVectorIOAdapter class. This should only contain +# tests which are specific to this class. 
More general (API-level) tests should be placed in +# tests/integration/vector_io/ +# +# How to run this test: +# +# pytest tests/unit/providers/vector_io/test_qdrant.py \ +# -v -s --tb=short --disable-warnings --asyncio-mode=auto + + +@pytest.fixture +def qdrant_config(tmp_path) -> InlineQdrantVectorIOConfig: + return InlineQdrantVectorIOConfig(path=os.path.join(tmp_path, "qdrant.db")) + + +@pytest.fixture(scope="session") +def loop(): + return asyncio.new_event_loop() + + +@pytest.fixture +def mock_vector_db(vector_db_id) -> MagicMock: + mock_vector_db = MagicMock(spec=VectorDB) + mock_vector_db.embedding_model = "embedding_model" + mock_vector_db.identifier = vector_db_id + return mock_vector_db + + +@pytest.fixture +def mock_vector_db_store(mock_vector_db) -> MagicMock: + mock_store = MagicMock(spec=VectorDBStore) + mock_store.get_vector_db = AsyncMock(return_value=mock_vector_db) + return mock_store + + +@pytest.fixture +def mock_api_service(sample_embeddings): + mock_api_service = MagicMock(spec=Inference) + mock_api_service.embeddings = AsyncMock(return_value=EmbeddingsResponse(embeddings=sample_embeddings)) + return mock_api_service + + +@pytest_asyncio.fixture +async def qdrant_adapter(qdrant_config, mock_vector_db_store, mock_api_service, loop) -> QdrantVectorIOAdapter: + adapter = QdrantVectorIOAdapter(config=qdrant_config, inference_api=mock_api_service) + adapter.vector_db_store = mock_vector_db_store + await adapter.initialize() + yield adapter + await adapter.shutdown() + + +__QUERY = "Sample query" + + +@pytest.mark.asyncio +@pytest.mark.parametrize("max_query_chunks, expected_chunks", [(2, 2), (100, 30)]) +async def test_qdrant_adapter_returns_expected_chunks( + qdrant_adapter: QdrantVectorIOAdapter, + vector_db_id, + sample_chunks, + sample_embeddings, + max_query_chunks, + expected_chunks, +) -> None: + assert qdrant_adapter is not None + await qdrant_adapter.insert_chunks(vector_db_id, sample_chunks) + + index = await qdrant_adapter._get_and_cache_vector_db_index(vector_db_id=vector_db_id) + assert index is not None + + response = await qdrant_adapter.query_chunks( + query=__QUERY, + vector_db_id=vector_db_id, + params={"max_chunks": max_query_chunks}, + ) + assert isinstance(response, QueryChunksResponse) + assert len(response.chunks) == expected_chunks + + +# To by-pass attempt to convert a Mock to JSON +def _prepare_for_json(value: Any) -> str: + return str(value) + + +@patch("llama_stack.providers.utils.telemetry.trace_protocol._prepare_for_json", new=_prepare_for_json) +@pytest.mark.asyncio +async def test_qdrant_register_and_unregister_vector_db( + qdrant_adapter: QdrantVectorIOAdapter, + mock_vector_db, + sample_chunks, +) -> None: + # Initially, no collections + vector_db_id = mock_vector_db.identifier + assert len((await qdrant_adapter.client.get_collections()).collections) == 0 + + # Register does not create a collection + assert not (await qdrant_adapter.client.collection_exists(vector_db_id)) + await qdrant_adapter.register_vector_db(mock_vector_db) + assert not (await qdrant_adapter.client.collection_exists(vector_db_id)) + + # First insert creates the collection + await qdrant_adapter.insert_chunks(vector_db_id, sample_chunks) + assert await qdrant_adapter.client.collection_exists(vector_db_id) + + # Unregister deletes the collection + await qdrant_adapter.unregister_vector_db(vector_db_id) + assert not (await qdrant_adapter.client.collection_exists(vector_db_id)) + assert len((await qdrant_adapter.client.get_collections()).collections) == 0 diff --git 
a/tests/unit/providers/vector_io/test_sqlite_vec.py b/tests/unit/providers/vector_io/test_sqlite_vec.py index eb5660a85..cff988c53 100644 --- a/tests/unit/providers/vector_io/test_sqlite_vec.py +++ b/tests/unit/providers/vector_io/test_sqlite_vec.py @@ -29,8 +29,6 @@ from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import ( # -v -s --tb=short --disable-warnings --asyncio-mode=auto SQLITE_VEC_PROVIDER = "sqlite_vec" -EMBEDDING_DIMENSION = 384 -EMBEDDING_MODEL = "all-MiniLM-L6-v2" @pytest.fixture(scope="session") @@ -50,26 +48,8 @@ def sqlite_connection(loop): @pytest_asyncio.fixture(scope="session", autouse=True) -async def sqlite_vec_index(sqlite_connection): - return await SQLiteVecIndex.create(dimension=EMBEDDING_DIMENSION, connection=sqlite_connection, bank_id="test_bank") - - -@pytest.fixture(scope="session") -def sample_chunks(): - """Generates chunks that force multiple batches for a single document to expose ID conflicts.""" - n, k = 10, 3 - sample = [ - Chunk(content=f"Sentence {i} from document {j}", metadata={"document_id": f"document-{j}"}) - for j in range(k) - for i in range(n) - ] - return sample - - -@pytest.fixture(scope="session") -def sample_embeddings(sample_chunks): - np.random.seed(42) - return np.array([np.random.rand(EMBEDDING_DIMENSION).astype(np.float32) for _ in sample_chunks]) +async def sqlite_vec_index(sqlite_connection, embedding_dimension): + return await SQLiteVecIndex.create(dimension=embedding_dimension, connection=sqlite_connection, bank_id="test_bank") @pytest.mark.asyncio @@ -82,21 +62,21 @@ async def test_add_chunks(sqlite_vec_index, sample_chunks, sample_embeddings): @pytest.mark.asyncio -async def test_query_chunks(sqlite_vec_index, sample_chunks, sample_embeddings): +async def test_query_chunks(sqlite_vec_index, sample_chunks, sample_embeddings, embedding_dimension): await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) - query_embedding = np.random.rand(EMBEDDING_DIMENSION).astype(np.float32) + query_embedding = np.random.rand(embedding_dimension).astype(np.float32) response = await sqlite_vec_index.query(query_embedding, k=2, score_threshold=0.0) assert isinstance(response, QueryChunksResponse) assert len(response.chunks) == 2 @pytest.mark.asyncio -async def test_chunk_id_conflict(sqlite_vec_index, sample_chunks): +async def test_chunk_id_conflict(sqlite_vec_index, sample_chunks, embedding_dimension): """Test that chunk IDs do not conflict across batches when inserting chunks.""" # Reduce batch size to force multiple batches for same document # since there are 10 chunks per document and batch size is 2 batch_size = 2 - sample_embeddings = np.random.rand(len(sample_chunks), EMBEDDING_DIMENSION).astype(np.float32) + sample_embeddings = np.random.rand(len(sample_chunks), embedding_dimension).astype(np.float32) await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings, batch_size=batch_size) diff --git a/uv.lock b/uv.lock index 860b29241..b63d23b14 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,4 @@ version = 1 -revision = 1 requires-python = ">=3.10" resolution-markers = [ "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')", @@ -8,9 +7,12 @@ resolution-markers = [ "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')", "python_full_version == 
'3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", "python_full_version == '3.11.*' and sys_platform == 'darwin'", - "(python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform != 'darwin' and sys_platform != 'linux')", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "python_full_version >= '3.12' and sys_platform == 'darwin'", + "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version == '3.12.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.12.*' and sys_platform != 'darwin' and sys_platform != 'linux')", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", + "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", + "python_full_version >= '3.13' and sys_platform == 'darwin'", + "python_full_version == '3.12.*' and sys_platform == 'darwin'", ] [[package]] @@ -793,6 +795,107 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/89/30/2bd0eb03a7dee7727cd2ec643d1e992979e62d5e7443507381cce0455132/googleapis_common_protos-1.67.0-py2.py3-none-any.whl", hash = "sha256:579de760800d13616f51cf8be00c876f00a9f146d3e6510e19d1f4111758b741", size = 164985 }, ] +[[package]] +name = "grpcio" +version = "1.71.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1c/95/aa11fc09a85d91fbc7dd405dcb2a1e0256989d67bf89fa65ae24b3ba105a/grpcio-1.71.0.tar.gz", hash = "sha256:2b85f7820475ad3edec209d3d89a7909ada16caab05d3f2e08a7e8ae3200a55c", size = 12549828 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/c5/ef610b3f988cc0cc67b765f72b8e2db06a1db14e65acb5ae7810a6b7042e/grpcio-1.71.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:c200cb6f2393468142eb50ab19613229dcc7829b5ccee8b658a36005f6669fdd", size = 5210643 }, + { url = "https://files.pythonhosted.org/packages/bf/de/c84293c961622df302c0d5d07ec6e2d4cd3874ea42f602be2df09c4ad44f/grpcio-1.71.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:b2266862c5ad664a380fbbcdbdb8289d71464c42a8c29053820ee78ba0119e5d", size = 11308962 }, + { url = "https://files.pythonhosted.org/packages/7c/38/04c9e0dc8c904570c80faa1f1349b190b63e45d6b2782ec8567b050efa9d/grpcio-1.71.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:0ab8b2864396663a5b0b0d6d79495657ae85fa37dcb6498a2669d067c65c11ea", size = 5699236 }, + { url = "https://files.pythonhosted.org/packages/95/96/e7be331d1298fa605ea7c9ceafc931490edd3d5b33c4f695f1a0667f3491/grpcio-1.71.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c30f393f9d5ff00a71bb56de4aa75b8fe91b161aeb61d39528db6b768d7eac69", size = 6339767 }, + { url = "https://files.pythonhosted.org/packages/5d/b7/7e7b7bb6bb18baf156fd4f2f5b254150dcdd6cbf0def1ee427a2fb2bfc4d/grpcio-1.71.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f250ff44843d9a0615e350c77f890082102a0318d66a99540f54769c8766ab73", size = 5943028 }, + { url = "https://files.pythonhosted.org/packages/13/aa/5fb756175995aeb47238d706530772d9a7ac8e73bcca1b47dc145d02c95f/grpcio-1.71.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e6d8de076528f7c43a2f576bc311799f89d795aa6c9b637377cc2b1616473804", size = 6031841 }, + { url = 
"https://files.pythonhosted.org/packages/54/93/172783e01eed61f7f180617b7fa4470f504e383e32af2587f664576a7101/grpcio-1.71.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:9b91879d6da1605811ebc60d21ab6a7e4bae6c35f6b63a061d61eb818c8168f6", size = 6651039 }, + { url = "https://files.pythonhosted.org/packages/6f/99/62654b220a27ed46d3313252214f4bc66261143dc9b58004085cd0646753/grpcio-1.71.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f71574afdf944e6652203cd1badcda195b2a27d9c83e6d88dc1ce3cfb73b31a5", size = 6198465 }, + { url = "https://files.pythonhosted.org/packages/68/35/96116de833b330abe4412cc94edc68f99ed2fa3e39d8713ff307b3799e81/grpcio-1.71.0-cp310-cp310-win32.whl", hash = "sha256:8997d6785e93308f277884ee6899ba63baafa0dfb4729748200fcc537858a509", size = 3620382 }, + { url = "https://files.pythonhosted.org/packages/b7/09/f32ef637e386f3f2c02effac49699229fa560ce9007682d24e9e212d2eb4/grpcio-1.71.0-cp310-cp310-win_amd64.whl", hash = "sha256:7d6ac9481d9d0d129224f6d5934d5832c4b1cddb96b59e7eba8416868909786a", size = 4280302 }, + { url = "https://files.pythonhosted.org/packages/63/04/a085f3ad4133426f6da8c1becf0749872a49feb625a407a2e864ded3fb12/grpcio-1.71.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:d6aa986318c36508dc1d5001a3ff169a15b99b9f96ef5e98e13522c506b37eef", size = 5210453 }, + { url = "https://files.pythonhosted.org/packages/b4/d5/0bc53ed33ba458de95020970e2c22aa8027b26cc84f98bea7fcad5d695d1/grpcio-1.71.0-cp311-cp311-macosx_10_14_universal2.whl", hash = "sha256:d2c170247315f2d7e5798a22358e982ad6eeb68fa20cf7a820bb74c11f0736e7", size = 11347567 }, + { url = "https://files.pythonhosted.org/packages/e3/6d/ce334f7e7a58572335ccd61154d808fe681a4c5e951f8a1ff68f5a6e47ce/grpcio-1.71.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:e6f83a583ed0a5b08c5bc7a3fe860bb3c2eac1f03f1f63e0bc2091325605d2b7", size = 5696067 }, + { url = "https://files.pythonhosted.org/packages/05/4a/80befd0b8b1dc2b9ac5337e57473354d81be938f87132e147c4a24a581bd/grpcio-1.71.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4be74ddeeb92cc87190e0e376dbc8fc7736dbb6d3d454f2fa1f5be1dee26b9d7", size = 6348377 }, + { url = "https://files.pythonhosted.org/packages/c7/67/cbd63c485051eb78663355d9efd1b896cfb50d4a220581ec2cb9a15cd750/grpcio-1.71.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4dd0dfbe4d5eb1fcfec9490ca13f82b089a309dc3678e2edabc144051270a66e", size = 5940407 }, + { url = "https://files.pythonhosted.org/packages/98/4b/7a11aa4326d7faa499f764eaf8a9b5a0eb054ce0988ee7ca34897c2b02ae/grpcio-1.71.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a2242d6950dc892afdf9e951ed7ff89473aaf744b7d5727ad56bdaace363722b", size = 6030915 }, + { url = "https://files.pythonhosted.org/packages/eb/a2/cdae2d0e458b475213a011078b0090f7a1d87f9a68c678b76f6af7c6ac8c/grpcio-1.71.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0fa05ee31a20456b13ae49ad2e5d585265f71dd19fbd9ef983c28f926d45d0a7", size = 6648324 }, + { url = "https://files.pythonhosted.org/packages/27/df/f345c8daaa8d8574ce9869f9b36ca220c8845923eb3087e8f317eabfc2a8/grpcio-1.71.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3d081e859fb1ebe176de33fc3adb26c7d46b8812f906042705346b314bde32c3", size = 6197839 }, + { url = "https://files.pythonhosted.org/packages/f2/2c/cd488dc52a1d0ae1bad88b0d203bc302efbb88b82691039a6d85241c5781/grpcio-1.71.0-cp311-cp311-win32.whl", hash = "sha256:d6de81c9c00c8a23047136b11794b3584cdc1460ed7cbc10eada50614baa1444", size = 3619978 }, + { url = 
"https://files.pythonhosted.org/packages/ee/3f/cf92e7e62ccb8dbdf977499547dfc27133124d6467d3a7d23775bcecb0f9/grpcio-1.71.0-cp311-cp311-win_amd64.whl", hash = "sha256:24e867651fc67717b6f896d5f0cac0ec863a8b5fb7d6441c2ab428f52c651c6b", size = 4282279 }, + { url = "https://files.pythonhosted.org/packages/4c/83/bd4b6a9ba07825bd19c711d8b25874cd5de72c2a3fbf635c3c344ae65bd2/grpcio-1.71.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:0ff35c8d807c1c7531d3002be03221ff9ae15712b53ab46e2a0b4bb271f38537", size = 5184101 }, + { url = "https://files.pythonhosted.org/packages/31/ea/2e0d90c0853568bf714693447f5c73272ea95ee8dad107807fde740e595d/grpcio-1.71.0-cp312-cp312-macosx_10_14_universal2.whl", hash = "sha256:b78a99cd1ece4be92ab7c07765a0b038194ded2e0a26fd654591ee136088d8d7", size = 11310927 }, + { url = "https://files.pythonhosted.org/packages/ac/bc/07a3fd8af80467390af491d7dc66882db43884128cdb3cc8524915e0023c/grpcio-1.71.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:dc1a1231ed23caac1de9f943d031f1bc38d0f69d2a3b243ea0d664fc1fbd7fec", size = 5654280 }, + { url = "https://files.pythonhosted.org/packages/16/af/21f22ea3eed3d0538b6ef7889fce1878a8ba4164497f9e07385733391e2b/grpcio-1.71.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e6beeea5566092c5e3c4896c6d1d307fb46b1d4bdf3e70c8340b190a69198594", size = 6312051 }, + { url = "https://files.pythonhosted.org/packages/49/9d/e12ddc726dc8bd1aa6cba67c85ce42a12ba5b9dd75d5042214a59ccf28ce/grpcio-1.71.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5170929109450a2c031cfe87d6716f2fae39695ad5335d9106ae88cc32dc84c", size = 5910666 }, + { url = "https://files.pythonhosted.org/packages/d9/e9/38713d6d67aedef738b815763c25f092e0454dc58e77b1d2a51c9d5b3325/grpcio-1.71.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:5b08d03ace7aca7b2fadd4baf291139b4a5f058805a8327bfe9aece7253b6d67", size = 6012019 }, + { url = "https://files.pythonhosted.org/packages/80/da/4813cd7adbae6467724fa46c952d7aeac5e82e550b1c62ed2aeb78d444ae/grpcio-1.71.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f903017db76bf9cc2b2d8bdd37bf04b505bbccad6be8a81e1542206875d0e9db", size = 6637043 }, + { url = "https://files.pythonhosted.org/packages/52/ca/c0d767082e39dccb7985c73ab4cf1d23ce8613387149e9978c70c3bf3b07/grpcio-1.71.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:469f42a0b410883185eab4689060a20488a1a0a00f8bbb3cbc1061197b4c5a79", size = 6186143 }, + { url = "https://files.pythonhosted.org/packages/00/61/7b2c8ec13303f8fe36832c13d91ad4d4ba57204b1c723ada709c346b2271/grpcio-1.71.0-cp312-cp312-win32.whl", hash = "sha256:ad9f30838550695b5eb302add33f21f7301b882937460dd24f24b3cc5a95067a", size = 3604083 }, + { url = "https://files.pythonhosted.org/packages/fd/7c/1e429c5fb26122055d10ff9a1d754790fb067d83c633ff69eddcf8e3614b/grpcio-1.71.0-cp312-cp312-win_amd64.whl", hash = "sha256:652350609332de6dac4ece254e5d7e1ff834e203d6afb769601f286886f6f3a8", size = 4272191 }, + { url = "https://files.pythonhosted.org/packages/04/dd/b00cbb45400d06b26126dcfdbdb34bb6c4f28c3ebbd7aea8228679103ef6/grpcio-1.71.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:cebc1b34ba40a312ab480ccdb396ff3c529377a2fce72c45a741f7215bfe8379", size = 5184138 }, + { url = "https://files.pythonhosted.org/packages/ed/0a/4651215983d590ef53aac40ba0e29dda941a02b097892c44fa3357e706e5/grpcio-1.71.0-cp313-cp313-macosx_10_14_universal2.whl", hash = "sha256:85da336e3649a3d2171e82f696b5cad2c6231fdd5bad52616476235681bee5b3", size = 11310747 }, + { url = 
"https://files.pythonhosted.org/packages/57/a3/149615b247f321e13f60aa512d3509d4215173bdb982c9098d78484de216/grpcio-1.71.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f9a412f55bb6e8f3bb000e020dbc1e709627dcb3a56f6431fa7076b4c1aab0db", size = 5653991 }, + { url = "https://files.pythonhosted.org/packages/ca/56/29432a3e8d951b5e4e520a40cd93bebaa824a14033ea8e65b0ece1da6167/grpcio-1.71.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47be9584729534660416f6d2a3108aaeac1122f6b5bdbf9fd823e11fe6fbaa29", size = 6312781 }, + { url = "https://files.pythonhosted.org/packages/a3/f8/286e81a62964ceb6ac10b10925261d4871a762d2a763fbf354115f9afc98/grpcio-1.71.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c9c80ac6091c916db81131d50926a93ab162a7e97e4428ffc186b6e80d6dda4", size = 5910479 }, + { url = "https://files.pythonhosted.org/packages/35/67/d1febb49ec0f599b9e6d4d0d44c2d4afdbed9c3e80deb7587ec788fcf252/grpcio-1.71.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:789d5e2a3a15419374b7b45cd680b1e83bbc1e52b9086e49308e2c0b5bbae6e3", size = 6013262 }, + { url = "https://files.pythonhosted.org/packages/a1/04/f9ceda11755f0104a075ad7163fc0d96e2e3a9fe25ef38adfc74c5790daf/grpcio-1.71.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:1be857615e26a86d7363e8a163fade914595c81fec962b3d514a4b1e8760467b", size = 6643356 }, + { url = "https://files.pythonhosted.org/packages/fb/ce/236dbc3dc77cf9a9242adcf1f62538734ad64727fabf39e1346ad4bd5c75/grpcio-1.71.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a76d39b5fafd79ed604c4be0a869ec3581a172a707e2a8d7a4858cb05a5a7637", size = 6186564 }, + { url = "https://files.pythonhosted.org/packages/10/fd/b3348fce9dd4280e221f513dd54024e765b21c348bc475516672da4218e9/grpcio-1.71.0-cp313-cp313-win32.whl", hash = "sha256:74258dce215cb1995083daa17b379a1a5a87d275387b7ffe137f1d5131e2cfbb", size = 3601890 }, + { url = "https://files.pythonhosted.org/packages/be/f8/db5d5f3fc7e296166286c2a397836b8b042f7ad1e11028d82b061701f0f7/grpcio-1.71.0-cp313-cp313-win_amd64.whl", hash = "sha256:22c3bc8d488c039a199f7a003a38cb7635db6656fa96437a8accde8322ce2366", size = 4273308 }, +] + +[[package]] +name = "grpcio-tools" +version = "1.71.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "grpcio" }, + { name = "protobuf" }, + { name = "setuptools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/05/d2/c0866a48c355a6a4daa1f7e27e210c7fa561b1f3b7c0bce2671e89cfa31e/grpcio_tools-1.71.0.tar.gz", hash = "sha256:38dba8e0d5e0fb23a034e09644fdc6ed862be2371887eee54901999e8f6792a8", size = 5326008 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/60/aa7f261eda558d018457e5c8bd8a8079136e5107a0942fd3167477ab50e2/grpcio_tools-1.71.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:f4ad7f0d756546902597053d70b3af2606fbd70d7972876cd75c1e241d22ae00", size = 2385558 }, + { url = "https://files.pythonhosted.org/packages/0d/e3/e47b96e93e51398ba3462e027d93a10c0c23fffc31733de9bd4f44a2b867/grpcio_tools-1.71.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:64bdb291df61cf570b5256777ad5fe2b1db6d67bc46e55dc56a0a862722ae329", size = 5930039 }, + { url = "https://files.pythonhosted.org/packages/a6/69/5d8920002483b2a65ae3b03329dfe3b668c3592f001d5358e1538f540012/grpcio_tools-1.71.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:8dd9795e982d77a4b496f7278b943c2563d9afde2069cdee78c111a40cc4d675", size = 2351932 }, + { url = 
"https://files.pythonhosted.org/packages/c4/50/8116e307662a2337cdc3f0e1a8b23af197129448b7ff7e0cf1a76c9b0178/grpcio_tools-1.71.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c1b5860c41a36b26fec4f52998f1a451d0525a5c9a4fb06b6ea3e9211abdb925", size = 2744962 }, + { url = "https://files.pythonhosted.org/packages/e3/4b/d95be4aaf78d7b02dff3bd332c75c228288178e92af0e5228759ac5002a0/grpcio_tools-1.71.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3059c14035e5dc03d462f261e5900b9a077fd1a36976c3865b8507474520bad4", size = 2476716 }, + { url = "https://files.pythonhosted.org/packages/37/c2/c784a3705b1a1fd277751a8fc881d5a29325a460b9211e3c6164f594b178/grpcio_tools-1.71.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:f360981b215b1d5aff9235b37e7e1826246e35bbac32a53e41d4e990a37b8f4c", size = 2854132 }, + { url = "https://files.pythonhosted.org/packages/93/8f/173adbf72ed3996e1962182b55abf30151edc8b53daac0bf15cc3dc4b09e/grpcio_tools-1.71.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bfe3888c3bbe16a5aa39409bc38744a31c0c3d2daa2b0095978c56e106c85b42", size = 3305069 }, + { url = "https://files.pythonhosted.org/packages/e4/a8/b1e7df63e7f83336275922f92ded1cd6918964c511280b31c872c54538f4/grpcio_tools-1.71.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:145985c0bf12131f0a1503e65763e0f060473f7f3928ed1ff3fb0e8aad5bc8ac", size = 2916636 }, + { url = "https://files.pythonhosted.org/packages/be/a3/53f1e74c6e1c92ad94d7a0127a60fe913276a3e8c864737a053a1574b05c/grpcio_tools-1.71.0-cp310-cp310-win32.whl", hash = "sha256:82c430edd939bb863550ee0fecf067d78feff828908a1b529bbe33cc57f2419c", size = 949576 }, + { url = "https://files.pythonhosted.org/packages/97/43/4a3ae830c1405bcb1ba47f2225779dbe9fc009ba341d4a90012919304855/grpcio_tools-1.71.0-cp310-cp310-win_amd64.whl", hash = "sha256:83e90724e3f02415c628e4ead1d6ffe063820aaaa078d9a39176793df958cd5a", size = 1121087 }, + { url = "https://files.pythonhosted.org/packages/5d/ec/73b9797ffec80e1faf039ce3e2f0513e26e1a68eedc525ed294ae2a44d03/grpcio_tools-1.71.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:1f19b16b49afa5d21473f49c0966dd430c88d089cd52ac02404d8cef67134efb", size = 2385557 }, + { url = "https://files.pythonhosted.org/packages/bf/87/42c6e192b7b09c9610a53e771797f7826aee4f6e769683985ae406a2d862/grpcio_tools-1.71.0-cp311-cp311-macosx_10_14_universal2.whl", hash = "sha256:459c8f5e00e390aecd5b89de67deb3ec7188a274bc6cb50e43cef35ab3a3f45d", size = 5954404 }, + { url = "https://files.pythonhosted.org/packages/25/30/3fd385a56d32dce34cde09a64dbaf7cf85d395f2bcd86dd41e4b4ee5938f/grpcio_tools-1.71.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:edab7e6518de01196be37f96cb1e138c3819986bf5e2a6c9e1519b4d716b2f5a", size = 2352061 }, + { url = "https://files.pythonhosted.org/packages/87/eb/e9971c7693a2d85e7f55760f7906211a95ff74af4d41b05d187849d7fb58/grpcio_tools-1.71.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8b93b9f6adc7491d4c10144c0643409db298e5e63c997106a804f6f0248dbaf4", size = 2745033 }, + { url = "https://files.pythonhosted.org/packages/15/72/4e69beae87a1b334f80da9e93c8e2f5c8fe4860c956a781246a092dc4c97/grpcio_tools-1.71.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ae5f2efa9e644c10bf1021600bfc099dfbd8e02b184d2d25dc31fcd6c2bc59e", size = 2476743 }, + { url = "https://files.pythonhosted.org/packages/b5/f3/336d2c83f1bfc00a5376bf20dd2273d7aa891b03dd91b11c71ca47392351/grpcio_tools-1.71.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = 
"sha256:65aa082f4435571d65d5ce07fc444f23c3eff4f3e34abef599ef8c9e1f6f360f", size = 2853693 }, + { url = "https://files.pythonhosted.org/packages/62/ba/cc7ace518c11501a4b8620df5edb8188e81470e5b82dc6829212f3e9b2ff/grpcio_tools-1.71.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:1331e726e08b7bdcbf2075fcf4b47dff07842b04845e6e220a08a4663e232d7f", size = 3304474 }, + { url = "https://files.pythonhosted.org/packages/00/0d/4b843654af3d5aa2f1a5775df1d583e6e3471e6d569106fd3213ad185a98/grpcio_tools-1.71.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6693a7d3ba138b0e693b3d1f687cdd9db9e68976c3fa2b951c17a072fea8b583", size = 2916147 }, + { url = "https://files.pythonhosted.org/packages/e4/14/047e1c817422bc3d434247b9c640c51fd51ca4e047583ff31d927c3dea73/grpcio_tools-1.71.0-cp311-cp311-win32.whl", hash = "sha256:6d11ed3ff7b6023b5c72a8654975324bb98c1092426ba5b481af406ff559df00", size = 949374 }, + { url = "https://files.pythonhosted.org/packages/86/cb/739a1b6d517672693796022c0f9061f63eaa243ec70cbbfa59bf881ed9fb/grpcio_tools-1.71.0-cp311-cp311-win_amd64.whl", hash = "sha256:072b2a5805ac97e4623b3aa8f7818275f3fb087f4aa131b0fce00471065f6eaa", size = 1120786 }, + { url = "https://files.pythonhosted.org/packages/de/e4/156956b92ad0298290c3d68e6670bc5a6fbefcccfe1ec3997480605e7135/grpcio_tools-1.71.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:61c0409d5bdac57a7bd0ce0ab01c1c916728fe4c8a03d77a25135ad481eb505c", size = 2385480 }, + { url = "https://files.pythonhosted.org/packages/c1/08/9930eb4bb38c5214041c9f24f8b35e9864a7938282db986836546c782d52/grpcio_tools-1.71.0-cp312-cp312-macosx_10_14_universal2.whl", hash = "sha256:28784f39921d061d2164a9dcda5164a69d07bf29f91f0ea50b505958292312c9", size = 5951891 }, + { url = "https://files.pythonhosted.org/packages/73/65/931f29ec9c33719d48e1e30446ecce6f5d2cd4e4934fa73fbe07de41c43b/grpcio_tools-1.71.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:192808cf553cedca73f0479cc61d5684ad61f24db7a5f3c4dfe1500342425866", size = 2351967 }, + { url = "https://files.pythonhosted.org/packages/b8/26/2ec8748534406214f20a4809c36efcfa88d1a26246e8312102e3ef8c295d/grpcio_tools-1.71.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:989ee9da61098230d3d4c8f8f8e27c2de796f1ff21b1c90110e636d9acd9432b", size = 2745003 }, + { url = "https://files.pythonhosted.org/packages/f1/33/87b4610c86a4e10ee446b543a4d536f94ab04f828bab841f0bc1a083de72/grpcio_tools-1.71.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:541a756276c8a55dec991f6c0106ae20c8c8f5ce8d0bdbfcb01e2338d1a8192b", size = 2476455 }, + { url = "https://files.pythonhosted.org/packages/00/7c/f7f0cc36a43be9d45b3ce2a55245f3c7d063a24b7930dd719929e58871a4/grpcio_tools-1.71.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:870c0097700d13c403e5517cb7750ab5b4a791ce3e71791c411a38c5468b64bd", size = 2854333 }, + { url = "https://files.pythonhosted.org/packages/07/c4/34b9ea62b173c13fa7accba5f219355b320c05c80c79c3ba70fe52f47b2f/grpcio_tools-1.71.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:abd57f615e88bf93c3c6fd31f923106e3beb12f8cd2df95b0d256fa07a7a0a57", size = 3304297 }, + { url = "https://files.pythonhosted.org/packages/5c/ef/9d3449db8a07688dc3de7dcbd2a07048a128610b1a491c5c0cb3e90a00c5/grpcio_tools-1.71.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:753270e2d06d37e6d7af8967d1d059ec635ad215882041a36294f4e2fd502b2e", size = 2916212 }, + { url = 
"https://files.pythonhosted.org/packages/2e/c6/990e8194c934dfe7cf89ef307c319fa4f2bc0b78aeca707addbfa1e502f1/grpcio_tools-1.71.0-cp312-cp312-win32.whl", hash = "sha256:0e647794bd7138b8c215e86277a9711a95cf6a03ff6f9e555d54fdf7378b9f9d", size = 948849 }, + { url = "https://files.pythonhosted.org/packages/42/95/3c36d3205e6bd19853cc2420e44b6ef302eb4cfcf56498973c7e85f6c03b/grpcio_tools-1.71.0-cp312-cp312-win_amd64.whl", hash = "sha256:48debc879570972d28bfe98e4970eff25bb26da3f383e0e49829b2d2cd35ad87", size = 1120294 }, + { url = "https://files.pythonhosted.org/packages/84/a7/70dc7e9957bcbaccd4dcb6cc11215e0b918f546d55599221522fe0d073e0/grpcio_tools-1.71.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:9a78d07d6c301a25ef5ede962920a522556a1dfee1ccc05795994ceb867f766c", size = 2384758 }, + { url = "https://files.pythonhosted.org/packages/65/79/57320b28d0a0c5ec94095fd571a65292f8ed7e1c47e59ae4021e8a48d49b/grpcio_tools-1.71.0-cp313-cp313-macosx_10_14_universal2.whl", hash = "sha256:580ac88141c9815557e63c9c04f5b1cdb19b4db8d0cb792b573354bde1ee8b12", size = 5951661 }, + { url = "https://files.pythonhosted.org/packages/80/3d/343df5ed7c5dd66fc7a19e4ef3e97ccc4f5d802122b04cd6492f0dcd79f5/grpcio_tools-1.71.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f7c678e68ece0ae908ecae1c4314a0c2c7f83e26e281738b9609860cc2c82d96", size = 2351571 }, + { url = "https://files.pythonhosted.org/packages/56/2f/b9736e8c84e880c4237f5b880c6c799b4977c5cde190999bc7ab4b2ec445/grpcio_tools-1.71.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:56ecd6cc89b5e5eed1de5eb9cafce86c9c9043ee3840888cc464d16200290b53", size = 2744580 }, + { url = "https://files.pythonhosted.org/packages/76/9b/bdb384967353da7bf64bac4232f4cf8ae43f19d0f2f640978d4d4197e667/grpcio_tools-1.71.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e52a041afc20ab2431d756b6295d727bd7adee813b21b06a3483f4a7a15ea15f", size = 2475978 }, + { url = "https://files.pythonhosted.org/packages/26/71/1411487fd7862d347b98fda5e3beef611a71b2ac2faac62a965d9e2536b3/grpcio_tools-1.71.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:2a1712f12102b60c8d92779b89d0504e0d6f3a59f2b933e5622b8583f5c02992", size = 2853314 }, + { url = "https://files.pythonhosted.org/packages/03/06/59d0523eb1ba2f64edc72cb150152fa1b2e77061cae3ef3ecd3ef2a87f51/grpcio_tools-1.71.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:41878cb7a75477e62fdd45e7e9155b3af1b7a5332844021e2511deaf99ac9e6c", size = 3303981 }, + { url = "https://files.pythonhosted.org/packages/c2/71/fb9fb49f2b738ec1dfbbc8cdce0b26e5f9c5fc0edef72e453580620d6a36/grpcio_tools-1.71.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:682e958b476049ccc14c71bedf3f979bced01f6e0c04852efc5887841a32ad6b", size = 2915876 }, + { url = "https://files.pythonhosted.org/packages/bd/0f/0d49f6fe6fa2d09e9820dd9eeb30437e86002303076be2b6ada0fb52b8f2/grpcio_tools-1.71.0-cp313-cp313-win32.whl", hash = "sha256:0ccfb837152b7b858b9f26bb110b3ae8c46675d56130f6c2f03605c4f129be13", size = 948245 }, + { url = "https://files.pythonhosted.org/packages/bb/14/ab131a39187bfea950280b2277a82d2033469fe8c86f73b10b19f53cc5ca/grpcio_tools-1.71.0-cp313-cp313-win_amd64.whl", hash = "sha256:ffff9bc5eacb34dd26b487194f7d44a3e64e752fc2cf049d798021bf25053b87", size = 1119649 }, +] + [[package]] name = "h11" version = "0.14.0" @@ -802,6 +905,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = 
"sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259 }, ] +[[package]] +name = "h2" +version = "4.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "hpack" }, + { name = "hyperframe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1b/38/d7f80fd13e6582fb8e0df8c9a653dcc02b03ca34f4d72f34869298c5baf8/h2-4.2.0.tar.gz", hash = "sha256:c8a52129695e88b1a0578d8d2cc6842bbd79128ac685463b887ee278126ad01f", size = 2150682 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/9e/984486f2d0a0bd2b024bf4bc1c62688fcafa9e61991f041fb0e2def4a982/h2-4.2.0-py3-none-any.whl", hash = "sha256:479a53ad425bb29af087f3458a61d30780bc818e4ebcf01f0b536ba916462ed0", size = 60957 }, +] + +[[package]] +name = "hpack" +version = "4.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357 }, +] + [[package]] name = "httpcore" version = "1.0.7" @@ -830,6 +955,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 }, ] +[package.optional-dependencies] +http2 = [ + { name = "h2" }, +] + [[package]] name = "httpx-sse" version = "0.4.0" @@ -857,6 +987,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/4d/8092df2cb0cafa9fcaf691db851b2fccfe9cad4048e081436bbbdf56e4e1/huggingface_hub-0.29.0-py3-none-any.whl", hash = "sha256:c02daa0b6bafbdacb1320fdfd1dc7151d0940825c88c4ef89837fdb1f6ea0afe", size = 468012 }, ] +[[package]] +name = "hyperframe" +version = "6.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007 }, +] + [[package]] name = "identify" version = "2.6.7" @@ -1250,6 +1389,7 @@ unit = [ { name = "chardet" }, { name = "openai" }, { name = "pypdf" }, + { name = "qdrant-client" }, { name = "sqlite-vec" }, ] @@ -1290,6 +1430,7 @@ requires-dist = [ { name = "pytest-cov", marker = "extra == 'dev'" }, { name = "pytest-html", marker = "extra == 'dev'" }, { name = "python-dotenv" }, + { name = "qdrant-client", marker = "extra == 'unit'" }, { name = "requests" }, { name = "rich" }, { name = "rich", marker = "extra == 'codegen'" }, @@ -1314,7 +1455,6 @@ requires-dist = [ { name = "types-setuptools", marker = "extra == 'dev'" }, { name = "uvicorn", marker = "extra == 'dev'" }, ] -provides-extras = ["dev", "unit", "test", "docs", "codegen"] [[package]] name = "llama-stack-client" @@ -2062,6 +2202,18 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 }, ] +[[package]] +name = "portalocker" +version = "2.10.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pywin32", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ed/d3/c6c64067759e87af98cc668c1cc75171347d0f1577fab7ca3749134e3cd4/portalocker-2.10.1.tar.gz", hash = "sha256:ef1bf844e878ab08aee7e40184156e1151f228f103aa5c6bd0724cc330960f8f", size = 40891 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/fb/a70a4214956182e0d7a9099ab17d50bfcba1056188e9b14f35b9e2b62a0d/portalocker-2.10.1-py3-none-any.whl", hash = "sha256:53a5984ebc86a025552264b459b46a2086e269b21823cb572f8f28ee759e45bf", size = 18423 }, +] + [[package]] name = "pre-commit" version = "4.1.0" @@ -2668,6 +2820,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e3/fe/72e7e166bda3885810bee7b23049133e142f7c80c295bae02c562caeea16/pyzmq-26.2.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:bd8fdee945b877aa3bffc6a5a8816deb048dab0544f9df3731ecd0e54d8c84c9", size = 556563 }, ] +[[package]] +name = "qdrant-client" +version = "1.13.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "grpcio" }, + { name = "grpcio-tools" }, + { name = "httpx", extra = ["http2"] }, + { name = "numpy" }, + { name = "portalocker" }, + { name = "pydantic" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/58/1e4acd7ff7637ed56a66e5044699e7af6067232703d0b34f05068fc6234b/qdrant_client-1.13.3.tar.gz", hash = "sha256:61ca09e07c6d7ac0dfbdeb13dca4fe5f3e08fa430cb0d74d66ef5d023a70adfc", size = 266278 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dd/b4/bd676f91f5234ab59282e4a110f324029684482cbe08e7a1c77b6338013b/qdrant_client-1.13.3-py3-none-any.whl", hash = "sha256:f52cacbb936e547d3fceb1aaed3e3c56be0ebfd48e8ea495ea3dbc89c671d1d2", size = 306674 }, +] + [[package]] name = "rapidfuzz" version = "3.12.2" @@ -3417,7 +3587,8 @@ source = { registry = "https://download.pytorch.org/whl/cpu" } resolution-markers = [ "python_full_version < '3.11' and sys_platform == 'darwin'", "python_full_version == '3.11.*' and sys_platform == 'darwin'", - "python_full_version >= '3.12' and sys_platform == 'darwin'", + "python_full_version >= '3.13' and sys_platform == 'darwin'", + "python_full_version == '3.12.*' and sys_platform == 'darwin'", ] dependencies = [ { name = "filelock", marker = "sys_platform == 'darwin'" }, @@ -3444,8 +3615,10 @@ resolution-markers = [ "python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux'", "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')", "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform != 'darwin' and sys_platform != 'linux')", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and sys_platform == 'linux'", + "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and 
sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version == '3.12.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.12.*' and sys_platform != 'darwin' and sys_platform != 'linux')", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", + "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", ] dependencies = [ { name = "filelock", marker = "sys_platform != 'darwin'" }, @@ -3482,8 +3655,10 @@ resolution-markers = [ "python_full_version < '3.11' and sys_platform == 'darwin'", "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", "python_full_version == '3.11.*' and sys_platform == 'darwin'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "python_full_version >= '3.12' and sys_platform == 'darwin'", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", + "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", + "python_full_version >= '3.13' and sys_platform == 'darwin'", + "python_full_version == '3.12.*' and sys_platform == 'darwin'", ] dependencies = [ { name = "numpy", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or sys_platform == 'darwin'" }, @@ -3509,7 +3684,8 @@ source = { registry = "https://download.pytorch.org/whl/cpu" } resolution-markers = [ "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')", "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')", - "(python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version == '3.12.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.12.*' and sys_platform != 'darwin' and sys_platform != 'linux')", ] dependencies = [ { name = "numpy", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },