diff --git a/.github/TRIAGERS.md b/.github/TRIAGERS.md new file mode 100644 index 000000000..d4ef6d1ac --- /dev/null +++ b/.github/TRIAGERS.md @@ -0,0 +1,2 @@ +# This file documents Triage members in the Llama Stack community +@franciscojavierarceo @leseb diff --git a/.github/workflows/changelog.yml b/.github/workflows/changelog.yml new file mode 100644 index 000000000..5b63e231c --- /dev/null +++ b/.github/workflows/changelog.yml @@ -0,0 +1,29 @@ +name: Update Changelog + +on: + release: + types: [published, unpublished, created, edited, deleted, released] + +permissions: + contents: read + +jobs: + generate_changelog: + name: Generate changelog + permissions: + contents: write # for peter-evans/create-pull-request to create branch + pull-requests: write # for peter-evans/create-pull-request to create a PR + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + ref: main + fetch-depth: 0 + - run: | + python ./scripts/gen-changelog.py + - uses: peter-evans/create-pull-request@v7 + with: + title: 'docs: update CHANGELOG.md for ${{ github.ref_name }}' + commit-message: 'docs: update CHANGELOG.md for ${{ github.ref_name }}' + branch: create-pull-request/changelog + signoff: true diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 86adf8a15..0af46e1f0 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -1,4 +1,4 @@ -name: Integration tests +name: Integration Tests on: push: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e458fec0a..505d6b162 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -86,7 +86,7 @@ LLAMA_STACK_CONFIG= And then use this dotenv file when running client SDK tests via the following: ```bash -uv run --env-file .env -- pytest -v tests/api/inference/test_text_inference.py +uv run --env-file .env -- pytest -v tests/integration/inference/test_text_inference.py ``` ## Pre-commit Hooks diff --git a/README.md b/README.md index aade9c15f..918433d51 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,8 @@ [![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-stack)](https://pypi.org/project/llama-stack/) [![License](https://img.shields.io/pypi/l/llama_stack.svg)](https://github.com/meta-llama/llama-stack/blob/main/LICENSE) [![Discord](https://img.shields.io/discord/1257833999603335178)](https://discord.gg/llama-stack) -![Unit](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml/badge.svg?branch=main) +[![Unit Tests](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml/badge.svg?branch=main)](https://github.com/meta-llama/llama-stack/actions/workflows/unit-tests.yml?query=branch%3Amain) +[![Integration Tests](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml/badge.svg?branch=main)](https://github.com/meta-llama/llama-stack/actions/workflows/integration-tests.yml?query=branch%3Amain) [**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb) @@ -72,26 +73,6 @@ A Llama Stack Distribution (or "distro") is a pre-configured bundle of provider | Fireworks | [llamastack/distribution-fireworks](https://hub.docker.com/repository/docker/llamastack/distribution-fireworks/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/fireworks.html) | | vLLM | 
[llamastack/distribution-remote-vllm](https://hub.docker.com/repository/docker/llamastack/distribution-remote-vllm/general) | [Guide](https://llama-stack.readthedocs.io/en/latest/distributions/self_hosted_distro/remote-vllm.html) | -### Installation - -You have two ways to install this repository: - -* **Install as a package**: - You can install the repository directly from [PyPI](https://pypi.org/project/llama-stack/) by running the following command: - ```bash - pip install llama-stack - ``` - -* **Install from source**: - If you prefer to install from the source code, we recommend using [uv](https://github.com/astral-sh/uv). - Then, run the following commands: - ```bash - git clone git@github.com:meta-llama/llama-stack.git - cd llama-stack - - uv sync - uv pip install -e . - ``` ### Documentation diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 210a84b03..eb626fc44 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -1101,14 +1101,7 @@ "content": { "application/json": { "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/Benchmark" - }, - { - "type": "null" - } - ] + "$ref": "#/components/schemas/Benchmark" } } } @@ -1150,14 +1143,7 @@ "content": { "application/json": { "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/Dataset" - }, - { - "type": "null" - } - ] + "$ref": "#/components/schemas/Dataset" } } } @@ -1232,14 +1218,7 @@ "content": { "application/json": { "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/Model" - }, - { - "type": "null" - } - ] + "$ref": "#/components/schemas/Model" } } } @@ -1314,14 +1293,7 @@ "content": { "application/json": { "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/ScoringFn" - }, - { - "type": "null" - } - ] + "$ref": "#/components/schemas/ScoringFn" } } } @@ -1363,14 +1335,7 @@ "content": { "application/json": { "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/Shield" - }, - { - "type": "null" - } - ] + "$ref": "#/components/schemas/Shield" } } } @@ -1673,14 +1638,7 @@ "content": { "application/json": { "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/PostTrainingJobArtifactsResponse" - }, - { - "type": "null" - } - ] + "$ref": "#/components/schemas/PostTrainingJobArtifactsResponse" } } } @@ -1722,14 +1680,7 @@ "content": { "application/json": { "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/PostTrainingJobStatusResponse" - }, - { - "type": "null" - } - ] + "$ref": "#/components/schemas/PostTrainingJobStatusResponse" } } } @@ -1804,14 +1755,7 @@ "content": { "application/json": { "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/FileUploadResponse" - }, - { - "type": "null" - } - ] + "$ref": "#/components/schemas/FileUploadResponse" } } } @@ -1913,14 +1857,7 @@ "content": { "application/json": { "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/VectorDB" - }, - { - "type": "null" - } - ] + "$ref": "#/components/schemas/VectorDB" } } } @@ -2246,14 +2183,7 @@ "content": { "application/json": { "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/JobStatus" - }, - { - "type": "null" - } - ] + "$ref": "#/components/schemas/JobStatus" } } } @@ -4229,70 +4159,80 @@ ] }, "arguments": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" - }, - { - "type": "array", - "items": { - "oneOf": [ - { - "type": "string" - }, - { - 
"type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" + "oneOf": [ + { + "type": "string" + }, + { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + }, + { + "type": "array", + "items": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + } + ] } - ] - } - }, - { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "string" - }, - { - "type": "integer" - }, - { - "type": "number" - }, - { - "type": "boolean" - }, - { - "type": "null" + }, + { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "string" + }, + { + "type": "integer" + }, + { + "type": "number" + }, + { + "type": "boolean" + }, + { + "type": "null" + } + ] } - ] - } + } + ] } - ] - } + } + ] + }, + "arguments_json": { + "type": "string" } }, "additionalProperties": false, @@ -7857,7 +7797,8 @@ "type": "object", "properties": { "document_id": { - "type": "string" + "type": "string", + "description": "The unique identifier for the document." }, "content": { "oneOf": [ @@ -7876,10 +7817,12 @@ { "$ref": "#/components/schemas/URL" } - ] + ], + "description": "The content of the document." }, "mime_type": { - "type": "string" + "type": "string", + "description": "The MIME type of the document." }, "metadata": { "type": "object", @@ -7904,7 +7847,8 @@ "type": "object" } ] - } + }, + "description": "Additional metadata for the document." } }, "additionalProperties": false, @@ -7913,7 +7857,8 @@ "content", "metadata" ], - "title": "RAGDocument" + "title": "RAGDocument", + "description": "A document to be used for document ingestion in the RAG Tool." 
}, "InsertRequest": { "type": "object", @@ -9917,23 +9862,6 @@ ], "title": "ScoreBatchResponse" }, - "AlgorithmConfig": { - "oneOf": [ - { - "$ref": "#/components/schemas/LoraFinetuningConfig" - }, - { - "$ref": "#/components/schemas/QATFinetuningConfig" - } - ], - "discriminator": { - "propertyName": "type", - "mapping": { - "LoRA": "#/components/schemas/LoraFinetuningConfig", - "QAT": "#/components/schemas/QATFinetuningConfig" - } - } - }, "LoraFinetuningConfig": { "type": "object", "properties": { @@ -10069,7 +9997,14 @@ "type": "string" }, "algorithm_config": { - "$ref": "#/components/schemas/AlgorithmConfig" + "oneOf": [ + { + "$ref": "#/components/schemas/LoraFinetuningConfig" + }, + { + "$ref": "#/components/schemas/QATFinetuningConfig" + } + ] } }, "additionalProperties": false, diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index a1eb07444..fa6920381 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -757,9 +757,7 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/Benchmark' - - type: 'null' + $ref: '#/components/schemas/Benchmark' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -787,9 +785,7 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/Dataset' - - type: 'null' + $ref: '#/components/schemas/Dataset' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -840,9 +836,7 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/Model' - - type: 'null' + $ref: '#/components/schemas/Model' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -893,9 +887,7 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/ScoringFn' - - type: 'null' + $ref: '#/components/schemas/ScoringFn' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -923,9 +915,7 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/Shield' - - type: 'null' + $ref: '#/components/schemas/Shield' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -1127,9 +1117,7 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/PostTrainingJobArtifactsResponse' - - type: 'null' + $ref: '#/components/schemas/PostTrainingJobArtifactsResponse' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -1157,9 +1145,7 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/PostTrainingJobStatusResponse' - - type: 'null' + $ref: '#/components/schemas/PostTrainingJobStatusResponse' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -1210,9 +1196,7 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/FileUploadResponse' - - type: 'null' + $ref: '#/components/schemas/FileUploadResponse' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -1281,9 +1265,7 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/VectorDB' - - type: 'null' + $ref: '#/components/schemas/VectorDB' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -1509,9 +1491,7 @@ paths: content: application/json: schema: - oneOf: - - $ref: '#/components/schemas/JobStatus' - - type: 'null' + $ref: '#/components/schemas/JobStatus' '400': $ref: '#/components/responses/BadRequest400' '429': @@ -2884,30 +2864,34 @@ components: title: BuiltinTool - type: string arguments: - type: object - additionalProperties: - oneOf: - - type: string - - 
type: integer - - type: number - - type: boolean - - type: 'null' - - type: array - items: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' - - type: object - additionalProperties: - oneOf: - - type: string - - type: integer - - type: number - - type: boolean - - type: 'null' + oneOf: + - type: string + - type: object + additionalProperties: + oneOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + - type: array + items: + oneOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + - type: object + additionalProperties: + oneOf: + - type: string + - type: integer + - type: number + - type: boolean + - type: 'null' + arguments_json: + type: string additionalProperties: false required: - call_id @@ -5395,6 +5379,7 @@ components: properties: document_id: type: string + description: The unique identifier for the document. content: oneOf: - type: string @@ -5403,8 +5388,10 @@ components: items: $ref: '#/components/schemas/InterleavedContentItem' - $ref: '#/components/schemas/URL' + description: The content of the document. mime_type: type: string + description: The MIME type of the document. metadata: type: object additionalProperties: @@ -5415,12 +5402,15 @@ components: - type: string - type: array - type: object + description: Additional metadata for the document. additionalProperties: false required: - document_id - content - metadata title: RAGDocument + description: >- + A document to be used for document ingestion in the RAG Tool. InsertRequest: type: object properties: @@ -6698,15 +6688,6 @@ components: required: - results title: ScoreBatchResponse - AlgorithmConfig: - oneOf: - - $ref: '#/components/schemas/LoraFinetuningConfig' - - $ref: '#/components/schemas/QATFinetuningConfig' - discriminator: - propertyName: type - mapping: - LoRA: '#/components/schemas/LoraFinetuningConfig' - QAT: '#/components/schemas/QATFinetuningConfig' LoraFinetuningConfig: type: object properties: @@ -6790,7 +6771,9 @@ components: checkpoint_dir: type: string algorithm_config: - $ref: '#/components/schemas/AlgorithmConfig' + oneOf: + - $ref: '#/components/schemas/LoraFinetuningConfig' + - $ref: '#/components/schemas/QATFinetuningConfig' additionalProperties: false required: - job_uuid diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py index a2553f905..879ac95e2 100644 --- a/docs/openapi_generator/generate.py +++ b/docs/openapi_generator/generate.py @@ -12,7 +12,7 @@ from datetime import datetime from pathlib import Path - +import sys import fire import ruamel.yaml as yaml @@ -21,7 +21,7 @@ from llama_stack.distribution.stack import LlamaStack # noqa: E402 from .pyopenapi.options import Options # noqa: E402 from .pyopenapi.specification import Info, Server # noqa: E402 -from .pyopenapi.utility import Specification # noqa: E402 +from .pyopenapi.utility import Specification, validate_api_method_return_types # noqa: E402 def str_presenter(dumper, data): @@ -39,6 +39,14 @@ def main(output_dir: str): if not output_dir.exists(): raise ValueError(f"Directory {output_dir} does not exist") + # Validate API protocols before generating spec + print("Validating API method return types...") + return_type_errors = validate_api_method_return_types() + if return_type_errors: + print("\nAPI Method Return Type Validation Errors:\n") + for error in return_type_errors: + print(error) + sys.exit(1) now = str(datetime.now()) print( "Converting the spec to YAML (openapi.yaml) and 
HTML (openapi.html) at " + now diff --git a/docs/openapi_generator/pyopenapi/utility.py b/docs/openapi_generator/pyopenapi/utility.py index f134aab4b..f60a33bb7 100644 --- a/docs/openapi_generator/pyopenapi/utility.py +++ b/docs/openapi_generator/pyopenapi/utility.py @@ -6,16 +6,19 @@ import json import typing +import inspect +import os from pathlib import Path from typing import TextIO +from typing import Any, Dict, List, Optional, Protocol, Type, Union, get_type_hints, get_origin, get_args from llama_stack.strong_typing.schema import object_to_json, StrictJsonType +from llama_stack.distribution.resolver import api_protocol_map from .generator import Generator from .options import Options from .specification import Document - THIS_DIR = Path(__file__).parent @@ -114,3 +117,37 @@ class Specification: ) f.write(html) + +def is_optional_type(type_: Any) -> bool: + """Check if a type is Optional.""" + origin = get_origin(type_) + args = get_args(type_) + return origin is Optional or (origin is Union and type(None) in args) + + +def validate_api_method_return_types() -> List[str]: + """Validate that all API methods have proper return types.""" + errors = [] + protocols = api_protocol_map() + + for protocol_name, protocol in protocols.items(): + methods = inspect.getmembers(protocol, predicate=inspect.isfunction) + + for method_name, method in methods: + if not hasattr(method, '__webmethod__'): + continue + + # Only check GET methods + if method.__webmethod__.method != "GET": + continue + + hints = get_type_hints(method) + + if 'return' not in hints: + errors.append(f"Method {protocol_name}.{method_name} has no return type annotation") + else: + return_type = hints['return'] + if is_optional_type(return_type): + errors.append(f"Method {protocol_name}.{method_name} returns Optional type") + + return errors diff --git a/docs/source/index.md b/docs/source/index.md index 0a8fcb30c..12a27bd2b 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -15,8 +15,6 @@ Llama Stack defines and standardizes the core building blocks needed to bring ge - **Multiple developer interfaces** like CLI and SDKs for Python, Node, iOS, and Android - **Standalone applications** as examples for how to build production-grade AI applications with Llama Stack -We focus on making it easy to build production applications with the Llama model family - from the latest Llama 3.3 to specialized models like Llama Guard for safety. - ```{image} ../_static/llama-stack.png :alt: Llama Stack :width: 400px diff --git a/docs/source/introduction/index.md b/docs/source/introduction/index.md index 686f44cc4..5ffa5e68d 100644 --- a/docs/source/introduction/index.md +++ b/docs/source/introduction/index.md @@ -48,7 +48,7 @@ Llama Stack addresses these challenges through a service-oriented, API-first app **Robust Ecosystem** - Llama Stack is already integrated with distribution partners (cloud providers, hardware vendors, and AI-focused companies). -- Ecosystem offers tailored infrastructure, software, and services for deploying Llama models. +- Ecosystem offers tailored infrastructure, software, and services for deploying a variety of models. 
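A note on the `is_optional_type` helper added in `docs/openapi_generator/pyopenapi/utility.py` above: it relies on `typing` introspection, where `Optional[X]` normalizes to `Union[X, None]`. A minimal standalone sketch of those semantics (illustrative only, not part of the diff):

```python
# Standalone illustration of the typing introspection behind is_optional_type.
from typing import Optional, Union, get_args, get_origin

# Optional[X] is sugar for Union[X, None]: the origin is Union and
# NoneType appears among the type arguments.
assert get_origin(Optional[int]) is Union
assert type(None) in get_args(Optional[int])

# Plain, non-generic annotations have no origin.
assert get_origin(int) is None
```
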
### Our Philosophy

@@ -57,7 +57,6 @@ Llama Stack addresses these challenges through a service-oriented, API-first app
- **Composability**: Every component is independent but works together seamlessly
- **Production Ready**: Built for real-world applications, not just demos
- **Turnkey Solutions**: Easy to deploy built in solutions for popular deployment scenarios
-- **Llama First**: Explicit focus on Meta's Llama models and partnering ecosystem

With Llama Stack, you can focus on building your application while we handle the infrastructure complexity, essential capabilities, and provider integrations.
diff --git a/docs/source/providers/vector_io/qdrant.md b/docs/source/providers/vector_io/qdrant.md
index a0de0be98..8b0cbeef8 100644
--- a/docs/source/providers/vector_io/qdrant.md
+++ b/docs/source/providers/vector_io/qdrant.md
@@ -3,21 +3,36 @@
orphan: true
---
# Qdrant

-[Qdrant](https://qdrant.tech/documentation/) is a remote vector database provider for Llama Stack. It
+[Qdrant](https://qdrant.tech/documentation/) is an inline and remote vector database provider for Llama Stack. It
allows you to store and query vectors directly in memory.
That means you'll get fast and efficient vector retrieval.

+> By default, Qdrant stores vectors in RAM, delivering incredibly fast access for datasets that fit comfortably in
+> memory. But when your dataset exceeds RAM capacity, Qdrant offers Memmap as an alternative.
+>
+> \[[An Introduction to Vector Databases](https://qdrant.tech/articles/what-is-a-vector-database/)\]
+
+
+
## Features

-- Easy to use
+- Lightweight and easy to use
- Fully integrated with Llama Stack
+- Apache 2.0 license terms
+- Store embeddings and their metadata
+- Supports search by
+  [Keyword](https://qdrant.tech/articles/qdrant-introduces-full-text-filters-and-indexes/)
+  and [Hybrid](https://qdrant.tech/articles/hybrid-search/#building-a-hybrid-search-system-in-qdrant) search
+- [Multilingual and Multimodal retrieval](https://qdrant.tech/documentation/multimodal-search/)
+- [Metadata filtering](https://qdrant.tech/articles/vector-search-filtering/)
+- [GPU support](https://qdrant.tech/documentation/guides/running-with-gpu/)

## Usage

To use Qdrant in your Llama Stack project, follow these steps:

1. Install the necessary dependencies.
-2. Configure your Llama Stack project to use Faiss.
+2. Configure your Llama Stack project to use Qdrant.
3. Start storing and querying vectors.

## Installation
diff --git a/llama_stack/apis/benchmarks/benchmarks.py b/llama_stack/apis/benchmarks/benchmarks.py
index 39ba355e9..809af8868 100644
--- a/llama_stack/apis/benchmarks/benchmarks.py
+++ b/llama_stack/apis/benchmarks/benchmarks.py
@@ -52,7 +52,7 @@ class Benchmarks(Protocol):
    async def get_benchmark(
        self,
        benchmark_id: str,
-    ) -> Optional[Benchmark]: ...
+    ) -> Benchmark: ...

    @webmethod(route="/eval/benchmarks", method="POST")
    async def register_benchmark(
diff --git a/llama_stack/apis/datasets/datasets.py b/llama_stack/apis/datasets/datasets.py
index d033d0b70..e2c940f64 100644
--- a/llama_stack/apis/datasets/datasets.py
+++ b/llama_stack/apis/datasets/datasets.py
@@ -121,8 +121,6 @@ class Dataset(CommonDatasetFields, Resource):

class DatasetInput(CommonDatasetFields, BaseModel):
    dataset_id: str
-    provider_id: Optional[str] = None
-    provider_dataset_id: Optional[str] = None


class ListDatasetsResponse(BaseModel):
@@ -201,7 +199,7 @@ class Datasets(Protocol):
    async def get_dataset(
        self,
        dataset_id: str,
-    ) -> Optional[Dataset]: ...
+    ) -> Dataset: ...
@webmethod(route="/datasets", method="GET") async def list_datasets(self) -> ListDatasetsResponse: ... diff --git a/llama_stack/apis/eval/eval.py b/llama_stack/apis/eval/eval.py index dec018d83..51c38b16a 100644 --- a/llama_stack/apis/eval/eval.py +++ b/llama_stack/apis/eval/eval.py @@ -117,7 +117,7 @@ class Eval(Protocol): """ @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET") - async def job_status(self, benchmark_id: str, job_id: str) -> Optional[JobStatus]: + async def job_status(self, benchmark_id: str, job_id: str) -> JobStatus: """Get the status of a job. :param benchmark_id: The ID of the benchmark to run the evaluation on. diff --git a/llama_stack/apis/files/files.py b/llama_stack/apis/files/files.py index f17fadc8c..65c1ead6a 100644 --- a/llama_stack/apis/files/files.py +++ b/llama_stack/apis/files/files.py @@ -115,7 +115,7 @@ class Files(Protocol): async def get_upload_session_info( self, upload_id: str, - ) -> Optional[FileUploadResponse]: + ) -> FileUploadResponse: """ Returns information about an existsing upload session diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py index 64b9510ea..893ebc179 100644 --- a/llama_stack/apis/models/models.py +++ b/llama_stack/apis/models/models.py @@ -66,7 +66,7 @@ class Models(Protocol): async def get_model( self, model_id: str, - ) -> Optional[Model]: ... + ) -> Model: ... @webmethod(route="/models", method="POST") async def register_model( diff --git a/llama_stack/apis/post_training/post_training.py b/llama_stack/apis/post_training/post_training.py index ed15c6de4..362f87a26 100644 --- a/llama_stack/apis/post_training/post_training.py +++ b/llama_stack/apis/post_training/post_training.py @@ -6,7 +6,7 @@ from datetime import datetime from enum import Enum -from typing import Any, Dict, List, Literal, Optional, Protocol, Union +from typing import Any, Dict, List, Literal, Optional, Protocol from pydantic import BaseModel, Field from typing_extensions import Annotated @@ -89,7 +89,7 @@ class QATFinetuningConfig(BaseModel): AlgorithmConfig = register_schema( - Annotated[Union[LoraFinetuningConfig, QATFinetuningConfig], Field(discriminator="type")], + Annotated[LoraFinetuningConfig | QATFinetuningConfig, Field(discriminator="type")], name="AlgorithmConfig", ) @@ -184,7 +184,7 @@ class PostTraining(Protocol): description="Model descriptor from `llama model list`", ), checkpoint_dir: Optional[str] = None, - algorithm_config: Optional[AlgorithmConfig] = None, + algorithm_config: Optional[LoraFinetuningConfig | QATFinetuningConfig] = None, ) -> PostTrainingJob: ... @webmethod(route="/post-training/preference-optimize", method="POST") @@ -202,10 +202,10 @@ class PostTraining(Protocol): async def get_training_jobs(self) -> ListPostTrainingJobsResponse: ... @webmethod(route="/post-training/job/status", method="GET") - async def get_training_job_status(self, job_uuid: str) -> Optional[PostTrainingJobStatusResponse]: ... + async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse: ... @webmethod(route="/post-training/job/cancel", method="POST") async def cancel_training_job(self, job_uuid: str) -> None: ... @webmethod(route="/post-training/job/artifacts", method="GET") - async def get_training_job_artifacts(self, job_uuid: str) -> Optional[PostTrainingJobArtifactsResponse]: ... + async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse: ... 
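The `post_training.py` change above keeps `AlgorithmConfig` as a discriminated union while switching to PEP 604 syntax. For readers unfamiliar with the pattern, here is a minimal self-contained sketch (assumes pydantic v2; only the `type` discriminator mirrors the diff, the other fields are invented):

```python
# Minimal sketch of a discriminated union like AlgorithmConfig above.
# The `type` discriminator mirrors the diff; `rank` and `group_size` are invented.
from typing import Annotated, Literal, Union

from pydantic import BaseModel, Field, TypeAdapter

class LoraFinetuningConfig(BaseModel):
    type: Literal["LoRA"] = "LoRA"
    rank: int = 8  # hypothetical field

class QATFinetuningConfig(BaseModel):
    type: Literal["QAT"] = "QAT"
    group_size: int = 32  # hypothetical field

AlgorithmConfig = Annotated[
    Union[LoraFinetuningConfig, QATFinetuningConfig],
    Field(discriminator="type"),
]

# pydantic selects the concrete model from the discriminator value alone.
config = TypeAdapter(AlgorithmConfig).validate_python({"type": "QAT", "group_size": 64})
assert isinstance(config, QATFinetuningConfig)
```
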
diff --git a/llama_stack/apis/scoring_functions/scoring_functions.py b/llama_stack/apis/scoring_functions/scoring_functions.py index 52508d2ec..b02a7a0c4 100644 --- a/llama_stack/apis/scoring_functions/scoring_functions.py +++ b/llama_stack/apis/scoring_functions/scoring_functions.py @@ -135,7 +135,7 @@ class ScoringFunctions(Protocol): async def list_scoring_functions(self) -> ListScoringFunctionsResponse: ... @webmethod(route="/scoring-functions/{scoring_fn_id:path}", method="GET") - async def get_scoring_function(self, scoring_fn_id: str, /) -> Optional[ScoringFn]: ... + async def get_scoring_function(self, scoring_fn_id: str, /) -> ScoringFn: ... @webmethod(route="/scoring-functions", method="POST") async def register_scoring_function( diff --git a/llama_stack/apis/shields/shields.py b/llama_stack/apis/shields/shields.py index ec1179ac4..67f3bd27b 100644 --- a/llama_stack/apis/shields/shields.py +++ b/llama_stack/apis/shields/shields.py @@ -49,7 +49,7 @@ class Shields(Protocol): async def list_shields(self) -> ListShieldsResponse: ... @webmethod(route="/shields/{identifier:path}", method="GET") - async def get_shield(self, identifier: str) -> Optional[Shield]: ... + async def get_shield(self, identifier: str) -> Shield: ... @webmethod(route="/shields", method="POST") async def register_shield( diff --git a/llama_stack/apis/tools/rag_tool.py b/llama_stack/apis/tools/rag_tool.py index 2b9ef10d8..671e19619 100644 --- a/llama_stack/apis/tools/rag_tool.py +++ b/llama_stack/apis/tools/rag_tool.py @@ -17,6 +17,15 @@ from llama_stack.schema_utils import json_schema_type, register_schema, webmetho @json_schema_type class RAGDocument(BaseModel): + """ + A document to be used for document ingestion in the RAG Tool. + + :param document_id: The unique identifier for the document. + :param content: The content of the document. + :param mime_type: The MIME type of the document. + :param metadata: Additional metadata for the document. + """ + document_id: str content: InterleavedContent | URL mime_type: str | None = None diff --git a/llama_stack/apis/vector_dbs/vector_dbs.py b/llama_stack/apis/vector_dbs/vector_dbs.py index 9a4aa322f..fe6c33919 100644 --- a/llama_stack/apis/vector_dbs/vector_dbs.py +++ b/llama_stack/apis/vector_dbs/vector_dbs.py @@ -50,7 +50,7 @@ class VectorDBs(Protocol): async def get_vector_db( self, vector_db_id: str, - ) -> Optional[VectorDB]: ... + ) -> VectorDB: ... 
@webmethod(route="/vector-dbs", method="POST") async def register_vector_db( diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index 7e1d8c016..e16e047e5 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -125,6 +125,13 @@ class LoggingConfig(BaseModel): ) +class AuthenticationConfig(BaseModel): + endpoint: str = Field( + ..., + description="Endpoint URL to validate authentication tokens", + ) + + class ServerConfig(BaseModel): port: int = Field( default=8321, @@ -140,6 +147,10 @@ class ServerConfig(BaseModel): default=None, description="Path to TLS key file for HTTPS", ) + auth: Optional[AuthenticationConfig] = Field( + default=None, + description="Authentication configuration for the server", + ) class StackRunConfig(BaseModel): diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py index 533993421..7aef2f8d5 100644 --- a/llama_stack/distribution/routers/routing_tables.py +++ b/llama_stack/distribution/routers/routing_tables.py @@ -20,6 +20,8 @@ from llama_stack.apis.datasets import ( DatasetType, DataSource, ListDatasetsResponse, + RowsDataSource, + URIDataSource, ) from llama_stack.apis.models import ListModelsResponse, Model, Models, ModelType from llama_stack.apis.resource import ResourceType @@ -219,8 +221,11 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models): async def list_models(self) -> ListModelsResponse: return ListModelsResponse(data=await self.get_all_with_type("model")) - async def get_model(self, model_id: str) -> Optional[Model]: - return await self.get_object_by_identifier("model", model_id) + async def get_model(self, model_id: str) -> Model: + model = await self.get_object_by_identifier("model", model_id) + if model is None: + raise ValueError(f"Model '{model_id}' not found") + return model async def register_model( self, @@ -267,8 +272,11 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields): async def list_shields(self) -> ListShieldsResponse: return ListShieldsResponse(data=await self.get_all_with_type(ResourceType.shield.value)) - async def get_shield(self, identifier: str) -> Optional[Shield]: - return await self.get_object_by_identifier("shield", identifier) + async def get_shield(self, identifier: str) -> Shield: + shield = await self.get_object_by_identifier("shield", identifier) + if shield is None: + raise ValueError(f"Shield '{identifier}' not found") + return shield async def register_shield( self, @@ -303,8 +311,11 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs): async def list_vector_dbs(self) -> ListVectorDBsResponse: return ListVectorDBsResponse(data=await self.get_all_with_type("vector_db")) - async def get_vector_db(self, vector_db_id: str) -> Optional[VectorDB]: - return await self.get_object_by_identifier("vector_db", vector_db_id) + async def get_vector_db(self, vector_db_id: str) -> VectorDB: + vector_db = await self.get_object_by_identifier("vector_db", vector_db_id) + if vector_db is None: + raise ValueError(f"Vector DB '{vector_db_id}' not found") + return vector_db async def register_vector_db( self, @@ -355,8 +366,11 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets): async def list_datasets(self) -> ListDatasetsResponse: return ListDatasetsResponse(data=await self.get_all_with_type(ResourceType.dataset.value)) - async def get_dataset(self, dataset_id: str) -> Optional[Dataset]: - return await self.get_object_by_identifier("dataset", dataset_id) + 
async def get_dataset(self, dataset_id: str) -> Dataset: + dataset = await self.get_object_by_identifier("dataset", dataset_id) + if dataset is None: + raise ValueError(f"Dataset '{dataset_id}' not found") + return dataset async def register_dataset( self, @@ -365,6 +379,12 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets): metadata: Optional[Dict[str, Any]] = None, dataset_id: Optional[str] = None, ) -> Dataset: + if isinstance(source, dict): + if source["type"] == "uri": + source = URIDataSource.parse_obj(source) + elif source["type"] == "rows": + source = RowsDataSource.parse_obj(source) + if not dataset_id: dataset_id = f"dataset-{str(uuid.uuid4())}" @@ -408,8 +428,11 @@ class ScoringFunctionsRoutingTable(CommonRoutingTableImpl, ScoringFunctions): async def list_scoring_functions(self) -> ListScoringFunctionsResponse: return ListScoringFunctionsResponse(data=await self.get_all_with_type(ResourceType.scoring_function.value)) - async def get_scoring_function(self, scoring_fn_id: str) -> Optional[ScoringFn]: - return await self.get_object_by_identifier("scoring_function", scoring_fn_id) + async def get_scoring_function(self, scoring_fn_id: str) -> ScoringFn: + scoring_fn = await self.get_object_by_identifier("scoring_function", scoring_fn_id) + if scoring_fn is None: + raise ValueError(f"Scoring function '{scoring_fn_id}' not found") + return scoring_fn async def register_scoring_function( self, @@ -445,8 +468,11 @@ class BenchmarksRoutingTable(CommonRoutingTableImpl, Benchmarks): async def list_benchmarks(self) -> ListBenchmarksResponse: return ListBenchmarksResponse(data=await self.get_all_with_type("benchmark")) - async def get_benchmark(self, benchmark_id: str) -> Optional[Benchmark]: - return await self.get_object_by_identifier("benchmark", benchmark_id) + async def get_benchmark(self, benchmark_id: str) -> Benchmark: + benchmark = await self.get_object_by_identifier("benchmark", benchmark_id) + if benchmark is None: + raise ValueError(f"Benchmark '{benchmark_id}' not found") + return benchmark async def register_benchmark( self, @@ -490,7 +516,10 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups): return ListToolGroupsResponse(data=await self.get_all_with_type("tool_group")) async def get_tool_group(self, toolgroup_id: str) -> ToolGroup: - return await self.get_object_by_identifier("tool_group", toolgroup_id) + tool_group = await self.get_object_by_identifier("tool_group", toolgroup_id) + if tool_group is None: + raise ValueError(f"Tool group '{toolgroup_id}' not found") + return tool_group async def get_tool(self, tool_name: str) -> Tool: return await self.get_object_by_identifier("tool", tool_name) diff --git a/llama_stack/distribution/server/auth.py b/llama_stack/distribution/server/auth.py new file mode 100644 index 000000000..bb577bae5 --- /dev/null +++ b/llama_stack/distribution/server/auth.py @@ -0,0 +1,69 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +import json +from urllib.parse import parse_qs + +import httpx + +from llama_stack.log import get_logger + +logger = get_logger(name=__name__, category="auth") + + +class AuthenticationMiddleware: + def __init__(self, app, auth_endpoint): + self.app = app + self.auth_endpoint = auth_endpoint + + async def __call__(self, scope, receive, send): + if scope["type"] == "http": + headers = dict(scope.get("headers", [])) + auth_header = headers.get(b"authorization", b"").decode() + + if not auth_header or not auth_header.startswith("Bearer "): + return await self._send_auth_error(send, "Missing or invalid Authorization header") + + api_key = auth_header.split("Bearer ", 1)[1] + + path = scope.get("path", "") + request_headers = {k.decode(): v.decode() for k, v in headers.items()} + + query_string = scope.get("query_string", b"").decode() + params = parse_qs(query_string) + + auth_data = { + "api_key": api_key, + "request": { + "path": path, + "headers": request_headers, + "params": params, + }, + } + + # Validate with authentication endpoint + try: + async with httpx.AsyncClient() as client: + response = await client.post(self.auth_endpoint, json=auth_data) + if response.status_code != 200: + logger.warning(f"Authentication failed: {response.status_code}") + return await self._send_auth_error(send, "Authentication failed") + except Exception: + logger.exception("Error during authentication") + return await self._send_auth_error(send, "Authentication service error") + + return await self.app(scope, receive, send) + + async def _send_auth_error(self, send, message): + await send( + { + "type": "http.response.start", + "status": 401, + "headers": [[b"content-type", b"application/json"]], + } + ) + error_msg = json.dumps({"error": {"message": message}}).encode() + await send({"type": "http.response.body", "body": error_msg}) diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index b37b3a007..460acbc87 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -52,6 +52,7 @@ from llama_stack.providers.utils.telemetry.tracing import ( start_trace, ) +from .auth import AuthenticationMiddleware from .endpoints import get_all_api_endpoints REPO_ROOT = Path(__file__).parent.parent.parent.parent @@ -351,6 +352,11 @@ def main(): if not os.environ.get("LLAMA_STACK_DISABLE_VERSION_CHECK"): app.add_middleware(ClientVersionMiddleware) + # Add authentication middleware if configured + if config.server.auth and config.server.auth.endpoint: + logger.info(f"Enabling authentication with endpoint: {config.server.auth.endpoint}") + app.add_middleware(AuthenticationMiddleware, auth_endpoint=config.server.auth.endpoint) + try: impls = asyncio.run(construct_stack(config)) except InvalidProviderError as e: diff --git a/llama_stack/distribution/ui/page/distribution/datasets.py b/llama_stack/distribution/ui/page/distribution/datasets.py index b583c93fd..6842b29a7 100644 --- a/llama_stack/distribution/ui/page/distribution/datasets.py +++ b/llama_stack/distribution/ui/page/distribution/datasets.py @@ -5,7 +5,8 @@ # the root directory of this source tree. 
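For orientation, the new `AuthenticationMiddleware` above POSTs the bearer token together with the request path, headers, and query params to `server.auth.endpoint`, and treats any non-200 response as a rejection. A hypothetical validation service it could point at might look like this (FastAPI, the `/validate` route, and the key store are illustrative assumptions, not part of Llama Stack):

```python
# Hypothetical token-validation service for AuthenticationMiddleware.
# FastAPI, the route, and the key store are illustrative assumptions.
from fastapi import FastAPI, HTTPException

app = FastAPI()

VALID_KEYS = {"test-api-key"}  # stand-in for a real credential store

@app.post("/validate")
async def validate(auth_data: dict):
    # The middleware sends {"api_key": ..., "request": {"path", "headers", "params"}}.
    if auth_data.get("api_key") not in VALID_KEYS:
        raise HTTPException(status_code=401, detail="invalid key")
    return {"ok": True}  # any 200 response counts as success
```
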
import streamlit as st -from modules.api import llama_stack_api + +from llama_stack.distribution.ui.modules.api import llama_stack_api def datasets(): diff --git a/llama_stack/distribution/ui/page/distribution/eval_tasks.py b/llama_stack/distribution/ui/page/distribution/eval_tasks.py index 1428ae9ab..492be4700 100644 --- a/llama_stack/distribution/ui/page/distribution/eval_tasks.py +++ b/llama_stack/distribution/ui/page/distribution/eval_tasks.py @@ -5,7 +5,8 @@ # the root directory of this source tree. import streamlit as st -from modules.api import llama_stack_api + +from llama_stack.distribution.ui.modules.api import llama_stack_api def benchmarks(): diff --git a/llama_stack/distribution/ui/page/distribution/models.py b/llama_stack/distribution/ui/page/distribution/models.py index 3141c1627..f29459098 100644 --- a/llama_stack/distribution/ui/page/distribution/models.py +++ b/llama_stack/distribution/ui/page/distribution/models.py @@ -5,7 +5,8 @@ # the root directory of this source tree. import streamlit as st -from modules.api import llama_stack_api + +from llama_stack.distribution.ui.modules.api import llama_stack_api def models(): diff --git a/llama_stack/distribution/ui/page/distribution/providers.py b/llama_stack/distribution/ui/page/distribution/providers.py index 9aeb7f2a5..c660cb986 100644 --- a/llama_stack/distribution/ui/page/distribution/providers.py +++ b/llama_stack/distribution/ui/page/distribution/providers.py @@ -5,7 +5,8 @@ # the root directory of this source tree. import streamlit as st -from modules.api import llama_stack_api + +from llama_stack.distribution.ui.modules.api import llama_stack_api def providers(): diff --git a/llama_stack/distribution/ui/page/distribution/resources.py b/llama_stack/distribution/ui/page/distribution/resources.py index 684270d4d..5e10e6e80 100644 --- a/llama_stack/distribution/ui/page/distribution/resources.py +++ b/llama_stack/distribution/ui/page/distribution/resources.py @@ -4,14 +4,15 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from page.distribution.benchmarks import benchmarks -from page.distribution.datasets import datasets -from page.distribution.models import models -from page.distribution.scoring_functions import scoring_functions -from page.distribution.shields import shields -from page.distribution.vector_dbs import vector_dbs from streamlit_option_menu import option_menu +from llama_stack.distribution.ui.page.distribution.datasets import datasets +from llama_stack.distribution.ui.page.distribution.eval_tasks import benchmarks +from llama_stack.distribution.ui.page.distribution.models import models +from llama_stack.distribution.ui.page.distribution.scoring_functions import scoring_functions +from llama_stack.distribution.ui.page.distribution.shields import shields +from llama_stack.distribution.ui.page.distribution.vector_dbs import vector_dbs + def resources_page(): options = [ diff --git a/llama_stack/distribution/ui/page/distribution/scoring_functions.py b/llama_stack/distribution/ui/page/distribution/scoring_functions.py index 6a2a08c6d..193146356 100644 --- a/llama_stack/distribution/ui/page/distribution/scoring_functions.py +++ b/llama_stack/distribution/ui/page/distribution/scoring_functions.py @@ -5,7 +5,8 @@ # the root directory of this source tree. 
import streamlit as st -from modules.api import llama_stack_api + +from llama_stack.distribution.ui.modules.api import llama_stack_api def scoring_functions(): diff --git a/llama_stack/distribution/ui/page/distribution/shields.py b/llama_stack/distribution/ui/page/distribution/shields.py index b5ed27ef9..67d66d64f 100644 --- a/llama_stack/distribution/ui/page/distribution/shields.py +++ b/llama_stack/distribution/ui/page/distribution/shields.py @@ -5,7 +5,8 @@ # the root directory of this source tree. import streamlit as st -from modules.api import llama_stack_api + +from llama_stack.distribution.ui.modules.api import llama_stack_api def shields(): diff --git a/llama_stack/distribution/ui/page/distribution/vector_dbs.py b/llama_stack/distribution/ui/page/distribution/vector_dbs.py index 1c9d06e8d..49a4f25bb 100644 --- a/llama_stack/distribution/ui/page/distribution/vector_dbs.py +++ b/llama_stack/distribution/ui/page/distribution/vector_dbs.py @@ -5,7 +5,8 @@ # the root directory of this source tree. import streamlit as st -from modules.api import llama_stack_api + +from llama_stack.distribution.ui.modules.api import llama_stack_api def vector_dbs(): diff --git a/llama_stack/distribution/ui/page/evaluations/app_eval.py b/llama_stack/distribution/ui/page/evaluations/app_eval.py index 26bc28451..d7bc6388c 100644 --- a/llama_stack/distribution/ui/page/evaluations/app_eval.py +++ b/llama_stack/distribution/ui/page/evaluations/app_eval.py @@ -8,8 +8,9 @@ import json import pandas as pd import streamlit as st -from modules.api import llama_stack_api -from modules.utils import process_dataset + +from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.distribution.ui.modules.utils import process_dataset def application_evaluation_page(): diff --git a/llama_stack/distribution/ui/page/evaluations/native_eval.py b/llama_stack/distribution/ui/page/evaluations/native_eval.py index 7c39adc4a..97f875e17 100644 --- a/llama_stack/distribution/ui/page/evaluations/native_eval.py +++ b/llama_stack/distribution/ui/page/evaluations/native_eval.py @@ -8,7 +8,8 @@ import json import pandas as pd import streamlit as st -from modules.api import llama_stack_api + +from llama_stack.distribution.ui.modules.api import llama_stack_api def select_benchmark_1(): diff --git a/llama_stack/distribution/ui/page/playground/chat.py b/llama_stack/distribution/ui/page/playground/chat.py index e69f559db..8e7345169 100644 --- a/llama_stack/distribution/ui/page/playground/chat.py +++ b/llama_stack/distribution/ui/page/playground/chat.py @@ -5,7 +5,8 @@ # the root directory of this source tree. 
import streamlit as st -from modules.api import llama_stack_api + +from llama_stack.distribution.ui.modules.api import llama_stack_api # Sidebar configurations with st.sidebar: diff --git a/llama_stack/distribution/ui/page/playground/rag.py b/llama_stack/distribution/ui/page/playground/rag.py index 7ee934fb7..e2f451668 100644 --- a/llama_stack/distribution/ui/page/playground/rag.py +++ b/llama_stack/distribution/ui/page/playground/rag.py @@ -7,9 +7,10 @@ import streamlit as st from llama_stack_client.lib.agents.agent import Agent from llama_stack_client.lib.agents.event_logger import EventLogger -from llama_stack_client.types.memory_insert_params import Document -from modules.api import llama_stack_api -from modules.utils import data_url_from_file +from llama_stack_client.types.shared.document import Document + +from llama_stack.distribution.ui.modules.api import llama_stack_api +from llama_stack.distribution.ui.modules.utils import data_url_from_file def rag_chat_page(): diff --git a/llama_stack/models/llama/datatypes.py b/llama_stack/models/llama/datatypes.py index b25bf0ea9..9842d7980 100644 --- a/llama_stack/models/llama/datatypes.py +++ b/llama_stack/models/llama/datatypes.py @@ -47,7 +47,14 @@ RecursiveType = Union[Primitive, List[Primitive], Dict[str, Primitive]] class ToolCall(BaseModel): call_id: str tool_name: Union[BuiltinTool, str] - arguments: Dict[str, RecursiveType] + # Plan is to deprecate the Dict in favor of a JSON string + # that is parsed on the client side instead of trying to manage + # the recursive type here. + # Making this a union so that client side can start prepping for this change. + # Eventually, we will remove both the Dict and arguments_json field, + # and arguments will just be a str + arguments: Union[str, Dict[str, RecursiveType]] + arguments_json: Optional[str] = None @field_validator("tool_name", mode="before") @classmethod diff --git a/llama_stack/models/llama/llama3/chat_format.py b/llama_stack/models/llama/llama3/chat_format.py index 011ccb02a..2862f8558 100644 --- a/llama_stack/models/llama/llama3/chat_format.py +++ b/llama_stack/models/llama/llama3/chat_format.py @@ -12,6 +12,7 @@ # the top-level of this source tree. import io +import json import uuid from dataclasses import dataclass from typing import Dict, List, Optional, Tuple @@ -203,9 +204,10 @@ class ChatFormat: # This code tries to handle that case if tool_name in BuiltinTool.__members__: tool_name = BuiltinTool[tool_name] - tool_arguments = { - "query": list(tool_arguments.values())[0], - } + if isinstance(tool_arguments, dict): + tool_arguments = { + "query": list(tool_arguments.values())[0], + } else: builtin_tool_info = ToolUtils.maybe_extract_builtin_tool_call(content) if builtin_tool_info is not None: @@ -229,6 +231,7 @@ class ChatFormat: call_id=call_id, tool_name=tool_name, arguments=tool_arguments, + arguments_json=json.dumps(tool_arguments), ) ) content = "" diff --git a/llama_stack/models/llama/llama3/template_data.py b/llama_stack/models/llama/llama3/template_data.py index aa16aa009..076b4adb4 100644 --- a/llama_stack/models/llama/llama3/template_data.py +++ b/llama_stack/models/llama/llama3/template_data.py @@ -11,11 +11,8 @@ # top-level folder for each specific model found within the models/ directory at # the top-level of this source tree. 
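The `ToolCall` change above is transitional: `arguments` temporarily accepts either the legacy parsed dict or a raw JSON string, while `arguments_json` carries the string form for clients that migrate early. A small sketch of what that shape looks like on the wire (values invented):

```python
# Illustrative sketch of the transitional ToolCall payload described above.
# Field names follow the diff; the values are invented.
import json

args = {"city": "Paris", "days": 3}
tool_call = {
    "call_id": "call-123",
    "tool_name": "get_weather",
    "arguments": args,                   # legacy: parsed dict (to be deprecated)
    "arguments_json": json.dumps(args),  # forward-compatible string form
}

# After the deprecation, clients parse the JSON string themselves.
assert json.loads(tool_call["arguments_json"]) == args
```
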
-from llama_stack.models.llama.datatypes import ( - BuiltinTool, - StopReason, - ToolCall, -) + +from llama_stack.models.llama.datatypes import BuiltinTool, StopReason, ToolCall from .prompt_templates import ( BuiltinToolGenerator, diff --git a/llama_stack/providers/inline/datasetio/localfs/datasetio.py b/llama_stack/providers/inline/datasetio/localfs/datasetio.py index cf4bf7fec..f489739bf 100644 --- a/llama_stack/providers/inline/datasetio/localfs/datasetio.py +++ b/llama_stack/providers/inline/datasetio/localfs/datasetio.py @@ -35,12 +35,12 @@ class PandasDataframeDataset: else: return self.df.iloc[idx].to_dict() - def load(self) -> None: + async def load(self) -> None: if self.df is not None: return if self.dataset_def.source.type == "uri": - self.df = get_dataframe_from_uri(self.dataset_def.source.uri) + self.df = await get_dataframe_from_uri(self.dataset_def.source.uri) elif self.dataset_def.source.type == "rows": self.df = pandas.DataFrame(self.dataset_def.source.rows) else: @@ -95,7 +95,7 @@ class LocalFSDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate): ) -> IterrowsResponse: dataset_def = self.dataset_infos[dataset_id] dataset_impl = PandasDataframeDataset(dataset_def) - dataset_impl.load() + await dataset_impl.load() start_index = start_index or 0 @@ -114,7 +114,7 @@ class LocalFSDatasetIOImpl(DatasetIO, DatasetsProtocolPrivate): async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None: dataset_def = self.dataset_infos[dataset_id] dataset_impl = PandasDataframeDataset(dataset_def) - dataset_impl.load() + await dataset_impl.load() new_rows_df = pandas.DataFrame(rows) dataset_impl.df = pandas.concat([dataset_impl.df, new_rows_df], ignore_index=True) diff --git a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py b/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py index 738f9ddcd..e8767c2ff 100644 --- a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +++ b/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py @@ -10,6 +10,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import copy import json import logging import multiprocessing @@ -213,7 +214,7 @@ def maybe_parse_message(maybe_json: Optional[str]) -> Optional[ProcessingMessage def parse_message(json_str: str) -> ProcessingMessage: data = json.loads(json_str) - return ProcessingMessageWrapper(**data).payload + return copy.deepcopy(ProcessingMessageWrapper(**data).payload) def worker_process_entrypoint( diff --git a/llama_stack/providers/inline/inference/vllm/vllm.py b/llama_stack/providers/inline/inference/vllm/vllm.py index b59df13d0..256e0f821 100644 --- a/llama_stack/providers/inline/inference/vllm/vllm.py +++ b/llama_stack/providers/inline/inference/vllm/vllm.py @@ -582,6 +582,7 @@ class VLLMInferenceImpl(Inference, ModelsProtocolPrivate): tool_name=t.function.name, # vLLM function args come back as a string. Llama Stack expects JSON. 
arguments=json.loads(t.function.arguments), + arguments_json=t.function.arguments, ) for t in vllm_message.tool_calls ], diff --git a/llama_stack/providers/inline/post_training/common/validator.py b/llama_stack/providers/inline/post_training/common/validator.py index e76edf3a0..b0aec6187 100644 --- a/llama_stack/providers/inline/post_training/common/validator.py +++ b/llama_stack/providers/inline/post_training/common/validator.py @@ -9,6 +9,9 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. + +from typing import Any + from llama_stack.apis.common.type_system import ( ChatCompletionInputType, DialogType, @@ -20,7 +23,7 @@ from llama_stack.providers.utils.common.data_schema_validator import ( validate_dataset_schema, ) -EXPECTED_DATASET_SCHEMA = { +EXPECTED_DATASET_SCHEMA: dict[str, list[dict[str, Any]]] = { "instruct": [ { ColumnName.chat_completion_input.value: ChatCompletionInputType(), @@ -41,6 +44,9 @@ async def validate_input_dataset_schema( dataset_type: str, ) -> None: dataset_def = await datasets_api.get_dataset(dataset_id=dataset_id) + if not dataset_def: + raise ValueError(f"Dataset {dataset_id} does not exist.") + if not dataset_def.dataset_schema or len(dataset_def.dataset_schema) == 0: raise ValueError(f"Dataset {dataset_id} does not have a schema defined.") diff --git a/llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py b/llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py index 64d61b053..fcadd0884 100644 --- a/llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py +++ b/llama_stack/providers/inline/post_training/torchtune/common/checkpointer.py @@ -37,7 +37,7 @@ class TorchtuneCheckpointer: checkpoint_files: List[str], output_dir: str, model_type: str, - ) -> None: + ): # Fail fast if ``checkpoint_files`` is invalid # TODO: support loading more than one file if len(checkpoint_files) != 1: @@ -58,7 +58,7 @@ class TorchtuneCheckpointer: """ Load Meta checkpoint from file. Currently only loading from a single file is supported. """ - state_dict: Dict[str:Any] = {} + state_dict: Dict[str, Any] = {} model_state_dict = safe_torch_load(self._checkpoint_path) if self._model_type == ModelType.LLAMA3_VISION: from torchtune.models.llama3_2_vision._convert_weights import ( @@ -85,10 +85,10 @@ class TorchtuneCheckpointer: state_dict: Dict[str, Any], epoch: int, adapter_only: bool = False, - checkpoint_format: str = "meta", + checkpoint_format: str | None = None, ) -> str: model_file_path = Path(self._output_dir) / f"{self._model_id}-{self._training_algorithm}-{epoch}" - if checkpoint_format == "meta": + if checkpoint_format == "meta" or checkpoint_format is None: self._save_meta_format_checkpoint(model_file_path, state_dict, adapter_only) elif checkpoint_format == "huggingface": # Note: for saving hugging face format checkpoints, we only suppport saving adapter weights now diff --git a/llama_stack/providers/inline/post_training/torchtune/common/utils.py b/llama_stack/providers/inline/post_training/torchtune/common/utils.py index 98e16f9d7..f8a1c0436 100644 --- a/llama_stack/providers/inline/post_training/torchtune/common/utils.py +++ b/llama_stack/providers/inline/post_training/torchtune/common/utils.py @@ -10,7 +10,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Any, Callable, Dict +from typing import Callable, Dict import torch from pydantic import BaseModel @@ -25,10 +25,13 @@ from llama_stack.apis.post_training import DatasetFormat from llama_stack.models.llama.datatypes import Model from llama_stack.models.llama.sku_list import resolve_model +BuildLoraModelCallable = Callable[..., torch.nn.Module] +BuildTokenizerCallable = Callable[..., Llama3Tokenizer] + class ModelConfig(BaseModel): - model_definition: Any - tokenizer_type: Any + model_definition: BuildLoraModelCallable + tokenizer_type: BuildTokenizerCallable checkpoint_type: str @@ -51,10 +54,6 @@ DATA_FORMATS: Dict[str, Transform] = { } -BuildLoraModelCallable = Callable[..., torch.nn.Module] -BuildTokenizerCallable = Callable[..., Llama3Tokenizer] - - def _validate_model_id(model_id: str) -> Model: model = resolve_model(model_id) if model is None or model.core_model_id.value not in MODEL_CONFIGS: diff --git a/llama_stack/providers/inline/post_training/torchtune/datasets/sft.py b/llama_stack/providers/inline/post_training/torchtune/datasets/sft.py index b556b59a6..050996860 100644 --- a/llama_stack/providers/inline/post_training/torchtune/datasets/sft.py +++ b/llama_stack/providers/inline/post_training/torchtune/datasets/sft.py @@ -55,7 +55,7 @@ class SFTDataset(Dataset): if "messages" in transformed_sample: validate_messages(transformed_sample["messages"]) - tokenized_dict = self._model_transform(transformed_sample) + tokenized_dict: dict[str, Any] = self._model_transform(transformed_sample) if not ("tokens" in tokenized_dict and "mask" in tokenized_dict): keys_str = ", ".join(tokenized_dict.keys()) diff --git a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py b/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py index 0f89b4064..edc1ceb90 100644 --- a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +++ b/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py @@ -37,10 +37,10 @@ from llama_stack.apis.common.training_types import PostTrainingMetric from llama_stack.apis.datasetio import DatasetIO from llama_stack.apis.datasets import Datasets from llama_stack.apis.post_training import ( - AlgorithmConfig, Checkpoint, LoraFinetuningConfig, OptimizerConfig, + QATFinetuningConfig, TrainingConfig, ) from llama_stack.distribution.utils.config_dirs import DEFAULT_CHECKPOINT_DIR @@ -73,6 +73,9 @@ class LoraFinetuningSingleDevice: # Currently logging only logs limited training metrics to local disk # will figure out more loggings and how it works with telemetry in future PRs + + _checkpointer: TorchtuneCheckpointer + def __init__( self, config: TorchtunePostTrainingConfig, @@ -82,7 +85,7 @@ class LoraFinetuningSingleDevice: logger_config: Dict[str, Any], model: str, checkpoint_dir: Optional[str], - algorithm_config: Optional[AlgorithmConfig], + algorithm_config: LoraFinetuningConfig | QATFinetuningConfig | None, datasetio_api: DatasetIO, datasets_api: Datasets, ) -> None: @@ -109,12 +112,12 @@ class LoraFinetuningSingleDevice: return str(checkpoint_dir) if checkpoint_dir and checkpoint_dir != "null": - self.checkpoint_dir = config.checkpoint_dir + self.checkpoint_dir = checkpoint_dir else: - model = resolve_model(self.model_id) - if model is None: + model_obj = resolve_model(self.model_id) + if model_obj is None: raise ValueError(f"{self.model_id} not found. 
Your model id should be in the llama models SKU list") - self.checkpoint_dir = model_checkpoint_dir(model) + self.checkpoint_dir = model_checkpoint_dir(model_obj) self._output_dir = str(DEFAULT_CHECKPOINT_DIR) self._checkpoint_format = config.checkpoint_format @@ -135,16 +138,16 @@ class LoraFinetuningSingleDevice: self.max_validation_steps = training_config.max_validation_steps self._clip_grad_norm = 1.0 - self._enable_activation_checkpointing = ( - (training_config.efficiency_config.enable_activation_checkpointing) - if training_config.efficiency_config - else False - ) - self._enable_activation_offloading = ( - (training_config.efficiency_config.enable_activation_offloading) - if training_config.efficiency_config - else False - ) + + self._enable_activation_checkpointing = False + self._enable_activation_offloading = False + if training_config.efficiency_config: + if training_config.efficiency_config.enable_activation_checkpointing: + self._enable_activation_checkpointing = ( + training_config.efficiency_config.enable_activation_checkpointing + ) + if training_config.efficiency_config.enable_activation_offloading: + self._enable_activation_offloading = training_config.efficiency_config.enable_activation_offloading self.datasetio_api = datasetio_api self.datasets_api = datasets_api @@ -451,12 +454,12 @@ class LoraFinetuningSingleDevice: """ # Initialize tokens count and running loss (for grad accumulation) t0 = time.perf_counter() - running_loss = 0 + running_loss: float = 0.0 num_tokens = 0 # training artifacts checkpoints = [] - memory_stats = {} + memory_stats: Dict[str, Any] = {} # self.epochs_run should be non-zero when we're resuming from a checkpoint for curr_epoch in range(self.epochs_run, self.total_epochs): @@ -484,7 +487,7 @@ class LoraFinetuningSingleDevice: # Loss is normalized by default so we multiply by the number of tokens # This way we can normalize by the total number of tokens if we're accumulating gradients current_loss = await self._loss_step(batch) * current_num_tokens - running_loss += current_loss + running_loss += current_loss.detach().item() current_loss.backward() # Step with optimizer @@ -500,7 +503,7 @@ class LoraFinetuningSingleDevice: # Update the number of steps when the weights are updated self.global_step += 1 - loss_to_log = running_loss.item() / num_tokens + loss_to_log = running_loss / num_tokens pbar.update(1) pbar.set_description(f"{curr_epoch + 1}|{self.global_step}|Loss: {loss_to_log}") @@ -523,7 +526,7 @@ class LoraFinetuningSingleDevice: ) # Reset running stats for the next step - running_loss = 0 + running_loss = 0.0 num_tokens = 0 t0 = time.perf_counter() diff --git a/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py b/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py index 4b97914c5..9610b9b46 100644 --- a/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py +++ b/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py @@ -5,6 +5,7 @@ # the root directory of this source tree. 
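+# Script execution itself is synchronous; the change below runs it via asyncio.to_thread so a long-running script no longer blocks the server's event loop.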
+import asyncio import logging import os import tempfile @@ -37,7 +38,7 @@ class CodeInterpreterToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime): async def initialize(self): pass - async def register_tool(self, tool: Tool): + async def register_tool(self, tool: Tool) -> None: pass async def unregister_tool(self, tool_id: str) -> None: @@ -65,7 +66,7 @@ class CodeInterpreterToolRuntimeImpl(ToolsProtocolPrivate, ToolRuntime): # Use environment variable to control bwrap usage force_disable_bwrap = os.environ.get("DISABLE_CODE_SANDBOX", "").lower() in ("1", "true", "yes") req = CodeExecutionRequest(scripts=[script], use_bwrap=not force_disable_bwrap) - res = self.code_executor.execute(req) + res = await asyncio.to_thread(self.code_executor.execute, req) pieces = [res["process_status"]] for out_type in ["stdout", "stderr"]: res_out = res[out_type] diff --git a/llama_stack/providers/inline/vector_io/qdrant/__init__.py b/llama_stack/providers/inline/vector_io/qdrant/__init__.py new file mode 100644 index 000000000..8f0b91c61 --- /dev/null +++ b/llama_stack/providers/inline/vector_io/qdrant/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Dict + +from llama_stack.providers.datatypes import Api, ProviderSpec + +from .config import QdrantVectorIOConfig + + +async def get_adapter_impl(config: QdrantVectorIOConfig, deps: Dict[Api, ProviderSpec]): + from llama_stack.providers.remote.vector_io.qdrant.qdrant import QdrantVectorIOAdapter + + impl = QdrantVectorIOAdapter(config, deps[Api.inference]) + await impl.initialize() + return impl diff --git a/llama_stack/providers/inline/vector_io/qdrant/config.py b/llama_stack/providers/inline/vector_io/qdrant/config.py new file mode 100644 index 000000000..282e951b0 --- /dev/null +++ b/llama_stack/providers/inline/vector_io/qdrant/config.py @@ -0,0 +1,23 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
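+# The inline provider runs an embedded, file-backed Qdrant instance, so the only setting it needs is a local path; the remote adapter's connection fields (host, prefix, timeout, ...) do not apply here.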
+ + +from typing import Any, Dict + +from pydantic import BaseModel + +from llama_stack.schema_utils import json_schema_type + + +@json_schema_type +class QdrantVectorIOConfig(BaseModel): + path: str + + @classmethod + def sample_run_config(cls, __distro_dir__: str) -> Dict[str, Any]: + return { + "path": "${env.QDRANT_PATH:~/.llama/" + __distro_dir__ + "}/" + "qdrant.db", + } diff --git a/llama_stack/providers/registry/vector_io.py b/llama_stack/providers/registry/vector_io.py index fbc495d83..93031763d 100644 --- a/llama_stack/providers/registry/vector_io.py +++ b/llama_stack/providers/registry/vector_io.py @@ -92,6 +92,14 @@ def available_providers() -> List[ProviderSpec]: ), api_dependencies=[Api.inference], ), + InlineProviderSpec( + api=Api.vector_io, + provider_type="inline::qdrant", + pip_packages=["qdrant-client"], + module="llama_stack.providers.inline.vector_io.qdrant", + config_class="llama_stack.providers.inline.vector_io.qdrant.QdrantVectorIOConfig", + api_dependencies=[Api.inference], + ), remote_provider_spec( Api.vector_io, AdapterSpec( diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index b59da79eb..69e6335c6 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -6,6 +6,7 @@ import logging import warnings +from functools import lru_cache from typing import AsyncIterator, List, Optional, Union from openai import APIConnectionError, AsyncOpenAI, BadRequestError @@ -82,12 +83,42 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): # ) self._config = config - # make sure the client lives longer than any async calls - self._client = AsyncOpenAI( - base_url=f"{self._config.url}/v1", - api_key=(self._config.api_key.get_secret_value() if self._config.api_key else "NO KEY"), - timeout=self._config.timeout, - ) + + @lru_cache # noqa: B019 + def _get_client(self, provider_model_id: str) -> AsyncOpenAI: + """ + For hosted models, https://integrate.api.nvidia.com/v1 is the primary base_url. However, + some models are hosted on different URLs. This function returns the appropriate client + for the given provider_model_id. + + This relies on lru_cache (here and on the nested _get_client_for_base_url helper) to avoid creating + a new client for each request or for each model that is hosted on https://integrate.api.nvidia.com/v1. + + :param provider_model_id: The provider model ID + :return: An OpenAI client + """ + + @lru_cache # noqa: B019 + def _get_client_for_base_url(base_url: str) -> AsyncOpenAI: + """ + Maintain a single OpenAI client per base_url.
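+ Reusing one client per base_url keeps its connection pool alive across requests instead of rebuilding it on every call.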
+ """ + return AsyncOpenAI( + base_url=base_url, + api_key=(self._config.api_key.get_secret_value() if self._config.api_key else "NO KEY"), + timeout=self._config.timeout, + ) + + special_model_urls = { + "meta/llama-3.2-11b-vision-instruct": "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-11b-vision-instruct", + "meta/llama-3.2-90b-vision-instruct": "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-90b-vision-instruct", + } + + base_url = f"{self._config.url}/v1" + if _is_nvidia_hosted(self._config) and provider_model_id in special_model_urls: + base_url = special_model_urls[provider_model_id] + + return _get_client_for_base_url(base_url) async def completion( self, @@ -105,9 +136,10 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): await check_health(self._config) # this raises errors + provider_model_id = self.get_provider_model_id(model_id) request = convert_completion_request( request=CompletionRequest( - model=self.get_provider_model_id(model_id), + model=provider_model_id, content=content, sampling_params=sampling_params, response_format=response_format, @@ -118,7 +150,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): ) try: - response = await self._client.completions.create(**request) + response = await self._get_client(provider_model_id).completions.create(**request) except APIConnectionError as e: raise ConnectionError(f"Failed to connect to NVIDIA NIM at {self._config.url}: {e}") from e @@ -206,6 +238,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): await check_health(self._config) # this raises errors + provider_model_id = self.get_provider_model_id(model_id) request = await convert_chat_completion_request( request=ChatCompletionRequest( model=self.get_provider_model_id(model_id), @@ -221,7 +254,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): ) try: - response = await self._client.chat.completions.create(**request) + response = await self._get_client(provider_model_id).chat.completions.create(**request) except APIConnectionError as e: raise ConnectionError(f"Failed to connect to NVIDIA NIM at {self._config.url}: {e}") from e diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index f940de7ba..eda1a179c 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -90,15 +90,12 @@ def _convert_to_vllm_tool_calls_in_response( if not tool_calls: return [] - call_function_arguments = None - for call in tool_calls: - call_function_arguments = json.loads(call.function.arguments) - return [ ToolCall( call_id=call.id, tool_name=call.function.name, - arguments=call_function_arguments, + arguments=json.loads(call.function.arguments), + arguments_json=call.function.arguments, ) for call in tool_calls ] @@ -183,6 +180,7 @@ async def _process_vllm_chat_completion_stream_response( call_id=tool_call_buf.call_id, tool_name=tool_call_buf.tool_name, arguments=args, + arguments_json=args_str, ), parse_status=ToolCallParseStatus.succeeded, ), diff --git a/llama_stack/providers/remote/vector_io/qdrant/config.py b/llama_stack/providers/remote/vector_io/qdrant/config.py index ce68aa492..6d7eebe23 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/config.py +++ b/llama_stack/providers/remote/vector_io/qdrant/config.py @@ -23,7 +23,6 @@ class QdrantVectorIOConfig(BaseModel): prefix: Optional[str] = None timeout: Optional[int] = None host: Optional[str] = None - path: Optional[str] = None @classmethod 
def sample_run_config(cls, **kwargs: Any) -> Dict[str, Any]: diff --git a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py index 586b8ca95..9e7788dc0 100644 --- a/llama_stack/providers/remote/vector_io/qdrant/qdrant.py +++ b/llama_stack/providers/remote/vector_io/qdrant/qdrant.py @@ -6,7 +6,7 @@ import logging import uuid -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Union from numpy.typing import NDArray from qdrant_client import AsyncQdrantClient, models @@ -16,12 +16,13 @@ from llama_stack.apis.inference import InterleavedContent from llama_stack.apis.vector_dbs import VectorDB from llama_stack.apis.vector_io import Chunk, QueryChunksResponse, VectorIO from llama_stack.providers.datatypes import Api, VectorDBsProtocolPrivate +from llama_stack.providers.inline.vector_io.qdrant import QdrantVectorIOConfig as InlineQdrantVectorIOConfig from llama_stack.providers.utils.memory.vector_store import ( EmbeddingIndex, VectorDBWithIndex, ) -from .config import QdrantVectorIOConfig +from .config import QdrantVectorIOConfig as RemoteQdrantVectorIOConfig log = logging.getLogger(__name__) CHUNK_ID_KEY = "_chunk_id" @@ -99,17 +100,19 @@ class QdrantIndex(EmbeddingIndex): class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): - def __init__(self, config: QdrantVectorIOConfig, inference_api: Api.inference) -> None: + def __init__( + self, config: Union[RemoteQdrantVectorIOConfig, InlineQdrantVectorIOConfig], inference_api: Api.inference + ) -> None: self.config = config - self.client = AsyncQdrantClient(**self.config.model_dump(exclude_none=True)) + self.client: AsyncQdrantClient = None self.cache = {} self.inference_api = inference_api async def initialize(self) -> None: - pass + self.client = AsyncQdrantClient(**self.config.model_dump(exclude_none=True)) async def shutdown(self) -> None: - self.client.close() + await self.client.close() async def register_vector_db( self, @@ -123,6 +126,11 @@ class QdrantVectorIOAdapter(VectorIO, VectorDBsProtocolPrivate): self.cache[vector_db.identifier] = index + async def unregister_vector_db(self, vector_db_id: str) -> None: + if vector_db_id in self.cache: + await self.cache[vector_db_id].index.delete() + del self.cache[vector_db_id] + async def _get_and_cache_vector_db_index(self, vector_db_id: str) -> Optional[VectorDBWithIndex]: if vector_db_id in self.cache: return self.cache[vector_db_id] diff --git a/llama_stack/providers/utils/datasetio/url_utils.py b/llama_stack/providers/utils/datasetio/url_utils.py index 6a544ea49..386ee736d 100644 --- a/llama_stack/providers/utils/datasetio/url_utils.py +++ b/llama_stack/providers/utils/datasetio/url_utils.py @@ -4,6 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
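+# get_dataframe_from_uri becomes async below: the blocking pandas readers are pushed onto a worker thread, so existing callers must now await this helper.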
+import asyncio import base64 import io from urllib.parse import unquote @@ -13,12 +14,15 @@ import pandas from llama_stack.providers.utils.memory.vector_store import parse_data_url -def get_dataframe_from_uri(uri: str): +async def get_dataframe_from_uri(uri: str): df = None if uri.endswith(".csv"): - df = pandas.read_csv(uri) + # Moving to its own thread to avoid the IO blocking the event loop + # This isn't ideal as it moves more than just the IO to a new thread + # but it is as close as we can easily get + df = await asyncio.to_thread(pandas.read_csv, uri) elif uri.endswith(".xlsx"): - df = pandas.read_excel(uri) + df = await asyncio.to_thread(pandas.read_excel, uri) elif uri.startswith("data:"): parts = parse_data_url(uri) data = parts["data"] diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index 2a362f8cb..b264c7312 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -529,7 +529,11 @@ async def convert_message_to_openai_dict_new( ) -> Union[str, Iterable[OpenAIChatCompletionContentPartParam]]: async def impl( content_: InterleavedContent, - ) -> Union[str, OpenAIChatCompletionContentPartParam, List[OpenAIChatCompletionContentPartParam]]: + ) -> Union[ + str, + OpenAIChatCompletionContentPartParam, + List[OpenAIChatCompletionContentPartParam], + ]: # Llama Stack and OpenAI spec match for str and text input if isinstance(content_, str): return content_ @@ -570,7 +574,7 @@ async def convert_message_to_openai_dict_new( OpenAIChatCompletionMessageToolCall( id=tool.call_id, function=OpenAIFunction( - name=tool.tool_name if not isinstance(tool.tool_name, BuiltinTool) else tool.tool_name.value, + name=(tool.tool_name if not isinstance(tool.tool_name, BuiltinTool) else tool.tool_name.value), arguments=json.dumps(tool.arguments), ), type="function", @@ -609,6 +613,7 @@ def convert_tool_call( call_id=tool_call.id, tool_name=tool_call.function.name, arguments=json.loads(tool_call.function.arguments), + arguments_json=tool_call.function.arguments, ) except Exception: return UnparseableToolCall( @@ -759,6 +764,7 @@ def _convert_openai_tool_calls( call_id=call.id, tool_name=call.function.name, arguments=json.loads(call.function.arguments), + arguments_json=call.function.arguments, ) for call in tool_calls ] @@ -890,7 +896,8 @@ async def convert_openai_chat_completion_stream( # ChatCompletionResponseEvent only supports one per stream if len(choice.delta.tool_calls) > 1: warnings.warn( - "multiple tool calls found in a single delta, using the first, ignoring the rest", stacklevel=2 + "multiple tool calls found in a single delta, using the first, ignoring the rest", + stacklevel=2, ) if not enable_incremental_tool_calls: @@ -971,6 +978,7 @@ async def convert_openai_chat_completion_stream( call_id=buffer["call_id"], tool_name=buffer["name"], arguments=arguments, + arguments_json=buffer["arguments"], ) yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( diff --git a/llama_stack/templates/open-benchmark/open_benchmark.py b/llama_stack/templates/open-benchmark/open_benchmark.py index b339e8c80..acfbd78d6 100644 --- a/llama_stack/templates/open-benchmark/open_benchmark.py +++ b/llama_stack/templates/open-benchmark/open_benchmark.py @@ -170,7 +170,6 @@ def get_distribution_template() -> DistributionTemplate: default_datasets = [ DatasetInput( dataset_id="simpleqa", - provider_id="huggingface",
purpose=DatasetPurpose.eval_messages_answer, source=URIDataSource( uri="huggingface://datasets/llamastack/simpleqa?split=train", @@ -178,7 +177,6 @@ def get_distribution_template() -> DistributionTemplate: ), DatasetInput( dataset_id="mmlu_cot", - provider_id="huggingface", purpose=DatasetPurpose.eval_messages_answer, source=URIDataSource( uri="huggingface://datasets/llamastack/mmlu_cot?split=test&name=all", @@ -186,7 +184,6 @@ def get_distribution_template() -> DistributionTemplate: ), DatasetInput( dataset_id="gpqa_cot", - provider_id="huggingface", purpose=DatasetPurpose.eval_messages_answer, source=URIDataSource( uri="huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main", @@ -194,7 +191,6 @@ def get_distribution_template() -> DistributionTemplate: ), DatasetInput( dataset_id="math_500", - provider_id="huggingface", purpose=DatasetPurpose.eval_messages_answer, source=URIDataSource( uri="huggingface://datasets/llamastack/math_500?split=test", @@ -202,7 +198,6 @@ def get_distribution_template() -> DistributionTemplate: ), DatasetInput( dataset_id="bfcl", - provider_id="huggingface", purpose=DatasetPurpose.eval_messages_answer, source=URIDataSource( uri="huggingface://datasets/llamastack/bfcl_v3?split=train", diff --git a/llama_stack/templates/open-benchmark/run.yaml b/llama_stack/templates/open-benchmark/run.yaml index 93f437273..8dbf51472 100644 --- a/llama_stack/templates/open-benchmark/run.yaml +++ b/llama_stack/templates/open-benchmark/run.yaml @@ -164,35 +164,30 @@ datasets: uri: huggingface://datasets/llamastack/simpleqa?split=train metadata: {} dataset_id: simpleqa - provider_id: huggingface - purpose: eval/messages-answer source: type: uri uri: huggingface://datasets/llamastack/mmlu_cot?split=test&name=all metadata: {} dataset_id: mmlu_cot - provider_id: huggingface - purpose: eval/messages-answer source: type: uri uri: huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main metadata: {} dataset_id: gpqa_cot - provider_id: huggingface - purpose: eval/messages-answer source: type: uri uri: huggingface://datasets/llamastack/math_500?split=test metadata: {} dataset_id: math_500 - provider_id: huggingface - purpose: eval/messages-answer source: type: uri uri: huggingface://datasets/llamastack/bfcl_v3?split=train metadata: {} dataset_id: bfcl - provider_id: huggingface scoring_fns: [] benchmarks: - dataset_id: simpleqa diff --git a/pyproject.toml b/pyproject.toml index a006d69f9..107150cee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,7 +56,7 @@ dev = [ "ruamel.yaml", # needed for openapi generator ] # These are the dependencies required for running unit tests. -unit = ["sqlite-vec", "openai", "aiosqlite", "pypdf", "chardet"] +unit = ["sqlite-vec", "openai", "aiosqlite", "pypdf", "chardet", "qdrant-client"] # These are the core dependencies required for running integration tests. They are shared across all # providers. If a provider requires additional dependencies, please add them to your environment # separately. 
If you are using "uv" to execute your tests, you can use the "--with" flag to specify extra @@ -114,7 +114,6 @@ exclude = [ "./.git", "./docs/*", "./build", - "./scripts", "./venv", "*.pyi", ".pre-commit-config.yaml", @@ -229,10 +228,6 @@ exclude = [ "^llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$", "^llama_stack/providers/inline/inference/vllm/", "^llama_stack/providers/inline/post_training/common/validator\\.py$", - "^llama_stack/providers/inline/post_training/torchtune/common/checkpointer\\.py$", - "^llama_stack/providers/inline/post_training/torchtune/common/utils\\.py$", - "^llama_stack/providers/inline/post_training/torchtune/datasets/sft\\.py$", - "^llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device\\.py$", "^llama_stack/providers/inline/post_training/torchtune/post_training\\.py$", "^llama_stack/providers/inline/safety/code_scanner/", "^llama_stack/providers/inline/safety/llama_guard/", @@ -248,6 +243,7 @@ exclude = [ "^llama_stack/providers/inline/vector_io/chroma/", "^llama_stack/providers/inline/vector_io/faiss/", "^llama_stack/providers/inline/vector_io/milvus/", + "^llama_stack/providers/inline/vector_io/qdrant/", "^llama_stack/providers/inline/vector_io/sqlite_vec/", "^llama_stack/providers/remote/agents/sample/", "^llama_stack/providers/remote/datasetio/huggingface/", diff --git a/scripts/gen-changelog.py b/scripts/gen-changelog.py index ac4053339..3df2af06b 100755 --- a/scripts/gen-changelog.py +++ b/scripts/gen-changelog.py @@ -11,7 +11,7 @@ import requests def get_all_releases(token): - url = f"https://api.github.com/repos/meta-llama/llama-stack/releases" + url = "https://api.github.com/repos/meta-llama/llama-stack/releases" headers = {"Accept": "application/vnd.github.v3+json"} if token: @@ -22,9 +22,7 @@ def get_all_releases(token): if response.status_code == 200: return response.json() else: - raise Exception( - f"Error fetching releases: {response.status_code}, {response.text}" - ) + raise Exception(f"Error fetching releases: {response.status_code}, {response.text}") def clean_release_body(body): @@ -55,7 +53,7 @@ def merge_release_notes(output_file, token=None): releases = get_all_releases(token) with open(output_file, "w", encoding="utf-8") as md_file: - md_file.write(f"# Changelog\n\n") + md_file.write("# Changelog\n\n") for release in releases: md_file.write(f"# {release['tag_name']}\n") diff --git a/tests/unit/models/test_prompt_adapter.py b/tests/unit/models/test_prompt_adapter.py index c3755e2cb..0e2780e50 100644 --- a/tests/unit/models/test_prompt_adapter.py +++ b/tests/unit/models/test_prompt_adapter.py @@ -165,7 +165,10 @@ class PrepareMessagesTests(unittest.IsolatedAsyncioTestCase): request.model = MODEL request.tool_config.tool_prompt_format = ToolPromptFormat.json prompt = await chat_completion_request_to_prompt(request, request.model) - self.assertIn('{"type": "function", "name": "custom1", "parameters": {"param1": "value1"}}', prompt) + self.assertIn( + '{"type": "function", "name": "custom1", "parameters": {"param1": "value1"}}', + prompt, + ) async def test_user_provided_system_message(self): content = "Hello !" diff --git a/tests/unit/providers/vector_io/conftest.py b/tests/unit/providers/vector_io/conftest.py new file mode 100644 index 000000000..3bcd0613f --- /dev/null +++ b/tests/unit/providers/vector_io/conftest.py @@ -0,0 +1,42 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import random + +import numpy as np +import pytest + +from llama_stack.apis.vector_io import Chunk + +EMBEDDING_DIMENSION = 384 + + +@pytest.fixture +def vector_db_id() -> str: + return f"test-vector-db-{random.randint(1, 100)}" + + +@pytest.fixture(scope="session") +def embedding_dimension() -> int: + return EMBEDDING_DIMENSION + + +@pytest.fixture(scope="session") +def sample_chunks(): + """Generates chunks that force multiple batches for a single document to expose ID conflicts.""" + n, k = 10, 3 + sample = [ + Chunk(content=f"Sentence {i} from document {j}", metadata={"document_id": f"document-{j}"}) + for j in range(k) + for i in range(n) + ] + return sample + + +@pytest.fixture(scope="session") +def sample_embeddings(sample_chunks): + np.random.seed(42) + return np.array([np.random.rand(EMBEDDING_DIMENSION).astype(np.float32) for _ in sample_chunks]) diff --git a/tests/unit/providers/vector_io/test_qdrant.py b/tests/unit/providers/vector_io/test_qdrant.py new file mode 100644 index 000000000..bc97719c0 --- /dev/null +++ b/tests/unit/providers/vector_io/test_qdrant.py @@ -0,0 +1,135 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import asyncio +import os +from typing import Any +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +import pytest_asyncio + +from llama_stack.apis.inference import EmbeddingsResponse, Inference +from llama_stack.apis.vector_io import ( + QueryChunksResponse, + VectorDB, + VectorDBStore, +) +from llama_stack.providers.inline.vector_io.qdrant.config import ( + QdrantVectorIOConfig as InlineQdrantVectorIOConfig, +) +from llama_stack.providers.remote.vector_io.qdrant.qdrant import ( + QdrantVectorIOAdapter, +) + +# This test is a unit test for the QdrantVectorIOAdapter class. This should only contain +# tests which are specific to this class. 
More general (API-level) tests should be placed in +# tests/integration/vector_io/ +# +# How to run this test: +# +# pytest tests/unit/providers/vector_io/test_qdrant.py \ +# -v -s --tb=short --disable-warnings --asyncio-mode=auto + + +@pytest.fixture +def qdrant_config(tmp_path) -> InlineQdrantVectorIOConfig: + return InlineQdrantVectorIOConfig(path=os.path.join(tmp_path, "qdrant.db")) + + +@pytest.fixture(scope="session") +def loop(): + return asyncio.new_event_loop() + + +@pytest.fixture +def mock_vector_db(vector_db_id) -> MagicMock: + mock_vector_db = MagicMock(spec=VectorDB) + mock_vector_db.embedding_model = "embedding_model" + mock_vector_db.identifier = vector_db_id + return mock_vector_db + + +@pytest.fixture +def mock_vector_db_store(mock_vector_db) -> MagicMock: + mock_store = MagicMock(spec=VectorDBStore) + mock_store.get_vector_db = AsyncMock(return_value=mock_vector_db) + return mock_store + + +@pytest.fixture +def mock_api_service(sample_embeddings): + mock_api_service = MagicMock(spec=Inference) + mock_api_service.embeddings = AsyncMock(return_value=EmbeddingsResponse(embeddings=sample_embeddings)) + return mock_api_service + + +@pytest_asyncio.fixture +async def qdrant_adapter(qdrant_config, mock_vector_db_store, mock_api_service, loop) -> QdrantVectorIOAdapter: + adapter = QdrantVectorIOAdapter(config=qdrant_config, inference_api=mock_api_service) + adapter.vector_db_store = mock_vector_db_store + await adapter.initialize() + yield adapter + await adapter.shutdown() + + +__QUERY = "Sample query" + + +@pytest.mark.asyncio +@pytest.mark.parametrize("max_query_chunks, expected_chunks", [(2, 2), (100, 30)]) +async def test_qdrant_adapter_returns_expected_chunks( + qdrant_adapter: QdrantVectorIOAdapter, + vector_db_id, + sample_chunks, + sample_embeddings, + max_query_chunks, + expected_chunks, +) -> None: + assert qdrant_adapter is not None + await qdrant_adapter.insert_chunks(vector_db_id, sample_chunks) + + index = await qdrant_adapter._get_and_cache_vector_db_index(vector_db_id=vector_db_id) + assert index is not None + + response = await qdrant_adapter.query_chunks( + query=__QUERY, + vector_db_id=vector_db_id, + params={"max_chunks": max_query_chunks}, + ) + assert isinstance(response, QueryChunksResponse) + assert len(response.chunks) == expected_chunks + + +# To bypass the attempt to convert a Mock to JSON +def _prepare_for_json(value: Any) -> str: + return str(value) + + +@patch("llama_stack.providers.utils.telemetry.trace_protocol._prepare_for_json", new=_prepare_for_json) +@pytest.mark.asyncio +async def test_qdrant_register_and_unregister_vector_db( + qdrant_adapter: QdrantVectorIOAdapter, + mock_vector_db, + sample_chunks, +) -> None: + # Initially, no collections + vector_db_id = mock_vector_db.identifier + assert len((await qdrant_adapter.client.get_collections()).collections) == 0 + + # Register does not create a collection + assert not (await qdrant_adapter.client.collection_exists(vector_db_id)) + await qdrant_adapter.register_vector_db(mock_vector_db) + assert not (await qdrant_adapter.client.collection_exists(vector_db_id)) + + # First insert creates the collection + await qdrant_adapter.insert_chunks(vector_db_id, sample_chunks) + assert await qdrant_adapter.client.collection_exists(vector_db_id) + + # Unregister deletes the collection + await qdrant_adapter.unregister_vector_db(vector_db_id) + assert not (await qdrant_adapter.client.collection_exists(vector_db_id)) + assert len((await qdrant_adapter.client.get_collections()).collections) == 0 diff --git
a/tests/unit/providers/vector_io/test_sqlite_vec.py b/tests/unit/providers/vector_io/test_sqlite_vec.py index eb5660a85..cff988c53 100644 --- a/tests/unit/providers/vector_io/test_sqlite_vec.py +++ b/tests/unit/providers/vector_io/test_sqlite_vec.py @@ -29,8 +29,6 @@ from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import ( # -v -s --tb=short --disable-warnings --asyncio-mode=auto SQLITE_VEC_PROVIDER = "sqlite_vec" -EMBEDDING_DIMENSION = 384 -EMBEDDING_MODEL = "all-MiniLM-L6-v2" @pytest.fixture(scope="session") @@ -50,26 +48,8 @@ def sqlite_connection(loop): @pytest_asyncio.fixture(scope="session", autouse=True) -async def sqlite_vec_index(sqlite_connection): - return await SQLiteVecIndex.create(dimension=EMBEDDING_DIMENSION, connection=sqlite_connection, bank_id="test_bank") - - -@pytest.fixture(scope="session") -def sample_chunks(): - """Generates chunks that force multiple batches for a single document to expose ID conflicts.""" - n, k = 10, 3 - sample = [ - Chunk(content=f"Sentence {i} from document {j}", metadata={"document_id": f"document-{j}"}) - for j in range(k) - for i in range(n) - ] - return sample - - -@pytest.fixture(scope="session") -def sample_embeddings(sample_chunks): - np.random.seed(42) - return np.array([np.random.rand(EMBEDDING_DIMENSION).astype(np.float32) for _ in sample_chunks]) +async def sqlite_vec_index(sqlite_connection, embedding_dimension): + return await SQLiteVecIndex.create(dimension=embedding_dimension, connection=sqlite_connection, bank_id="test_bank") @pytest.mark.asyncio @@ -82,21 +62,21 @@ async def test_add_chunks(sqlite_vec_index, sample_chunks, sample_embeddings): @pytest.mark.asyncio -async def test_query_chunks(sqlite_vec_index, sample_chunks, sample_embeddings): +async def test_query_chunks(sqlite_vec_index, sample_chunks, sample_embeddings, embedding_dimension): await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) - query_embedding = np.random.rand(EMBEDDING_DIMENSION).astype(np.float32) + query_embedding = np.random.rand(embedding_dimension).astype(np.float32) response = await sqlite_vec_index.query(query_embedding, k=2, score_threshold=0.0) assert isinstance(response, QueryChunksResponse) assert len(response.chunks) == 2 @pytest.mark.asyncio -async def test_chunk_id_conflict(sqlite_vec_index, sample_chunks): +async def test_chunk_id_conflict(sqlite_vec_index, sample_chunks, embedding_dimension): """Test that chunk IDs do not conflict across batches when inserting chunks.""" # Reduce batch size to force multiple batches for same document # since there are 10 chunks per document and batch size is 2 batch_size = 2 - sample_embeddings = np.random.rand(len(sample_chunks), EMBEDDING_DIMENSION).astype(np.float32) + sample_embeddings = np.random.rand(len(sample_chunks), embedding_dimension).astype(np.float32) await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings, batch_size=batch_size) diff --git a/tests/unit/server/test_auth.py b/tests/unit/server/test_auth.py new file mode 100644 index 000000000..70f08dbd6 --- /dev/null +++ b/tests/unit/server/test_auth.py @@ -0,0 +1,124 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
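+# These tests exercise AuthenticationMiddleware through FastAPI's TestClient, with httpx.AsyncClient.post patched out so no real auth service is contacted.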
+ +from unittest.mock import AsyncMock, patch + +import pytest +from fastapi import FastAPI +from fastapi.testclient import TestClient + +from llama_stack.distribution.server.auth import AuthenticationMiddleware + + +@pytest.fixture +def mock_auth_endpoint(): + return "http://mock-auth-service/validate" + + +@pytest.fixture +def valid_api_key(): + return "valid_api_key_12345" + + +@pytest.fixture +def invalid_api_key(): + return "invalid_api_key_67890" + + +@pytest.fixture +def app(mock_auth_endpoint): + app = FastAPI() + app.add_middleware(AuthenticationMiddleware, auth_endpoint=mock_auth_endpoint) + + @app.get("/test") + def test_endpoint(): + return {"message": "Authentication successful"} + + return app + + +@pytest.fixture +def client(app): + return TestClient(app) + + +async def mock_post_success(*args, **kwargs): + mock_response = AsyncMock() + mock_response.status_code = 200 + return mock_response + + +async def mock_post_failure(*args, **kwargs): + mock_response = AsyncMock() + mock_response.status_code = 401 + return mock_response + + +async def mock_post_exception(*args, **kwargs): + raise Exception("Connection error") + + +def test_missing_auth_header(client): + response = client.get("/test") + assert response.status_code == 401 + assert "Missing or invalid Authorization header" in response.json()["error"]["message"] + + +def test_invalid_auth_header_format(client): + response = client.get("/test", headers={"Authorization": "InvalidFormat token123"}) + assert response.status_code == 401 + assert "Missing or invalid Authorization header" in response.json()["error"]["message"] + + +@patch("httpx.AsyncClient.post", new=mock_post_success) +def test_valid_authentication(client, valid_api_key): + response = client.get("/test", headers={"Authorization": f"Bearer {valid_api_key}"}) + assert response.status_code == 200 + assert response.json() == {"message": "Authentication successful"} + + +@patch("httpx.AsyncClient.post", new=mock_post_failure) +def test_invalid_authentication(client, invalid_api_key): + response = client.get("/test", headers={"Authorization": f"Bearer {invalid_api_key}"}) + assert response.status_code == 401 + assert "Authentication failed" in response.json()["error"]["message"] + + +@patch("httpx.AsyncClient.post", new=mock_post_exception) +def test_auth_service_error(client, valid_api_key): + response = client.get("/test", headers={"Authorization": f"Bearer {valid_api_key}"}) + assert response.status_code == 401 + assert "Authentication service error" in response.json()["error"]["message"] + + +def test_auth_request_payload(client, valid_api_key, mock_auth_endpoint): + with patch("httpx.AsyncClient.post") as mock_post: + mock_response = AsyncMock() + mock_response.status_code = 200 + mock_post.return_value = mock_response + + client.get( + "/test?param1=value1&param2=value2", + headers={ + "Authorization": f"Bearer {valid_api_key}", + "User-Agent": "TestClient", + "Content-Type": "application/json", + }, + ) + + # Check that the auth endpoint was called with the correct payload + call_args = mock_post.call_args + assert call_args is not None + + url, kwargs = call_args[0][0], call_args[1] + assert url == mock_auth_endpoint + + payload = kwargs["json"] + assert payload["api_key"] == valid_api_key + assert payload["request"]["path"] == "/test" + assert "authorization" in payload["request"]["headers"] + assert "param1" in payload["request"]["params"] + assert "param2" in payload["request"]["params"] diff --git a/uv.lock b/uv.lock index 860b29241..b63d23b14 100644 ---
a/uv.lock +++ b/uv.lock @@ -1,5 +1,4 @@ version = 1 -revision = 1 requires-python = ">=3.10" resolution-markers = [ "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')", @@ -8,9 +7,12 @@ resolution-markers = [ "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')", "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", "python_full_version == '3.11.*' and sys_platform == 'darwin'", - "(python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform != 'darwin' and sys_platform != 'linux')", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "python_full_version >= '3.12' and sys_platform == 'darwin'", + "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version == '3.12.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.12.*' and sys_platform != 'darwin' and sys_platform != 'linux')", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", + "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", + "python_full_version >= '3.13' and sys_platform == 'darwin'", + "python_full_version == '3.12.*' and sys_platform == 'darwin'", ] [[package]] @@ -793,6 +795,107 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/89/30/2bd0eb03a7dee7727cd2ec643d1e992979e62d5e7443507381cce0455132/googleapis_common_protos-1.67.0-py2.py3-none-any.whl", hash = "sha256:579de760800d13616f51cf8be00c876f00a9f146d3e6510e19d1f4111758b741", size = 164985 }, ] +[[package]] +name = "grpcio" +version = "1.71.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1c/95/aa11fc09a85d91fbc7dd405dcb2a1e0256989d67bf89fa65ae24b3ba105a/grpcio-1.71.0.tar.gz", hash = "sha256:2b85f7820475ad3edec209d3d89a7909ada16caab05d3f2e08a7e8ae3200a55c", size = 12549828 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/c5/ef610b3f988cc0cc67b765f72b8e2db06a1db14e65acb5ae7810a6b7042e/grpcio-1.71.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:c200cb6f2393468142eb50ab19613229dcc7829b5ccee8b658a36005f6669fdd", size = 5210643 }, + { url = "https://files.pythonhosted.org/packages/bf/de/c84293c961622df302c0d5d07ec6e2d4cd3874ea42f602be2df09c4ad44f/grpcio-1.71.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:b2266862c5ad664a380fbbcdbdb8289d71464c42a8c29053820ee78ba0119e5d", size = 11308962 }, + { url = "https://files.pythonhosted.org/packages/7c/38/04c9e0dc8c904570c80faa1f1349b190b63e45d6b2782ec8567b050efa9d/grpcio-1.71.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:0ab8b2864396663a5b0b0d6d79495657ae85fa37dcb6498a2669d067c65c11ea", size = 5699236 }, + { url = "https://files.pythonhosted.org/packages/95/96/e7be331d1298fa605ea7c9ceafc931490edd3d5b33c4f695f1a0667f3491/grpcio-1.71.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c30f393f9d5ff00a71bb56de4aa75b8fe91b161aeb61d39528db6b768d7eac69", size = 6339767 }, + { url = 
"https://files.pythonhosted.org/packages/5d/b7/7e7b7bb6bb18baf156fd4f2f5b254150dcdd6cbf0def1ee427a2fb2bfc4d/grpcio-1.71.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f250ff44843d9a0615e350c77f890082102a0318d66a99540f54769c8766ab73", size = 5943028 }, + { url = "https://files.pythonhosted.org/packages/13/aa/5fb756175995aeb47238d706530772d9a7ac8e73bcca1b47dc145d02c95f/grpcio-1.71.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e6d8de076528f7c43a2f576bc311799f89d795aa6c9b637377cc2b1616473804", size = 6031841 }, + { url = "https://files.pythonhosted.org/packages/54/93/172783e01eed61f7f180617b7fa4470f504e383e32af2587f664576a7101/grpcio-1.71.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:9b91879d6da1605811ebc60d21ab6a7e4bae6c35f6b63a061d61eb818c8168f6", size = 6651039 }, + { url = "https://files.pythonhosted.org/packages/6f/99/62654b220a27ed46d3313252214f4bc66261143dc9b58004085cd0646753/grpcio-1.71.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f71574afdf944e6652203cd1badcda195b2a27d9c83e6d88dc1ce3cfb73b31a5", size = 6198465 }, + { url = "https://files.pythonhosted.org/packages/68/35/96116de833b330abe4412cc94edc68f99ed2fa3e39d8713ff307b3799e81/grpcio-1.71.0-cp310-cp310-win32.whl", hash = "sha256:8997d6785e93308f277884ee6899ba63baafa0dfb4729748200fcc537858a509", size = 3620382 }, + { url = "https://files.pythonhosted.org/packages/b7/09/f32ef637e386f3f2c02effac49699229fa560ce9007682d24e9e212d2eb4/grpcio-1.71.0-cp310-cp310-win_amd64.whl", hash = "sha256:7d6ac9481d9d0d129224f6d5934d5832c4b1cddb96b59e7eba8416868909786a", size = 4280302 }, + { url = "https://files.pythonhosted.org/packages/63/04/a085f3ad4133426f6da8c1becf0749872a49feb625a407a2e864ded3fb12/grpcio-1.71.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:d6aa986318c36508dc1d5001a3ff169a15b99b9f96ef5e98e13522c506b37eef", size = 5210453 }, + { url = "https://files.pythonhosted.org/packages/b4/d5/0bc53ed33ba458de95020970e2c22aa8027b26cc84f98bea7fcad5d695d1/grpcio-1.71.0-cp311-cp311-macosx_10_14_universal2.whl", hash = "sha256:d2c170247315f2d7e5798a22358e982ad6eeb68fa20cf7a820bb74c11f0736e7", size = 11347567 }, + { url = "https://files.pythonhosted.org/packages/e3/6d/ce334f7e7a58572335ccd61154d808fe681a4c5e951f8a1ff68f5a6e47ce/grpcio-1.71.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:e6f83a583ed0a5b08c5bc7a3fe860bb3c2eac1f03f1f63e0bc2091325605d2b7", size = 5696067 }, + { url = "https://files.pythonhosted.org/packages/05/4a/80befd0b8b1dc2b9ac5337e57473354d81be938f87132e147c4a24a581bd/grpcio-1.71.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4be74ddeeb92cc87190e0e376dbc8fc7736dbb6d3d454f2fa1f5be1dee26b9d7", size = 6348377 }, + { url = "https://files.pythonhosted.org/packages/c7/67/cbd63c485051eb78663355d9efd1b896cfb50d4a220581ec2cb9a15cd750/grpcio-1.71.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4dd0dfbe4d5eb1fcfec9490ca13f82b089a309dc3678e2edabc144051270a66e", size = 5940407 }, + { url = "https://files.pythonhosted.org/packages/98/4b/7a11aa4326d7faa499f764eaf8a9b5a0eb054ce0988ee7ca34897c2b02ae/grpcio-1.71.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a2242d6950dc892afdf9e951ed7ff89473aaf744b7d5727ad56bdaace363722b", size = 6030915 }, + { url = "https://files.pythonhosted.org/packages/eb/a2/cdae2d0e458b475213a011078b0090f7a1d87f9a68c678b76f6af7c6ac8c/grpcio-1.71.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0fa05ee31a20456b13ae49ad2e5d585265f71dd19fbd9ef983c28f926d45d0a7", size = 6648324 }, + { url = 
"https://files.pythonhosted.org/packages/27/df/f345c8daaa8d8574ce9869f9b36ca220c8845923eb3087e8f317eabfc2a8/grpcio-1.71.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3d081e859fb1ebe176de33fc3adb26c7d46b8812f906042705346b314bde32c3", size = 6197839 }, + { url = "https://files.pythonhosted.org/packages/f2/2c/cd488dc52a1d0ae1bad88b0d203bc302efbb88b82691039a6d85241c5781/grpcio-1.71.0-cp311-cp311-win32.whl", hash = "sha256:d6de81c9c00c8a23047136b11794b3584cdc1460ed7cbc10eada50614baa1444", size = 3619978 }, + { url = "https://files.pythonhosted.org/packages/ee/3f/cf92e7e62ccb8dbdf977499547dfc27133124d6467d3a7d23775bcecb0f9/grpcio-1.71.0-cp311-cp311-win_amd64.whl", hash = "sha256:24e867651fc67717b6f896d5f0cac0ec863a8b5fb7d6441c2ab428f52c651c6b", size = 4282279 }, + { url = "https://files.pythonhosted.org/packages/4c/83/bd4b6a9ba07825bd19c711d8b25874cd5de72c2a3fbf635c3c344ae65bd2/grpcio-1.71.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:0ff35c8d807c1c7531d3002be03221ff9ae15712b53ab46e2a0b4bb271f38537", size = 5184101 }, + { url = "https://files.pythonhosted.org/packages/31/ea/2e0d90c0853568bf714693447f5c73272ea95ee8dad107807fde740e595d/grpcio-1.71.0-cp312-cp312-macosx_10_14_universal2.whl", hash = "sha256:b78a99cd1ece4be92ab7c07765a0b038194ded2e0a26fd654591ee136088d8d7", size = 11310927 }, + { url = "https://files.pythonhosted.org/packages/ac/bc/07a3fd8af80467390af491d7dc66882db43884128cdb3cc8524915e0023c/grpcio-1.71.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:dc1a1231ed23caac1de9f943d031f1bc38d0f69d2a3b243ea0d664fc1fbd7fec", size = 5654280 }, + { url = "https://files.pythonhosted.org/packages/16/af/21f22ea3eed3d0538b6ef7889fce1878a8ba4164497f9e07385733391e2b/grpcio-1.71.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e6beeea5566092c5e3c4896c6d1d307fb46b1d4bdf3e70c8340b190a69198594", size = 6312051 }, + { url = "https://files.pythonhosted.org/packages/49/9d/e12ddc726dc8bd1aa6cba67c85ce42a12ba5b9dd75d5042214a59ccf28ce/grpcio-1.71.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5170929109450a2c031cfe87d6716f2fae39695ad5335d9106ae88cc32dc84c", size = 5910666 }, + { url = "https://files.pythonhosted.org/packages/d9/e9/38713d6d67aedef738b815763c25f092e0454dc58e77b1d2a51c9d5b3325/grpcio-1.71.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:5b08d03ace7aca7b2fadd4baf291139b4a5f058805a8327bfe9aece7253b6d67", size = 6012019 }, + { url = "https://files.pythonhosted.org/packages/80/da/4813cd7adbae6467724fa46c952d7aeac5e82e550b1c62ed2aeb78d444ae/grpcio-1.71.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:f903017db76bf9cc2b2d8bdd37bf04b505bbccad6be8a81e1542206875d0e9db", size = 6637043 }, + { url = "https://files.pythonhosted.org/packages/52/ca/c0d767082e39dccb7985c73ab4cf1d23ce8613387149e9978c70c3bf3b07/grpcio-1.71.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:469f42a0b410883185eab4689060a20488a1a0a00f8bbb3cbc1061197b4c5a79", size = 6186143 }, + { url = "https://files.pythonhosted.org/packages/00/61/7b2c8ec13303f8fe36832c13d91ad4d4ba57204b1c723ada709c346b2271/grpcio-1.71.0-cp312-cp312-win32.whl", hash = "sha256:ad9f30838550695b5eb302add33f21f7301b882937460dd24f24b3cc5a95067a", size = 3604083 }, + { url = "https://files.pythonhosted.org/packages/fd/7c/1e429c5fb26122055d10ff9a1d754790fb067d83c633ff69eddcf8e3614b/grpcio-1.71.0-cp312-cp312-win_amd64.whl", hash = "sha256:652350609332de6dac4ece254e5d7e1ff834e203d6afb769601f286886f6f3a8", size = 4272191 }, + { url = 
"https://files.pythonhosted.org/packages/04/dd/b00cbb45400d06b26126dcfdbdb34bb6c4f28c3ebbd7aea8228679103ef6/grpcio-1.71.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:cebc1b34ba40a312ab480ccdb396ff3c529377a2fce72c45a741f7215bfe8379", size = 5184138 }, + { url = "https://files.pythonhosted.org/packages/ed/0a/4651215983d590ef53aac40ba0e29dda941a02b097892c44fa3357e706e5/grpcio-1.71.0-cp313-cp313-macosx_10_14_universal2.whl", hash = "sha256:85da336e3649a3d2171e82f696b5cad2c6231fdd5bad52616476235681bee5b3", size = 11310747 }, + { url = "https://files.pythonhosted.org/packages/57/a3/149615b247f321e13f60aa512d3509d4215173bdb982c9098d78484de216/grpcio-1.71.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f9a412f55bb6e8f3bb000e020dbc1e709627dcb3a56f6431fa7076b4c1aab0db", size = 5653991 }, + { url = "https://files.pythonhosted.org/packages/ca/56/29432a3e8d951b5e4e520a40cd93bebaa824a14033ea8e65b0ece1da6167/grpcio-1.71.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:47be9584729534660416f6d2a3108aaeac1122f6b5bdbf9fd823e11fe6fbaa29", size = 6312781 }, + { url = "https://files.pythonhosted.org/packages/a3/f8/286e81a62964ceb6ac10b10925261d4871a762d2a763fbf354115f9afc98/grpcio-1.71.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c9c80ac6091c916db81131d50926a93ab162a7e97e4428ffc186b6e80d6dda4", size = 5910479 }, + { url = "https://files.pythonhosted.org/packages/35/67/d1febb49ec0f599b9e6d4d0d44c2d4afdbed9c3e80deb7587ec788fcf252/grpcio-1.71.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:789d5e2a3a15419374b7b45cd680b1e83bbc1e52b9086e49308e2c0b5bbae6e3", size = 6013262 }, + { url = "https://files.pythonhosted.org/packages/a1/04/f9ceda11755f0104a075ad7163fc0d96e2e3a9fe25ef38adfc74c5790daf/grpcio-1.71.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:1be857615e26a86d7363e8a163fade914595c81fec962b3d514a4b1e8760467b", size = 6643356 }, + { url = "https://files.pythonhosted.org/packages/fb/ce/236dbc3dc77cf9a9242adcf1f62538734ad64727fabf39e1346ad4bd5c75/grpcio-1.71.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:a76d39b5fafd79ed604c4be0a869ec3581a172a707e2a8d7a4858cb05a5a7637", size = 6186564 }, + { url = "https://files.pythonhosted.org/packages/10/fd/b3348fce9dd4280e221f513dd54024e765b21c348bc475516672da4218e9/grpcio-1.71.0-cp313-cp313-win32.whl", hash = "sha256:74258dce215cb1995083daa17b379a1a5a87d275387b7ffe137f1d5131e2cfbb", size = 3601890 }, + { url = "https://files.pythonhosted.org/packages/be/f8/db5d5f3fc7e296166286c2a397836b8b042f7ad1e11028d82b061701f0f7/grpcio-1.71.0-cp313-cp313-win_amd64.whl", hash = "sha256:22c3bc8d488c039a199f7a003a38cb7635db6656fa96437a8accde8322ce2366", size = 4273308 }, +] + +[[package]] +name = "grpcio-tools" +version = "1.71.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "grpcio" }, + { name = "protobuf" }, + { name = "setuptools" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/05/d2/c0866a48c355a6a4daa1f7e27e210c7fa561b1f3b7c0bce2671e89cfa31e/grpcio_tools-1.71.0.tar.gz", hash = "sha256:38dba8e0d5e0fb23a034e09644fdc6ed862be2371887eee54901999e8f6792a8", size = 5326008 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f9/60/aa7f261eda558d018457e5c8bd8a8079136e5107a0942fd3167477ab50e2/grpcio_tools-1.71.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:f4ad7f0d756546902597053d70b3af2606fbd70d7972876cd75c1e241d22ae00", size = 2385558 }, + { url = 
"https://files.pythonhosted.org/packages/0d/e3/e47b96e93e51398ba3462e027d93a10c0c23fffc31733de9bd4f44a2b867/grpcio_tools-1.71.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:64bdb291df61cf570b5256777ad5fe2b1db6d67bc46e55dc56a0a862722ae329", size = 5930039 }, + { url = "https://files.pythonhosted.org/packages/a6/69/5d8920002483b2a65ae3b03329dfe3b668c3592f001d5358e1538f540012/grpcio_tools-1.71.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:8dd9795e982d77a4b496f7278b943c2563d9afde2069cdee78c111a40cc4d675", size = 2351932 }, + { url = "https://files.pythonhosted.org/packages/c4/50/8116e307662a2337cdc3f0e1a8b23af197129448b7ff7e0cf1a76c9b0178/grpcio_tools-1.71.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c1b5860c41a36b26fec4f52998f1a451d0525a5c9a4fb06b6ea3e9211abdb925", size = 2744962 }, + { url = "https://files.pythonhosted.org/packages/e3/4b/d95be4aaf78d7b02dff3bd332c75c228288178e92af0e5228759ac5002a0/grpcio_tools-1.71.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3059c14035e5dc03d462f261e5900b9a077fd1a36976c3865b8507474520bad4", size = 2476716 }, + { url = "https://files.pythonhosted.org/packages/37/c2/c784a3705b1a1fd277751a8fc881d5a29325a460b9211e3c6164f594b178/grpcio_tools-1.71.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:f360981b215b1d5aff9235b37e7e1826246e35bbac32a53e41d4e990a37b8f4c", size = 2854132 }, + { url = "https://files.pythonhosted.org/packages/93/8f/173adbf72ed3996e1962182b55abf30151edc8b53daac0bf15cc3dc4b09e/grpcio_tools-1.71.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bfe3888c3bbe16a5aa39409bc38744a31c0c3d2daa2b0095978c56e106c85b42", size = 3305069 }, + { url = "https://files.pythonhosted.org/packages/e4/a8/b1e7df63e7f83336275922f92ded1cd6918964c511280b31c872c54538f4/grpcio_tools-1.71.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:145985c0bf12131f0a1503e65763e0f060473f7f3928ed1ff3fb0e8aad5bc8ac", size = 2916636 }, + { url = "https://files.pythonhosted.org/packages/be/a3/53f1e74c6e1c92ad94d7a0127a60fe913276a3e8c864737a053a1574b05c/grpcio_tools-1.71.0-cp310-cp310-win32.whl", hash = "sha256:82c430edd939bb863550ee0fecf067d78feff828908a1b529bbe33cc57f2419c", size = 949576 }, + { url = "https://files.pythonhosted.org/packages/97/43/4a3ae830c1405bcb1ba47f2225779dbe9fc009ba341d4a90012919304855/grpcio_tools-1.71.0-cp310-cp310-win_amd64.whl", hash = "sha256:83e90724e3f02415c628e4ead1d6ffe063820aaaa078d9a39176793df958cd5a", size = 1121087 }, + { url = "https://files.pythonhosted.org/packages/5d/ec/73b9797ffec80e1faf039ce3e2f0513e26e1a68eedc525ed294ae2a44d03/grpcio_tools-1.71.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:1f19b16b49afa5d21473f49c0966dd430c88d089cd52ac02404d8cef67134efb", size = 2385557 }, + { url = "https://files.pythonhosted.org/packages/bf/87/42c6e192b7b09c9610a53e771797f7826aee4f6e769683985ae406a2d862/grpcio_tools-1.71.0-cp311-cp311-macosx_10_14_universal2.whl", hash = "sha256:459c8f5e00e390aecd5b89de67deb3ec7188a274bc6cb50e43cef35ab3a3f45d", size = 5954404 }, + { url = "https://files.pythonhosted.org/packages/25/30/3fd385a56d32dce34cde09a64dbaf7cf85d395f2bcd86dd41e4b4ee5938f/grpcio_tools-1.71.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:edab7e6518de01196be37f96cb1e138c3819986bf5e2a6c9e1519b4d716b2f5a", size = 2352061 }, + { url = "https://files.pythonhosted.org/packages/87/eb/e9971c7693a2d85e7f55760f7906211a95ff74af4d41b05d187849d7fb58/grpcio_tools-1.71.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:8b93b9f6adc7491d4c10144c0643409db298e5e63c997106a804f6f0248dbaf4", size = 2745033 }, + { url = "https://files.pythonhosted.org/packages/15/72/4e69beae87a1b334f80da9e93c8e2f5c8fe4860c956a781246a092dc4c97/grpcio_tools-1.71.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ae5f2efa9e644c10bf1021600bfc099dfbd8e02b184d2d25dc31fcd6c2bc59e", size = 2476743 }, + { url = "https://files.pythonhosted.org/packages/b5/f3/336d2c83f1bfc00a5376bf20dd2273d7aa891b03dd91b11c71ca47392351/grpcio_tools-1.71.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:65aa082f4435571d65d5ce07fc444f23c3eff4f3e34abef599ef8c9e1f6f360f", size = 2853693 }, + { url = "https://files.pythonhosted.org/packages/62/ba/cc7ace518c11501a4b8620df5edb8188e81470e5b82dc6829212f3e9b2ff/grpcio_tools-1.71.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:1331e726e08b7bdcbf2075fcf4b47dff07842b04845e6e220a08a4663e232d7f", size = 3304474 }, + { url = "https://files.pythonhosted.org/packages/00/0d/4b843654af3d5aa2f1a5775df1d583e6e3471e6d569106fd3213ad185a98/grpcio_tools-1.71.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:6693a7d3ba138b0e693b3d1f687cdd9db9e68976c3fa2b951c17a072fea8b583", size = 2916147 }, + { url = "https://files.pythonhosted.org/packages/e4/14/047e1c817422bc3d434247b9c640c51fd51ca4e047583ff31d927c3dea73/grpcio_tools-1.71.0-cp311-cp311-win32.whl", hash = "sha256:6d11ed3ff7b6023b5c72a8654975324bb98c1092426ba5b481af406ff559df00", size = 949374 }, + { url = "https://files.pythonhosted.org/packages/86/cb/739a1b6d517672693796022c0f9061f63eaa243ec70cbbfa59bf881ed9fb/grpcio_tools-1.71.0-cp311-cp311-win_amd64.whl", hash = "sha256:072b2a5805ac97e4623b3aa8f7818275f3fb087f4aa131b0fce00471065f6eaa", size = 1120786 }, + { url = "https://files.pythonhosted.org/packages/de/e4/156956b92ad0298290c3d68e6670bc5a6fbefcccfe1ec3997480605e7135/grpcio_tools-1.71.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:61c0409d5bdac57a7bd0ce0ab01c1c916728fe4c8a03d77a25135ad481eb505c", size = 2385480 }, + { url = "https://files.pythonhosted.org/packages/c1/08/9930eb4bb38c5214041c9f24f8b35e9864a7938282db986836546c782d52/grpcio_tools-1.71.0-cp312-cp312-macosx_10_14_universal2.whl", hash = "sha256:28784f39921d061d2164a9dcda5164a69d07bf29f91f0ea50b505958292312c9", size = 5951891 }, + { url = "https://files.pythonhosted.org/packages/73/65/931f29ec9c33719d48e1e30446ecce6f5d2cd4e4934fa73fbe07de41c43b/grpcio_tools-1.71.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:192808cf553cedca73f0479cc61d5684ad61f24db7a5f3c4dfe1500342425866", size = 2351967 }, + { url = "https://files.pythonhosted.org/packages/b8/26/2ec8748534406214f20a4809c36efcfa88d1a26246e8312102e3ef8c295d/grpcio_tools-1.71.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:989ee9da61098230d3d4c8f8f8e27c2de796f1ff21b1c90110e636d9acd9432b", size = 2745003 }, + { url = "https://files.pythonhosted.org/packages/f1/33/87b4610c86a4e10ee446b543a4d536f94ab04f828bab841f0bc1a083de72/grpcio_tools-1.71.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:541a756276c8a55dec991f6c0106ae20c8c8f5ce8d0bdbfcb01e2338d1a8192b", size = 2476455 }, + { url = "https://files.pythonhosted.org/packages/00/7c/f7f0cc36a43be9d45b3ce2a55245f3c7d063a24b7930dd719929e58871a4/grpcio_tools-1.71.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:870c0097700d13c403e5517cb7750ab5b4a791ce3e71791c411a38c5468b64bd", size = 2854333 }, + { url = 
"https://files.pythonhosted.org/packages/07/c4/34b9ea62b173c13fa7accba5f219355b320c05c80c79c3ba70fe52f47b2f/grpcio_tools-1.71.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:abd57f615e88bf93c3c6fd31f923106e3beb12f8cd2df95b0d256fa07a7a0a57", size = 3304297 }, + { url = "https://files.pythonhosted.org/packages/5c/ef/9d3449db8a07688dc3de7dcbd2a07048a128610b1a491c5c0cb3e90a00c5/grpcio_tools-1.71.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:753270e2d06d37e6d7af8967d1d059ec635ad215882041a36294f4e2fd502b2e", size = 2916212 }, + { url = "https://files.pythonhosted.org/packages/2e/c6/990e8194c934dfe7cf89ef307c319fa4f2bc0b78aeca707addbfa1e502f1/grpcio_tools-1.71.0-cp312-cp312-win32.whl", hash = "sha256:0e647794bd7138b8c215e86277a9711a95cf6a03ff6f9e555d54fdf7378b9f9d", size = 948849 }, + { url = "https://files.pythonhosted.org/packages/42/95/3c36d3205e6bd19853cc2420e44b6ef302eb4cfcf56498973c7e85f6c03b/grpcio_tools-1.71.0-cp312-cp312-win_amd64.whl", hash = "sha256:48debc879570972d28bfe98e4970eff25bb26da3f383e0e49829b2d2cd35ad87", size = 1120294 }, + { url = "https://files.pythonhosted.org/packages/84/a7/70dc7e9957bcbaccd4dcb6cc11215e0b918f546d55599221522fe0d073e0/grpcio_tools-1.71.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:9a78d07d6c301a25ef5ede962920a522556a1dfee1ccc05795994ceb867f766c", size = 2384758 }, + { url = "https://files.pythonhosted.org/packages/65/79/57320b28d0a0c5ec94095fd571a65292f8ed7e1c47e59ae4021e8a48d49b/grpcio_tools-1.71.0-cp313-cp313-macosx_10_14_universal2.whl", hash = "sha256:580ac88141c9815557e63c9c04f5b1cdb19b4db8d0cb792b573354bde1ee8b12", size = 5951661 }, + { url = "https://files.pythonhosted.org/packages/80/3d/343df5ed7c5dd66fc7a19e4ef3e97ccc4f5d802122b04cd6492f0dcd79f5/grpcio_tools-1.71.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f7c678e68ece0ae908ecae1c4314a0c2c7f83e26e281738b9609860cc2c82d96", size = 2351571 }, + { url = "https://files.pythonhosted.org/packages/56/2f/b9736e8c84e880c4237f5b880c6c799b4977c5cde190999bc7ab4b2ec445/grpcio_tools-1.71.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:56ecd6cc89b5e5eed1de5eb9cafce86c9c9043ee3840888cc464d16200290b53", size = 2744580 }, + { url = "https://files.pythonhosted.org/packages/76/9b/bdb384967353da7bf64bac4232f4cf8ae43f19d0f2f640978d4d4197e667/grpcio_tools-1.71.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e52a041afc20ab2431d756b6295d727bd7adee813b21b06a3483f4a7a15ea15f", size = 2475978 }, + { url = "https://files.pythonhosted.org/packages/26/71/1411487fd7862d347b98fda5e3beef611a71b2ac2faac62a965d9e2536b3/grpcio_tools-1.71.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:2a1712f12102b60c8d92779b89d0504e0d6f3a59f2b933e5622b8583f5c02992", size = 2853314 }, + { url = "https://files.pythonhosted.org/packages/03/06/59d0523eb1ba2f64edc72cb150152fa1b2e77061cae3ef3ecd3ef2a87f51/grpcio_tools-1.71.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:41878cb7a75477e62fdd45e7e9155b3af1b7a5332844021e2511deaf99ac9e6c", size = 3303981 }, + { url = "https://files.pythonhosted.org/packages/c2/71/fb9fb49f2b738ec1dfbbc8cdce0b26e5f9c5fc0edef72e453580620d6a36/grpcio_tools-1.71.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:682e958b476049ccc14c71bedf3f979bced01f6e0c04852efc5887841a32ad6b", size = 2915876 }, + { url = "https://files.pythonhosted.org/packages/bd/0f/0d49f6fe6fa2d09e9820dd9eeb30437e86002303076be2b6ada0fb52b8f2/grpcio_tools-1.71.0-cp313-cp313-win32.whl", hash = 
"sha256:0ccfb837152b7b858b9f26bb110b3ae8c46675d56130f6c2f03605c4f129be13", size = 948245 }, + { url = "https://files.pythonhosted.org/packages/bb/14/ab131a39187bfea950280b2277a82d2033469fe8c86f73b10b19f53cc5ca/grpcio_tools-1.71.0-cp313-cp313-win_amd64.whl", hash = "sha256:ffff9bc5eacb34dd26b487194f7d44a3e64e752fc2cf049d798021bf25053b87", size = 1119649 }, +] + [[package]] name = "h11" version = "0.14.0" @@ -802,6 +905,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259 }, ] +[[package]] +name = "h2" +version = "4.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "hpack" }, + { name = "hyperframe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1b/38/d7f80fd13e6582fb8e0df8c9a653dcc02b03ca34f4d72f34869298c5baf8/h2-4.2.0.tar.gz", hash = "sha256:c8a52129695e88b1a0578d8d2cc6842bbd79128ac685463b887ee278126ad01f", size = 2150682 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d0/9e/984486f2d0a0bd2b024bf4bc1c62688fcafa9e61991f041fb0e2def4a982/h2-4.2.0-py3-none-any.whl", hash = "sha256:479a53ad425bb29af087f3458a61d30780bc818e4ebcf01f0b536ba916462ed0", size = 60957 }, +] + +[[package]] +name = "hpack" +version = "4.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357 }, +] + [[package]] name = "httpcore" version = "1.0.7" @@ -830,6 +955,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 }, ] +[package.optional-dependencies] +http2 = [ + { name = "h2" }, +] + [[package]] name = "httpx-sse" version = "0.4.0" @@ -857,6 +987,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/4d/8092df2cb0cafa9fcaf691db851b2fccfe9cad4048e081436bbbdf56e4e1/huggingface_hub-0.29.0-py3-none-any.whl", hash = "sha256:c02daa0b6bafbdacb1320fdfd1dc7151d0940825c88c4ef89837fdb1f6ea0afe", size = 468012 }, ] +[[package]] +name = "hyperframe" +version = "6.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007 }, +] + [[package]] name = "identify" version = "2.6.7" @@ -1250,6 +1389,7 @@ unit = [ { name = "chardet" }, { name = "openai" }, { name = "pypdf" }, + { name = "qdrant-client" }, { name = "sqlite-vec" }, ] @@ -1290,6 +1430,7 @@ requires-dist = 
[ { name = "pytest-cov", marker = "extra == 'dev'" }, { name = "pytest-html", marker = "extra == 'dev'" }, { name = "python-dotenv" }, + { name = "qdrant-client", marker = "extra == 'unit'" }, { name = "requests" }, { name = "rich" }, { name = "rich", marker = "extra == 'codegen'" }, @@ -1314,7 +1455,6 @@ requires-dist = [ { name = "types-setuptools", marker = "extra == 'dev'" }, { name = "uvicorn", marker = "extra == 'dev'" }, ] -provides-extras = ["dev", "unit", "test", "docs", "codegen"] [[package]] name = "llama-stack-client" @@ -2062,6 +2202,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 }, ] +[[package]] +name = "portalocker" +version = "2.10.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pywin32", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ed/d3/c6c64067759e87af98cc668c1cc75171347d0f1577fab7ca3749134e3cd4/portalocker-2.10.1.tar.gz", hash = "sha256:ef1bf844e878ab08aee7e40184156e1151f228f103aa5c6bd0724cc330960f8f", size = 40891 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/fb/a70a4214956182e0d7a9099ab17d50bfcba1056188e9b14f35b9e2b62a0d/portalocker-2.10.1-py3-none-any.whl", hash = "sha256:53a5984ebc86a025552264b459b46a2086e269b21823cb572f8f28ee759e45bf", size = 18423 }, +] + [[package]] name = "pre-commit" version = "4.1.0" @@ -2668,6 +2820,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e3/fe/72e7e166bda3885810bee7b23049133e142f7c80c295bae02c562caeea16/pyzmq-26.2.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:bd8fdee945b877aa3bffc6a5a8816deb048dab0544f9df3731ecd0e54d8c84c9", size = 556563 }, ] +[[package]] +name = "qdrant-client" +version = "1.13.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "grpcio" }, + { name = "grpcio-tools" }, + { name = "httpx", extra = ["http2"] }, + { name = "numpy" }, + { name = "portalocker" }, + { name = "pydantic" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/58/1e4acd7ff7637ed56a66e5044699e7af6067232703d0b34f05068fc6234b/qdrant_client-1.13.3.tar.gz", hash = "sha256:61ca09e07c6d7ac0dfbdeb13dca4fe5f3e08fa430cb0d74d66ef5d023a70adfc", size = 266278 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dd/b4/bd676f91f5234ab59282e4a110f324029684482cbe08e7a1c77b6338013b/qdrant_client-1.13.3-py3-none-any.whl", hash = "sha256:f52cacbb936e547d3fceb1aaed3e3c56be0ebfd48e8ea495ea3dbc89c671d1d2", size = 306674 }, +] + [[package]] name = "rapidfuzz" version = "3.12.2" @@ -3417,7 +3587,8 @@ source = { registry = "https://download.pytorch.org/whl/cpu" } resolution-markers = [ "python_full_version < '3.11' and sys_platform == 'darwin'", "python_full_version == '3.11.*' and sys_platform == 'darwin'", - "python_full_version >= '3.12' and sys_platform == 'darwin'", + "python_full_version >= '3.13' and sys_platform == 'darwin'", + "python_full_version == '3.12.*' and sys_platform == 'darwin'", ] dependencies = [ { name = "filelock", marker = "sys_platform == 'darwin'" }, @@ -3444,8 +3615,10 @@ resolution-markers = [ "python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux'", "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' 
and sys_platform != 'darwin' and sys_platform != 'linux')", "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "(python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform != 'darwin' and sys_platform != 'linux')", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and sys_platform == 'linux'", + "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version == '3.12.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.12.*' and sys_platform != 'darwin' and sys_platform != 'linux')", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", + "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", ] dependencies = [ { name = "filelock", marker = "sys_platform != 'darwin'" }, @@ -3482,8 +3655,10 @@ resolution-markers = [ "python_full_version < '3.11' and sys_platform == 'darwin'", "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", "python_full_version == '3.11.*' and sys_platform == 'darwin'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "python_full_version >= '3.12' and sys_platform == 'darwin'", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", + "python_full_version == '3.12.*' and platform_machine == 'aarch64' and sys_platform == 'linux'", + "python_full_version >= '3.13' and sys_platform == 'darwin'", + "python_full_version == '3.12.*' and sys_platform == 'darwin'", ] dependencies = [ { name = "numpy", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or sys_platform == 'darwin'" }, @@ -3509,7 +3684,8 @@ source = { registry = "https://download.pytorch.org/whl/cpu" } resolution-markers = [ "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')", "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')", - "(python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')", + "(python_full_version == '3.12.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.12.*' and sys_platform != 'darwin' and sys_platform != 'linux')", ] dependencies = [ { name = "numpy", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },