diff --git a/.github/actions/setup-runner/action.yml b/.github/actions/setup-runner/action.yml
new file mode 100644
index 000000000..972dcbdae
--- /dev/null
+++ b/.github/actions/setup-runner/action.yml
@@ -0,0 +1,28 @@
+name: Setup runner
+description: Prepare a runner for the tests (install uv, python, project dependencies, etc.)
+# Workflows that test several interpreters pass `python-version`; others get the default.
+inputs:
+  python-version:
+    description: Python version to install with uv and use for the virtual environment
+    required: false
+    default: "3.10"
+runs:
+  using: "composite"
+  steps:
+    - name: Install uv
+      uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
+      with:
+        python-version: ${{ inputs.python-version }}
+        activate-environment: true
+        version: "0.7.6"
+
+    - name: Install dependencies
+      shell: bash # composite actions require an explicit shell on every run step
+      run: |
+        uv sync --all-extras
+        uv pip install ollama faiss-cpu
+        # always test against the latest version of the client
+        # TODO: this is not necessarily a good idea. we need to test against both published and latest
+        # to find out backwards compatibility issues.
+        uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main
+        uv pip install -e .
diff --git a/.github/workflows/integration-auth-tests.yml b/.github/workflows/integration-auth-tests.yml
index 82a76ad32..25f696c9e 100644
--- a/.github/workflows/integration-auth-tests.yml
+++ b/.github/workflows/integration-auth-tests.yml
@@ -23,23 +23,18 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        auth-provider: [kubernetes]
+        auth-provider: [oauth2_token]
       fail-fast: false # we want to run all tests regardless of failure
 
     steps:
       - name: Checkout repository
         uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
 
-      - name: Install uv
-        uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
-        with:
-          python-version: "3.10"
-          activate-environment: true
+      - name: Install dependencies
+        uses: ./.github/actions/setup-runner
 
-      - name: Set Up Environment and Install Dependencies
+      - name: Build Llama Stack
         run: |
-          uv sync --extra dev --extra test
-          uv pip install -e .
llama stack build --template ollama --image-type venv - name: Install minikube @@ -47,29 +42,53 @@ jobs: uses: medyagh/setup-minikube@cea33675329b799adccc9526aa5daccc26cd5052 # v0.0.19 - name: Start minikube - if: ${{ matrix.auth-provider == 'kubernetes' }} + if: ${{ matrix.auth-provider == 'oauth2_token' }} run: | minikube start kubectl get pods -A - name: Configure Kube Auth - if: ${{ matrix.auth-provider == 'kubernetes' }} + if: ${{ matrix.auth-provider == 'oauth2_token' }} run: | kubectl create namespace llama-stack kubectl create serviceaccount llama-stack-auth -n llama-stack kubectl create rolebinding llama-stack-auth-rolebinding --clusterrole=admin --serviceaccount=llama-stack:llama-stack-auth -n llama-stack kubectl create token llama-stack-auth -n llama-stack > llama-stack-auth-token + cat <> $GITHUB_ENV + echo "KUBERNETES_API_SERVER_URL=$(kubectl get --raw /.well-known/openid-configuration| jq -r .jwks_uri)" >> $GITHUB_ENV echo "KUBERNETES_CA_CERT_PATH=$(kubectl config view --minify -o jsonpath='{.clusters[0].cluster.certificate-authority}')" >> $GITHUB_ENV + echo "KUBERNETES_ISSUER=$(kubectl get --raw /.well-known/openid-configuration| jq -r .issuer)" >> $GITHUB_ENV + echo "KUBERNETES_AUDIENCE=$(kubectl create token default --duration=1h | cut -d. 
-f2 | base64 -d | jq -r '.aud[0]')" >> $GITHUB_ENV - name: Set Kube Auth Config and run server env: INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" - if: ${{ matrix.auth-provider == 'kubernetes' }} + if: ${{ matrix.auth-provider == 'oauth2_token' }} run: | run_dir=$(mktemp -d) cat <<'EOF' > $run_dir/run.yaml @@ -81,10 +100,10 @@ jobs: port: 8321 EOF yq eval '.server.auth = {"provider_type": "${{ matrix.auth-provider }}"}' -i $run_dir/run.yaml - yq eval '.server.auth.config = {"api_server_url": "${{ env.KUBERNETES_API_SERVER_URL }}", "ca_cert_path": "${{ env.KUBERNETES_CA_CERT_PATH }}"}' -i $run_dir/run.yaml + yq eval '.server.auth.config = {"tls_cafile": "${{ env.KUBERNETES_CA_CERT_PATH }}", "issuer": "${{ env.KUBERNETES_ISSUER }}", "audience": "${{ env.KUBERNETES_AUDIENCE }}"}' -i $run_dir/run.yaml + yq eval '.server.auth.config.jwks = {"uri": "${{ env.KUBERNETES_API_SERVER_URL }}"}' -i $run_dir/run.yaml cat $run_dir/run.yaml - source .venv/bin/activate nohup uv run llama stack run $run_dir/run.yaml --image-type venv > server.log 2>&1 & - name: Wait for Llama Stack server to be ready diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index da41e2185..2414522a7 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -32,24 +32,14 @@ jobs: - name: Checkout repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Install uv - uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1 - with: - python-version: "3.10" - activate-environment: true + - name: Install dependencies + uses: ./.github/actions/setup-runner - name: Setup ollama uses: ./.github/actions/setup-ollama - - name: Set Up Environment and Install Dependencies + - name: Build Llama Stack run: | - uv sync --extra dev --extra test - uv pip install ollama faiss-cpu - # always test against the latest version of the client - # TODO: this is not necessarily a good 
idea. we need to test against both published and latest - # to find out backwards compatibility issues. - uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main - uv pip install -e . llama stack build --template ollama --image-type venv - name: Start Llama Stack server in background @@ -57,7 +47,6 @@ jobs: env: INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" run: | - source .venv/bin/activate LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv & - name: Wait for Llama Stack server to be ready @@ -85,6 +74,7 @@ jobs: echo "Ollama health check failed" exit 1 fi + - name: Check Storage and Memory Available Before Tests if: ${{ always() }} run: | diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml index 3c1682833..cf53459b9 100644 --- a/.github/workflows/providers-build.yml +++ b/.github/workflows/providers-build.yml @@ -50,21 +50,8 @@ jobs: - name: Checkout repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 - with: - python-version: '3.10' - - - name: Install uv - uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1 - with: - python-version: "3.10" - - - name: Install LlamaStack - run: | - uv venv - source .venv/bin/activate - uv pip install -e . 
+ - name: Install dependencies + uses: ./.github/actions/setup-runner - name: Print build dependencies run: | @@ -79,7 +66,6 @@ jobs: - name: Print dependencies in the image if: matrix.image-type == 'venv' run: | - source test/bin/activate uv pip list build-single-provider: @@ -88,21 +74,8 @@ jobs: - name: Checkout repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 - with: - python-version: '3.10' - - - name: Install uv - uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1 - with: - python-version: "3.10" - - - name: Install LlamaStack - run: | - uv venv - source .venv/bin/activate - uv pip install -e . + - name: Install dependencies + uses: ./.github/actions/setup-runner - name: Build a single provider run: | @@ -114,21 +87,8 @@ jobs: - name: Checkout repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 - with: - python-version: '3.10' - - - name: Install uv - uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1 - with: - python-version: "3.10" - - - name: Install LlamaStack - run: | - uv venv - source .venv/bin/activate - uv pip install -e . + - name: Install dependencies + uses: ./.github/actions/setup-runner - name: Build a single provider run: | @@ -152,21 +112,8 @@ jobs: - name: Checkout repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 - with: - python-version: '3.10' - - - name: Install uv - uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1 - with: - python-version: "3.10" - - - name: Install LlamaStack - run: | - uv venv - source .venv/bin/activate - uv pip install -e . 
+ - name: Install dependencies + uses: ./.github/actions/setup-runner - name: Pin template to UBI9 base run: | diff --git a/.github/workflows/test-external-providers.yml b/.github/workflows/test-external-providers.yml index 2e18fc5eb..06ab7cf3c 100644 --- a/.github/workflows/test-external-providers.yml +++ b/.github/workflows/test-external-providers.yml @@ -25,15 +25,8 @@ jobs: - name: Checkout repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Install uv - uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1 - with: - python-version: "3.10" - - - name: Set Up Environment and Install Dependencies - run: | - uv sync --extra dev --extra test - uv pip install -e . + - name: Install dependencies + uses: ./.github/actions/setup-runner - name: Apply image type to config file run: | @@ -59,7 +52,6 @@ jobs: env: INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" run: | - source ci-test/bin/activate uv run pip list nohup uv run --active llama stack run tests/external-provider/llama-stack-provider-ollama/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 & diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index d2dd34e05..fc0459f0f 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -30,17 +30,11 @@ jobs: - "3.12" - "3.13" steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - name: Checkout repository + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Set up Python ${{ matrix.python }} - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 - with: - python-version: ${{ matrix.python }} - - - uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1 - with: - python-version: ${{ matrix.python }} - enable-cache: false + - name: Install dependencies + uses: ./.github/actions/setup-runner - name: Run unit tests run: | diff 
--git a/.github/workflows/update-readthedocs.yml b/.github/workflows/update-readthedocs.yml index 04e23bca9..981332a77 100644 --- a/.github/workflows/update-readthedocs.yml +++ b/.github/workflows/update-readthedocs.yml @@ -37,16 +37,8 @@ jobs: - name: Checkout repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 - with: - python-version: '3.11' - - - name: Install the latest version of uv - uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1 - - - name: Sync with uv - run: uv sync --extra docs + - name: Install dependencies + uses: ./.github/actions/setup-runner - name: Build HTML run: | diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d7c3e3e2f..8f71a6ba1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -167,14 +167,11 @@ If you have made changes to a provider's configuration in any form (introducing If you are making changes to the documentation at [https://llama-stack.readthedocs.io/en/latest/](https://llama-stack.readthedocs.io/en/latest/), you can use the following command to build the documentation and preview your changes. You will need [Sphinx](https://www.sphinx-doc.org/en/master/) and the readthedocs theme. ```bash -cd docs -uv sync --extra docs - # This rebuilds the documentation pages. -uv run make html +uv run --with ".[docs]" make -C docs/ html # This will start a local server (usually at http://127.0.0.1:8000) that automatically rebuilds and refreshes when you make changes to the documentation. 
-uv run sphinx-autobuild source build/html --write-all +uv run --with ".[docs]" sphinx-autobuild docs/source docs/build/html --write-all ``` ### Update API Documentation diff --git a/docs/readme.md b/docs/readme.md index b88a4738d..d84dbe6eb 100644 --- a/docs/readme.md +++ b/docs/readme.md @@ -3,10 +3,10 @@ Here's a collection of comprehensive guides, examples, and resources for building AI applications with Llama Stack. For the complete documentation, visit our [ReadTheDocs page](https://llama-stack.readthedocs.io/en/latest/index.html). ## Render locally + +From the llama-stack root directory, run the following command to render the docs locally: ```bash -pip install -r requirements.txt -cd docs -python -m sphinx_autobuild source _build +uv run --with ".[docs]" sphinx-autobuild docs/source docs/build/html --write-all ``` You can open up the docs in your browser at http://localhost:8000 diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index 6cd45c33b..000000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,16 +0,0 @@ -linkify -myst-parser --e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme -sphinx==8.1.3 -sphinx-copybutton -sphinx-design -sphinx-pdj-theme -sphinx-rtd-theme>=1.0.0 -sphinx-tabs -sphinx_autobuild -sphinx_rtd_dark_mode -sphinxcontrib-mermaid -sphinxcontrib-openapi -sphinxcontrib-redoc -sphinxcontrib-video -tomli diff --git a/docs/source/conf.py b/docs/source/conf.py index 501a923dd..43e8dbdd5 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -53,14 +53,6 @@ myst_enable_extensions = ["colon_fence"] html_theme = "sphinx_rtd_theme" html_use_relative_paths = True - -# html_theme = "sphinx_pdj_theme" -# html_theme_path = [sphinx_pdj_theme.get_html_theme_path()] - -# html_theme = "pytorch_sphinx_theme" -# html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()] - - templates_path = ["_templates"] exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] diff --git 
a/docs/source/distributions/building_distro.md b/docs/source/distributions/building_distro.md index d9b73c910..0dbabf8aa 100644 --- a/docs/source/distributions/building_distro.md +++ b/docs/source/distributions/building_distro.md @@ -338,6 +338,48 @@ INFO: Application startup complete. INFO: Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit) INFO: 2401:db00:35c:2d2b:face:0:c9:0:54678 - "GET /models/list HTTP/1.1" 200 OK ``` +### Listing Distributions +Using the list command, you can view all existing Llama Stack distributions, including stacks built from templates, from scratch, or using custom configuration files. + +``` +llama stack list -h +usage: llama stack list [-h] + +list the build stacks + +options: + -h, --help show this help message and exit +``` + +Example Usage + +``` +llama stack list +``` + +### Removing a Distribution +Use the remove command to delete a distribution you've previously built. + +``` +llama stack rm -h +usage: llama stack rm [-h] [--all] [name] + +Remove the build stack + +positional arguments: + name Name of the stack to delete (default: None) + +options: + -h, --help show this help message and exit + --all, -a Delete all stacks (use with caution) (default: False) +``` + +Example +``` +llama stack rm llamastack-test +``` + +To keep your environment organized and avoid clutter, consider using `llama stack list` to review old or unused distributions and `llama stack rm <name>` to delete them when they’re no longer needed.
### Troubleshooting diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md index b62227a84..de99b6576 100644 --- a/docs/source/distributions/configuration.md +++ b/docs/source/distributions/configuration.md @@ -118,11 +118,6 @@ server: port: 8321 # Port to listen on (default: 8321) tls_certfile: "/path/to/cert.pem" # Optional: Path to TLS certificate for HTTPS tls_keyfile: "/path/to/key.pem" # Optional: Path to TLS key for HTTPS - auth: # Optional: Authentication configuration - provider_type: "kubernetes" # Type of auth provider - config: # Provider-specific configuration - api_server_url: "https://kubernetes.default.svc" - ca_cert_path: "/path/to/ca.crt" # Optional: Path to CA certificate ``` ### Authentication Configuration @@ -135,7 +130,7 @@ Authorization: Bearer The server supports multiple authentication providers: -#### Kubernetes Provider +#### OAuth 2.0/OpenID Connect Provider with Kubernetes The Kubernetes cluster must be configured to use a service account for authentication. @@ -146,14 +141,67 @@ kubectl create rolebinding llama-stack-auth-rolebinding --clusterrole=admin --se kubectl create token llama-stack-auth -n llama-stack > llama-stack-auth-token ``` -Validates tokens against the Kubernetes API server: +Make sure the `kube-apiserver` runs with `--anonymous-auth=true` to allow unauthenticated requests +and that the correct RoleBinding is created to allow the service account to access the necessary +resources. 
If that is not the case, you can create a RoleBinding for the service account to access +the necessary resources: + +```yaml +# allow-anonymous-openid.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: allow-anonymous-openid +rules: +- nonResourceURLs: ["/openid/v1/jwks"] + verbs: ["get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: allow-anonymous-openid +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: allow-anonymous-openid +subjects: +- kind: User + name: system:anonymous + apiGroup: rbac.authorization.k8s.io +``` + +And then apply the configuration: +```bash +kubectl apply -f allow-anonymous-openid.yaml +``` + +Validates tokens against the Kubernetes API server through the OIDC provider: ```yaml server: auth: - provider_type: "kubernetes" + provider_type: "oauth2_token" config: - api_server_url: "https://kubernetes.default.svc" # URL of the Kubernetes API server - ca_cert_path: "/path/to/ca.crt" # Optional: Path to CA certificate + jwks: + uri: "https://kubernetes.default.svc" + key_recheck_period: 3600 + tls_cafile: "/path/to/ca.crt" + issuer: "https://kubernetes.default.svc" + audience: "https://kubernetes.default.svc" +``` + +To find your cluster's audience, run: +```bash +kubectl create token default --duration=1h | cut -d. -f2 | base64 -d | jq .aud +``` + +For the issuer, you can use the OIDC provider's URL: +```bash +kubectl get --raw /.well-known/openid-configuration| jq .issuer +``` + +For the tls_cafile, you can use the CA certificate of the OIDC provider: +```bash +kubectl config view --minify -o jsonpath='{.clusters[0].cluster.certificate-authority}' ``` The provider extracts user information from the JWT token: @@ -208,6 +256,80 @@ And must respond with: If no access attributes are returned, the token is used as a namespace. 
+### Quota Configuration + +The `quota` section allows you to enable server-side request throttling for both +authenticated and anonymous clients. This is useful for preventing abuse, enforcing +fairness across tenants, and controlling infrastructure costs without requiring +client-side rate limiting or external proxies. + +Quotas are disabled by default. When enabled, each client is tracked using either: + +* Their authenticated `client_id` (derived from the Bearer token), or +* Their IP address (fallback for anonymous requests) + +Quota state is stored in a SQLite-backed key-value store, and rate limits are applied +within a configurable time window (currently only `day` is supported). + +#### Example + +```yaml +server: + quota: + kvstore: + type: sqlite + db_path: ./quotas.db + anonymous_max_requests: 100 + authenticated_max_requests: 1000 + period: day +``` + +#### Configuration Options + +| Field | Description | +| ---------------------------- | -------------------------------------------------------------------------- | +| `kvstore` | Required. Backend storage config for tracking request counts. | +| `kvstore.type` | Must be `"sqlite"` for now. Other backends may be supported in the future. | +| `kvstore.db_path` | File path to the SQLite database. | +| `anonymous_max_requests` | Max requests per period for unauthenticated clients. | +| `authenticated_max_requests` | Max requests per period for authenticated clients. | +| `period` | Time window for quota enforcement. Only `"day"` is supported. | + +> Note: if `authenticated_max_requests` is set but no authentication provider is +configured, the server will fall back to applying `anonymous_max_requests` to all +clients. 
+ +#### Example with Authentication Enabled + +```yaml +server: + port: 8321 + auth: + provider_type: custom + config: + endpoint: https://auth.example.com/validate + quota: + kvstore: + type: sqlite + db_path: ./quotas.db + anonymous_max_requests: 100 + authenticated_max_requests: 1000 + period: day +``` + +If a client exceeds their limit, the server responds with: + +```http +HTTP/1.1 429 Too Many Requests +Content-Type: application/json + +{ + "error": { + "message": "Quota exceeded" + } +} +``` + ## Extending to handle Safety Configuring Safety can be a little involved so it is instructive to go through an example. diff --git a/llama_stack/cli/stack/list_stacks.py b/llama_stack/cli/stack/list_stacks.py new file mode 100644 index 000000000..2ea0fdeea --- /dev/null +++ b/llama_stack/cli/stack/list_stacks.py @@ -0,0 +1,56 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+
+import argparse
+from pathlib import Path
+
+from llama_stack.cli.subcommand import Subcommand
+from llama_stack.cli.table import print_table
+
+
+class StackListBuilds(Subcommand):
+    """`llama stack list`: tabulate the stacks found under ~/.llama/distributions."""
+
+    def __init__(self, subparsers: argparse._SubParsersAction):
+        super().__init__()
+        self.parser = subparsers.add_parser(
+            "list",
+            prog="llama stack list",
+            description="list the build stacks",
+            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        )
+        self._add_arguments()
+        self.parser.set_defaults(func=self._list_stack_command)
+
+    def _get_distribution_dirs(self) -> dict[str, Path]:
+        """Map each sub-directory name of ~/.llama/distributions to its path."""
+        root = Path.home() / ".llama" / "distributions"
+        if not root.exists():
+            return {}
+        return {entry.name: entry for entry in root.iterdir() if entry.is_dir()}
+
+    def _list_stack_command(self, args: argparse.Namespace) -> None:
+        """Print one table row per stack, flagging which config files exist."""
+        stacks = self._get_distribution_dirs()
+        if not stacks:
+            print("No stacks found in ~/.llama/distributions")
+            return
+
+        def present(config_file: Path) -> str:
+            # Render file existence as the table's Yes/No marker.
+            return "Yes" if config_file.exists() else "No"
+
+        columns = ["Stack Name", "Path", "Build Config", "Run Config"]
+        table = [
+            [
+                stack,
+                str(location),
+                present(location / f"{stack}-build.yaml"),
+                present(location / f"{stack}-run.yaml"),
+            ]
+            for stack, location in stacks.items()
+        ]
+        print_table(table, columns, separate_rows=True)
diff --git a/llama_stack/cli/stack/remove.py b/llama_stack/cli/stack/remove.py
new file mode 100644
index 000000000..be7c49a5d
--- /dev/null
+++ b/llama_stack/cli/stack/remove.py
@@ -0,0 +1,116 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import argparse
+import shutil
+import sys
+from pathlib import Path
+
+from termcolor import cprint
+
+from llama_stack.cli.subcommand import Subcommand
+from llama_stack.cli.table import print_table
+
+
+class StackRemove(Subcommand):
+    """`llama stack rm`: delete one built stack by name, or every stack with --all."""
+
+    def __init__(self, subparsers: argparse._SubParsersAction):
+        super().__init__()
+        # Register `rm` and route it to the removal handler.
+        self.parser = subparsers.add_parser(
+            "rm",
+            prog="llama stack rm",
+            description="Remove the build stack",
+            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        )
+        self._add_arguments()
+        self.parser.set_defaults(func=self._remove_stack_build_command)
+
+    def _add_arguments(self) -> None:
+        """Register the optional positional stack name and the --all/-a flag."""
+        self.parser.add_argument(
+            "name",
+            type=str,
+            nargs="?",
+            help="Name of the stack to delete",
+        )
+        self.parser.add_argument(
+            "--all",
+            "-a",
+            action="store_true",
+            help="Delete all stacks (use with caution)",
+        )
+
+    def _get_distribution_dirs(self) -> dict[str, Path]:
+        """Return a dictionary of distribution names and their paths"""
+        dist_dir = Path.home() / ".llama" / "distributions"
+        if not dist_dir.exists():
+            # No distributions directory means there are no stacks to report.
+            return {}
+        return {stack_dir.name: stack_dir for stack_dir in dist_dir.iterdir() if stack_dir.is_dir()}
+
+    def _list_stacks(self) -> None:
+        """Display available stacks in a table"""
+        distributions = self._get_distribution_dirs()
+        if not distributions:
+            print("No stacks found in ~/.llama/distributions")
+            return
+
+        headers = ["Stack Name", "Path"]
+        rows = [[name, str(path)] for name, path in distributions.items()]
+        print_table(rows, headers, separate_rows=True)
+
+    def _remove_stack_build_command(self, args: argparse.Namespace) -> None:
+        """Delete the named stack, or all stacks with --all, after confirmation.
+
+        Without a name (and without --all) the available stacks are listed and
+        nothing is deleted. An unknown name is reported and nothing is deleted.
+        Exits with status 2 if a directory cannot be removed.
+        """
+        distributions = self._get_distribution_dirs()
+
+        if args.all:
+            confirm = input("Are you sure you want to delete ALL stacks? [yes-i-really-want/N] ").lower()
+            if confirm != "yes-i-really-want":
+                print("Deletion cancelled.")
+                return
+
+            for name, path in distributions.items():
+                try:
+                    shutil.rmtree(path)
+                    print(f"Deleted stack: {name}")
+                except Exception as e:
+                    cprint(f"Failed to delete stack {name}: {e}", color="red")
+                    sys.exit(2)
+            # Done: do not fall through to the single-stack path below, which
+            # would list/prompt against directories that no longer exist.
+            return
+
+        # No target given: show what could be removed and do nothing else.
+        if not args.name:
+            self._list_stacks()
+            return
+
+        # Unknown name: show the valid choices before reporting the error.
+        if args.name not in distributions:
+            self._list_stacks()
+            cprint(f"Stack not found: {args.name}", color="red")
+            return
+
+        stack_path = distributions[args.name]
+
+        confirm = input(f"Are you sure you want to delete stack '{args.name}'? [y/N] ").lower()
+        if confirm != "y":
+            print("Deletion cancelled.")
+            return
+
+        try:
+            shutil.rmtree(stack_path)
+            print(f"Successfully deleted stack: {args.name}")
+        except Exception as e:
+            cprint(f"Failed to delete stack {args.name}: {e}", color="red")
+            sys.exit(2)
diff --git a/llama_stack/cli/stack/stack.py b/llama_stack/cli/stack/stack.py
index ccf1a5ffc..3aff78e23 100644
--- a/llama_stack/cli/stack/stack.py
+++ b/llama_stack/cli/stack/stack.py
@@ -7,12 +7,14 @@
 import argparse
 from importlib.metadata import version
 
+from llama_stack.cli.stack.list_stacks import StackListBuilds
 from llama_stack.cli.stack.utils import print_subcommand_description
 from llama_stack.cli.subcommand import Subcommand
 
 from .build import StackBuild
 from .list_apis import StackListApis
 from .list_providers import StackListProviders
+from .remove import StackRemove
 from .run import StackRun
 
 
@@ -41,5 +43,6 @@ class StackParser(Subcommand):
         StackListApis.create(subparsers)
         StackListProviders.create(subparsers)
         StackRun.create(subparsers)
-
+        StackRemove.create(subparsers)
+        StackListBuilds.create(subparsers)
         print_subcommand_description(self.parser, subparsers)
diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py
index 446a88ca0..eb790ad93 100644
--- a/llama_stack/distribution/datatypes.py
+++ 
b/llama_stack/distribution/datatypes.py @@ -25,7 +25,7 @@ from llama_stack.apis.tools import Tool, ToolGroup, ToolGroupInput, ToolRuntime from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput from llama_stack.apis.vector_io import VectorIO from llama_stack.providers.datatypes import Api, ProviderSpec -from llama_stack.providers.utils.kvstore.config import KVStoreConfig +from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig LLAMA_STACK_BUILD_CONFIG_VERSION = "2" LLAMA_STACK_RUN_CONFIG_VERSION = "2" @@ -220,21 +220,34 @@ class LoggingConfig(BaseModel): class AuthProviderType(str, Enum): """Supported authentication provider types.""" - KUBERNETES = "kubernetes" + OAUTH2_TOKEN = "oauth2_token" CUSTOM = "custom" class AuthenticationConfig(BaseModel): provider_type: AuthProviderType = Field( ..., - description="Type of authentication provider (e.g., 'kubernetes', 'custom')", + description="Type of authentication provider", ) - config: dict[str, str] = Field( + config: dict[str, Any] = Field( ..., description="Provider-specific configuration", ) +class QuotaPeriod(str, Enum): + DAY = "day" + + +class QuotaConfig(BaseModel): + kvstore: SqliteKVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)") + anonymous_max_requests: int = Field(default=100, description="Max requests for unauthenticated clients per period") + authenticated_max_requests: int = Field( + default=1000, description="Max requests for authenticated clients per period" + ) + period: QuotaPeriod = Field(default=QuotaPeriod.DAY, description="Quota period to set") + + class ServerConfig(BaseModel): port: int = Field( default=8321, @@ -262,6 +275,10 @@ class ServerConfig(BaseModel): default=None, description="The host the server should listen on", ) + quota: QuotaConfig | None = Field( + default=None, + description="Per client quota request configuration", + ) class StackRunConfig(BaseModel): diff --git 
a/llama_stack/distribution/server/auth.py b/llama_stack/distribution/server/auth.py index 83436c51f..fb26b49a7 100644 --- a/llama_stack/distribution/server/auth.py +++ b/llama_stack/distribution/server/auth.py @@ -8,7 +8,8 @@ import json import httpx -from llama_stack.distribution.server.auth_providers import AuthProviderConfig, create_auth_provider +from llama_stack.distribution.datatypes import AuthenticationConfig +from llama_stack.distribution.server.auth_providers import create_auth_provider from llama_stack.log import get_logger logger = get_logger(name=__name__, category="auth") @@ -77,7 +78,7 @@ class AuthenticationMiddleware: access resources that don't have access_attributes defined. """ - def __init__(self, app, auth_config: AuthProviderConfig): + def __init__(self, app, auth_config: AuthenticationConfig): self.app = app self.auth_provider = create_auth_provider(auth_config) @@ -113,6 +114,10 @@ class AuthenticationMiddleware: "roles": [token], } + # Store the client ID in the request scope so that downstream middleware (like QuotaMiddleware) + # can identify the requester and enforce per-client rate limits. + scope["authenticated_client_id"] = token + # Store attributes in request scope scope["user_attributes"] = user_attributes scope["principal"] = validation_result.principal diff --git a/llama_stack/distribution/server/auth_providers.py b/llama_stack/distribution/server/auth_providers.py index b73fded58..723a65b77 100644 --- a/llama_stack/distribution/server/auth_providers.py +++ b/llama_stack/distribution/server/auth_providers.py @@ -4,18 +4,19 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import json +import ssl import time from abc import ABC, abstractmethod from asyncio import Lock -from enum import Enum +from pathlib import Path from urllib.parse import parse_qs import httpx from jose import jwt -from pydantic import BaseModel, Field, field_validator +from pydantic import BaseModel, Field, field_validator, model_validator +from typing_extensions import Self -from llama_stack.distribution.datatypes import AccessAttributes +from llama_stack.distribution.datatypes import AccessAttributes, AuthenticationConfig, AuthProviderType from llama_stack.log import get_logger logger = get_logger(name=__name__, category="auth") @@ -73,21 +74,6 @@ class AuthRequest(BaseModel): request: AuthRequestContext = Field(description="Context information about the request being authenticated") -class AuthProviderType(str, Enum): - """Supported authentication provider types.""" - - KUBERNETES = "kubernetes" - CUSTOM = "custom" - OAUTH2_TOKEN = "oauth2_token" - - -class AuthProviderConfig(BaseModel): - """Base configuration for authentication providers.""" - - provider_type: AuthProviderType = Field(..., description="Type of authentication provider") - config: dict[str, str] = Field(..., description="Provider-specific configuration") - - class AuthProvider(ABC): """Abstract base class for authentication providers.""" @@ -102,83 +88,6 @@ class AuthProvider(ABC): pass -class KubernetesAuthProviderConfig(BaseModel): - api_server_url: str - ca_cert_path: str | None = None - - -class KubernetesAuthProvider(AuthProvider): - """Kubernetes authentication provider that validates tokens against the Kubernetes API server.""" - - def __init__(self, config: KubernetesAuthProviderConfig): - self.config = config - self._client = None - - async def _get_client(self): - """Get or create a Kubernetes client.""" - if self._client is None: - # kubernetes-client has not async support, see: - # https://github.com/kubernetes-client/python/issues/323 - from kubernetes import client - from 
kubernetes.client import ApiClient - - # Configure the client - configuration = client.Configuration() - configuration.host = self.config.api_server_url - if self.config.ca_cert_path: - configuration.ssl_ca_cert = self.config.ca_cert_path - configuration.verify_ssl = bool(self.config.ca_cert_path) - - # Create API client - self._client = ApiClient(configuration) - return self._client - - async def validate_token(self, token: str, scope: dict | None = None) -> TokenValidationResult: - """Validate a Kubernetes token and return access attributes.""" - try: - client = await self._get_client() - - # Set the token in the client - client.set_default_header("Authorization", f"Bearer {token}") - - # Make a request to validate the token - # We use the /api endpoint which requires authentication - from kubernetes.client import CoreV1Api - - api = CoreV1Api(client) - api.get_api_resources(_request_timeout=3.0) # Set timeout for this specific request - - # If we get here, the token is valid - # Extract user info from the token claims - import base64 - - # Decode the token (without verification since we've already validated it) - token_parts = token.split(".") - payload = json.loads(base64.b64decode(token_parts[1] + "=" * (-len(token_parts[1]) % 4))) - - # Extract user information from the token - username = payload.get("sub", "") - groups = payload.get("groups", []) - - return TokenValidationResult( - principal=username, - access_attributes=AccessAttributes( - roles=[username], # Use username as a role - teams=groups, # Use Kubernetes groups as teams - ), - ) - - except Exception as e: - logger.exception("Failed to validate Kubernetes token") - raise ValueError("Invalid or expired token") from e - - async def close(self): - """Close the HTTP client.""" - if self._client: - self._client.close() - self._client = None - - def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) -> AccessAttributes: attributes = AccessAttributes() for claim_key, attribute_key 
in mapping.items(): @@ -198,11 +107,24 @@ def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) return attributes -class OAuth2TokenAuthProviderConfig(BaseModel): +class OAuth2JWKSConfig(BaseModel): # The JWKS URI for collecting public keys - jwks_uri: str - cache_ttl: int = 3600 + uri: str + key_recheck_period: int = Field(default=3600, description="The period to recheck the JWKS URI for key updates") + + +class OAuth2IntrospectionConfig(BaseModel): + url: str + client_id: str + client_secret: str + send_secret_in_body: bool = False + + +class OAuth2TokenAuthProviderConfig(BaseModel): audience: str = "llama-stack" + verify_tls: bool = True + tls_cafile: Path | None = None + issuer: str | None = Field(default=None, description="The OIDC issuer URL.") claims_mapping: dict[str, str] = Field( default_factory=lambda: { "sub": "roles", @@ -214,6 +136,8 @@ class OAuth2TokenAuthProviderConfig(BaseModel): "namespace": "namespaces", }, ) + jwks: OAuth2JWKSConfig | None + introspection: OAuth2IntrospectionConfig | None = None @classmethod @field_validator("claims_mapping") @@ -225,6 +149,14 @@ class OAuth2TokenAuthProviderConfig(BaseModel): raise ValueError(f"claims_mapping value is not a valid attribute: {value}") return v + @model_validator(mode="after") + def validate_mode(self) -> Self: + if not self.jwks and not self.introspection: + raise ValueError("One of jwks or introspection must be configured") + if self.jwks and self.introspection: + raise ValueError("At present only one of jwks or introspection should be configured") + return self + class OAuth2TokenAuthProvider(AuthProvider): """ @@ -240,6 +172,13 @@ class OAuth2TokenAuthProvider(AuthProvider): self._jwks_lock = Lock() async def validate_token(self, token: str, scope: dict | None = None) -> TokenValidationResult: + if self.config.jwks: + return await self.validate_jwt_token(token, scope) + if self.config.introspection: + return await self.introspect_token(token, scope) + raise 
async def introspect_token(self, token: str, scope: dict | None = None) -> TokenValidationResult:
    """Validate a token using token introspection as defined by RFC 7662.

    The token is POSTed as form data to the configured introspection endpoint.
    Client credentials are sent either in the form body or via HTTP basic auth,
    depending on `send_secret_in_body`.

    :param token: The bearer token presented by the caller.
    :param scope: Unused here; accepted for signature parity with `validate_token`.
    :returns: The principal and the access attributes mapped from the response
        fields through `claims_mapping`.
    :raises ValueError: If introspection is not configured, the endpoint does not
        answer 200, the token is not active, the response names no principal, or
        any other unexpected error occurs.
    :raises httpx.TimeoutException: If the introspection request times out.
    """
    introspection = self.config.introspection
    if introspection is None:
        raise ValueError("Introspection is not configured")

    form = {"token": token}
    if introspection.send_secret_in_body:
        form["client_id"] = introspection.client_id
        form["client_secret"] = introspection.client_secret
        auth = None
    else:
        auth = (introspection.client_id, introspection.client_secret)

    # A custom CA bundle takes precedence; otherwise honour `verify_tls` so that
    # disabling verification in the config actually applies to this request.
    if self.config.tls_cafile:
        verify = ssl.create_default_context(cafile=self.config.tls_cafile.as_posix())
    else:
        verify = self.config.verify_tls

    try:
        async with httpx.AsyncClient(verify=verify) as client:
            response = await client.post(
                introspection.url,
                data=form,
                auth=auth,
                timeout=10.0,  # Add a reasonable timeout
            )
            if response.status_code != 200:
                logger.warning(f"Token introspection failed with status code: {response.status_code}")
                raise ValueError(f"Token introspection failed: {response.status_code}")

            fields = response.json()
            # `active` is the only required member of an introspection response;
            # treat a missing value the same as an explicit false.
            if not fields.get("active"):
                raise ValueError("Token not active")

            # `sub` and `username` are both optional, so look them up leniently
            # and only fail when neither identifies the caller.
            principal = fields.get("sub") or fields.get("username")
            if not principal:
                raise ValueError("Token introspection response has no 'sub' or 'username'")

            access_attributes = get_attributes_from_claims(fields, self.config.claims_mapping)
            return TokenValidationResult(
                principal=principal,
                access_attributes=access_attributes,
            )
    except httpx.TimeoutException:
        logger.exception("Token introspection request timed out")
        raise
    except ValueError:
        # Re-raise ValueError exceptions to preserve their message
        raise
    except Exception as e:
        logger.exception("Error during token introspection")
        raise ValueError("Token introspection error") from e
class QuotaMiddleware:
    """
    ASGI middleware that enforces separate request quotas for authenticated and
    anonymous clients within a fixed time window.

    - Authenticated requests are identified by `scope["authenticated_client_id"]`,
      which an upstream authentication middleware is expected to set.
    - Anonymous requests fall back to the client IP address in `scope["client"]`.

    Requests are counted in a KV store (e.g., SQLite), and HTTP 429 is returned
    once a client exceeds its quota. If the KV store cannot be reached the
    request is rejected with HTTP 500.

    The counter update is a plain read followed by a write, so concurrent
    requests from the same client may be under-counted.
    """

    def __init__(
        self,
        app: ASGIApp,
        kv_config: KVStoreConfig,
        anonymous_max_requests: int,
        authenticated_max_requests: int,
        window_seconds: int = 86400,
    ):
        """
        :param app: The wrapped ASGI application.
        :param kv_config: Configuration of the KV store holding the counters.
        :param anonymous_max_requests: Requests allowed per window without a client id.
        :param authenticated_max_requests: Requests allowed per window with a client id.
        :param window_seconds: Length of one quota window in seconds; must be positive.
        :raises ValueError: If `window_seconds` is not positive.
        """
        # The window length is used as a divisor on every request; fail early
        # instead of crashing (or computing nonsense windows) at request time.
        if window_seconds <= 0:
            raise ValueError("window_seconds must be a positive number of seconds")

        self.app = app
        self.kv_config = kv_config
        self.kv: KVStore | None = None
        self.anonymous_max_requests = anonymous_max_requests
        self.authenticated_max_requests = authenticated_max_requests
        self.window_seconds = window_seconds

        if isinstance(self.kv_config, SqliteKVStoreConfig):
            logger.warning(
                "QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. "
                f"window_seconds={self.window_seconds}"
            )

    async def _get_kv(self) -> KVStore:
        """Return the KV store, creating it on first use."""
        if self.kv is None:
            self.kv = await kvstore_impl(self.kv_config)
        return self.kv

    def _client_key_and_limit(self, scope: Scope) -> tuple[str, int]:
        """Return the counter identity and the request limit for this request."""
        import hashlib

        auth_id = scope.get("authenticated_client_id")
        if auth_id:
            # The id may be a raw bearer token, so never store or log it
            # verbatim; a digest identifies the client just as well. The prefix
            # keeps authenticated ids from colliding with anonymous IP keys.
            digest = hashlib.sha256(str(auth_id).encode()).hexdigest()
            return f"auth:{digest}", self.authenticated_max_requests

        # fallback to IP
        client = scope.get("client")
        return (client[0] if client else "anonymous"), self.anonymous_max_requests

    async def __call__(self, scope: Scope, receive: Receive, send: Send):
        """Count the request against the caller's quota, then forward or reject it."""
        # Only HTTP requests are metered; everything else passes straight through.
        if scope["type"] != "http":
            return await self.app(scope, receive, send)

        key_id, limit = self._client_key_and_limit(scope)

        # Counters are bucketed per fixed window; the window index is part of
        # the key, so a new window starts from zero automatically.
        current_window = int(time.time() // self.window_seconds)
        key = f"quota:{key_id}:{current_window}"

        try:
            kv = await self._get_kv()
            prev = await kv.get(key) or "0"
            count = int(prev) + 1

            # Pass an expiration on every write, not only the first one:
            # a later write without one may clear the expiry, depending on the
            # backend (NOTE(review): confirm against the KVStore implementations).
            expiration = datetime.now(timezone.utc) + timedelta(seconds=self.window_seconds)
            await kv.set(key, str(count), expiration=expiration)
        except Exception:
            logger.exception("Failed to access KV store for quota")
            return await self._send_error(send, 500, "Quota service error")

        if count > limit:
            logger.warning(
                "Quota exceeded for client %s: %d/%d",
                key_id,
                count,
                limit,
            )
            return await self._send_error(send, 429, "Quota exceeded")

        return await self.app(scope, receive, send)

    async def _send_error(self, send: Send, status: int, message: str):
        """Send a JSON error response of the form {"error": {"message": ...}}."""
        await send(
            {
                "type": "http.response.start",
                "status": status,
                "headers": [[b"content-type", b"application/json"]],
            }
        )
        body = json.dumps({"error": {"message": message}}).encode()
        await send({"type": "http.response.body", "body": body})
authenticated and anonymous clients") + + quota = config.server.quota + anonymous_max_requests = quota.anonymous_max_requests + # if auth is disabled, use the anonymous max requests + authenticated_max_requests = quota.authenticated_max_requests if config.server.auth else anonymous_max_requests + + kv_config = quota.kvstore + window_map = {"day": 86400} + window_seconds = window_map[quota.period.value] + + app.add_middleware( + QuotaMiddleware, + kv_config=kv_config, + anonymous_max_requests=anonymous_max_requests, + authenticated_max_requests=authenticated_max_requests, + window_seconds=window_seconds, + ) try: impls = asyncio.run(construct_stack(config)) diff --git a/pyproject.toml b/pyproject.toml index a3ad3e530..6b873968a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,6 @@ dependencies = [ "tiktoken", "pillow", "h11>=0.16.0", - "kubernetes", ] [project.optional-dependencies] @@ -94,6 +93,7 @@ test = [ docs = [ "sphinx-autobuild", "myst-parser", + "sphinx", "sphinx-rtd-theme", "sphinx_rtd_dark_mode", "sphinx-copybutton", @@ -103,6 +103,8 @@ docs = [ "sphinxcontrib.video", "sphinxcontrib.mermaid", "tomli", + "linkify", + "sphinxcontrib.openapi", ] codegen = ["rich", "pydantic", "jinja2>=3.1.6"] ui = [ diff --git a/requirements.txt b/requirements.txt index 6dfcc1024..2fe72c803 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,19 +4,16 @@ annotated-types==0.7.0 anyio==4.8.0 attrs==25.1.0 blobfile==3.0.0 -cachetools==5.5.2 certifi==2025.1.31 charset-normalizer==3.4.1 click==8.1.8 colorama==0.4.6 ; sys_platform == 'win32' distro==1.9.0 -durationpy==0.9 ecdsa==0.19.1 exceptiongroup==1.2.2 ; python_full_version < '3.11' filelock==3.17.0 fire==0.7.0 fsspec==2024.12.0 -google-auth==2.38.0 h11==0.16.0 httpcore==1.0.9 httpx==0.28.1 @@ -26,14 +23,12 @@ jinja2==3.1.6 jiter==0.8.2 jsonschema==4.23.0 jsonschema-specifications==2024.10.1 -kubernetes==32.0.1 llama-stack-client==0.2.7 lxml==5.3.1 markdown-it-py==3.0.0 markupsafe==3.0.2 mdurl==0.1.2 
numpy==2.2.3 -oauthlib==3.2.2 openai==1.71.0 packaging==24.2 pandas==2.2.3 @@ -41,7 +36,6 @@ pillow==11.1.0 prompt-toolkit==3.0.50 pyaml==25.1.0 pyasn1==0.4.8 -pyasn1-modules==0.4.1 pycryptodomex==3.21.0 pydantic==2.10.6 pydantic-core==2.27.2 @@ -54,7 +48,6 @@ pyyaml==6.0.2 referencing==0.36.2 regex==2024.11.6 requests==2.32.3 -requests-oauthlib==2.0.0 rich==13.9.4 rpds-py==0.22.3 rsa==4.9 @@ -68,4 +61,3 @@ typing-extensions==4.12.2 tzdata==2025.1 urllib3==2.3.0 wcwidth==0.2.13 -websocket-client==1.8.0 diff --git a/tests/unit/server/test_auth.py b/tests/unit/server/test_auth.py index f15ca9de4..408acb88a 100644 --- a/tests/unit/server/test_auth.py +++ b/tests/unit/server/test_auth.py @@ -11,12 +11,10 @@ import pytest from fastapi import FastAPI from fastapi.testclient import TestClient -from llama_stack.distribution.datatypes import AccessAttributes +from llama_stack.distribution.datatypes import AuthenticationConfig from llama_stack.distribution.server.auth import AuthenticationMiddleware from llama_stack.distribution.server.auth_providers import ( - AuthProviderConfig, AuthProviderType, - TokenValidationResult, get_attributes_from_claims, ) @@ -62,7 +60,7 @@ def invalid_token(): @pytest.fixture def http_app(mock_auth_endpoint): app = FastAPI() - auth_config = AuthProviderConfig( + auth_config = AuthenticationConfig( provider_type=AuthProviderType.CUSTOM, config={"endpoint": mock_auth_endpoint}, ) @@ -78,7 +76,7 @@ def http_app(mock_auth_endpoint): @pytest.fixture def k8s_app(): app = FastAPI() - auth_config = AuthProviderConfig( + auth_config = AuthenticationConfig( provider_type=AuthProviderType.KUBERNETES, config={"api_server_url": "https://kubernetes.default.svc"}, ) @@ -118,7 +116,7 @@ def mock_scope(): @pytest.fixture def mock_http_middleware(mock_auth_endpoint): mock_app = AsyncMock() - auth_config = AuthProviderConfig( + auth_config = AuthenticationConfig( provider_type=AuthProviderType.CUSTOM, config={"endpoint": mock_auth_endpoint}, ) @@ -128,7 +126,7 @@ 
def mock_http_middleware(mock_auth_endpoint): @pytest.fixture def mock_k8s_middleware(): mock_app = AsyncMock() - auth_config = AuthProviderConfig( + auth_config = AuthenticationConfig( provider_type=AuthProviderType.KUBERNETES, config={"api_server_url": "https://kubernetes.default.svc"}, ) @@ -284,120 +282,19 @@ async def test_http_middleware_no_attributes(mock_http_middleware, mock_scope): assert attributes["roles"] == ["test.jwt.token"] -# Kubernetes Tests -def test_missing_auth_header_k8s(k8s_client): - response = k8s_client.get("/test") - assert response.status_code == 401 - assert "Missing or invalid Authorization header" in response.json()["error"]["message"] - - -def test_invalid_auth_header_format_k8s(k8s_client): - response = k8s_client.get("/test", headers={"Authorization": "InvalidFormat token123"}) - assert response.status_code == 401 - assert "Missing or invalid Authorization header" in response.json()["error"]["message"] - - -@patch("kubernetes.client.ApiClient") -def test_valid_k8s_authentication(mock_api_client, k8s_client, valid_token): - # Mock the Kubernetes client - mock_client = AsyncMock() - mock_api_client.return_value = mock_client - - # Mock successful token validation - mock_client.set_default_header = AsyncMock() - - # Mock the token validation to return valid access attributes - with patch("llama_stack.distribution.server.auth_providers.KubernetesAuthProvider.validate_token") as mock_validate: - mock_validate.return_value = TokenValidationResult( - principal="test-principal", - access_attributes=AccessAttributes( - roles=["admin"], teams=["ml-team"], projects=["llama-3"], namespaces=["research"] - ), - ) - response = k8s_client.get("/test", headers={"Authorization": f"Bearer {valid_token}"}) - assert response.status_code == 200 - assert response.json() == {"message": "Authentication successful"} - - -@patch("kubernetes.client.ApiClient") -def test_invalid_k8s_authentication(mock_api_client, k8s_client, invalid_token): - # Mock the 
Kubernetes client - mock_client = AsyncMock() - mock_api_client.return_value = mock_client - - # Mock failed token validation by raising an exception - with patch("llama_stack.distribution.server.auth_providers.KubernetesAuthProvider.validate_token") as mock_validate: - mock_validate.side_effect = ValueError("Invalid or expired token") - response = k8s_client.get("/test", headers={"Authorization": f"Bearer {invalid_token}"}) - assert response.status_code == 401 - assert "Invalid or expired token" in response.json()["error"]["message"] - - -@pytest.mark.asyncio -async def test_k8s_middleware_with_access_attributes(mock_k8s_middleware, mock_scope): - middleware, mock_app = mock_k8s_middleware - mock_receive = AsyncMock() - mock_send = AsyncMock() - - with patch("kubernetes.client.ApiClient") as mock_api_client: - mock_client = AsyncMock() - mock_api_client.return_value = mock_client - - # Mock successful token validation - mock_client.set_default_header = AsyncMock() - - # Mock token payload with access attributes - mock_token_parts = ["header", "eyJzdWIiOiJhZG1pbiIsImdyb3VwcyI6WyJtbC10ZWFtIl19", "signature"] - mock_scope["headers"][1] = (b"authorization", f"Bearer {'.'.join(mock_token_parts)}".encode()) - - await middleware(mock_scope, mock_receive, mock_send) - - assert "user_attributes" in mock_scope - assert mock_scope["user_attributes"]["roles"] == ["admin"] - assert mock_scope["user_attributes"]["teams"] == ["ml-team"] - - mock_app.assert_called_once_with(mock_scope, mock_receive, mock_send) - - -@pytest.mark.asyncio -async def test_k8s_middleware_no_attributes(mock_k8s_middleware, mock_scope): - """Test middleware behavior with no access attributes""" - middleware, mock_app = mock_k8s_middleware - mock_receive = AsyncMock() - mock_send = AsyncMock() - - with patch("kubernetes.client.ApiClient") as mock_api_client: - mock_client = AsyncMock() - mock_api_client.return_value = mock_client - - # Mock successful token validation - mock_client.set_default_header = 
AsyncMock() - - # Mock token payload without access attributes - mock_token_parts = ["header", "eyJzdWIiOiJhZG1pbiJ9", "signature"] - mock_scope["headers"][1] = (b"authorization", f"Bearer {'.'.join(mock_token_parts)}".encode()) - - await middleware(mock_scope, mock_receive, mock_send) - - assert "user_attributes" in mock_scope - attributes = mock_scope["user_attributes"] - assert "roles" in attributes - assert attributes["roles"] == ["admin"] - - mock_app.assert_called_once_with(mock_scope, mock_receive, mock_send) - - # oauth2 token provider tests @pytest.fixture def oauth2_app(): app = FastAPI() - auth_config = AuthProviderConfig( + auth_config = AuthenticationConfig( provider_type=AuthProviderType.OAUTH2_TOKEN, config={ - "jwks_uri": "http://mock-authz-service/token/introspect", - "cache_ttl": "3600", + "jwks": { + "uri": "http://mock-authz-service/token/introspect", + "key_recheck_period": "3600", + }, "audience": "llama-stack", }, ) @@ -517,3 +414,159 @@ def test_get_attributes_from_claims(): # TODO: add more tests for oauth2 token provider + + +# oauth token introspection tests +@pytest.fixture +def mock_introspection_endpoint(): + return "http://mock-authz-service/token/introspect" + + +@pytest.fixture +def introspection_app(mock_introspection_endpoint): + app = FastAPI() + auth_config = AuthenticationConfig( + provider_type=AuthProviderType.OAUTH2_TOKEN, + config={ + "jwks": None, + "introspection": {"url": mock_introspection_endpoint, "client_id": "myclient", "client_secret": "abcdefg"}, + }, + ) + app.add_middleware(AuthenticationMiddleware, auth_config=auth_config) + + @app.get("/test") + def test_endpoint(): + return {"message": "Authentication successful"} + + return app + + +@pytest.fixture +def introspection_app_with_custom_mapping(mock_introspection_endpoint): + app = FastAPI() + auth_config = AuthenticationConfig( + provider_type=AuthProviderType.OAUTH2_TOKEN, + config={ + "jwks": None, + "introspection": { + "url": mock_introspection_endpoint, + 
"client_id": "myclient", + "client_secret": "abcdefg", + "send_secret_in_body": "true", + }, + "claims_mapping": { + "sub": "roles", + "scope": "roles", + "groups": "teams", + "aud": "namespaces", + }, + }, + ) + app.add_middleware(AuthenticationMiddleware, auth_config=auth_config) + + @app.get("/test") + def test_endpoint(): + return {"message": "Authentication successful"} + + return app + + +@pytest.fixture +def introspection_client(introspection_app): + return TestClient(introspection_app) + + +@pytest.fixture +def introspection_client_with_custom_mapping(introspection_app_with_custom_mapping): + return TestClient(introspection_app_with_custom_mapping) + + +def test_missing_auth_header_introspection(introspection_client): + response = introspection_client.get("/test") + assert response.status_code == 401 + assert "Missing or invalid Authorization header" in response.json()["error"]["message"] + + +def test_invalid_auth_header_format_introspection(introspection_client): + response = introspection_client.get("/test", headers={"Authorization": "InvalidFormat token123"}) + assert response.status_code == 401 + assert "Missing or invalid Authorization header" in response.json()["error"]["message"] + + +async def mock_introspection_active(*args, **kwargs): + return MockResponse( + 200, + { + "active": True, + "sub": "my-user", + "groups": ["group1", "group2"], + "scope": "foo bar", + "aud": ["set1", "set2"], + }, + ) + + +async def mock_introspection_inactive(*args, **kwargs): + return MockResponse( + 200, + { + "active": False, + }, + ) + + +async def mock_introspection_invalid(*args, **kwargs): + class InvalidResponse: + def __init__(self, status_code): + self.status_code = status_code + + def json(self): + raise ValueError("Not JSON") + + return InvalidResponse(200) + + +async def mock_introspection_failed(*args, **kwargs): + return MockResponse( + 500, + {}, + ) + + +@patch("httpx.AsyncClient.post", new=mock_introspection_active) +def 
test_valid_introspection_authentication(introspection_client, valid_api_key): + response = introspection_client.get("/test", headers={"Authorization": f"Bearer {valid_api_key}"}) + assert response.status_code == 200 + assert response.json() == {"message": "Authentication successful"} + + +@patch("httpx.AsyncClient.post", new=mock_introspection_inactive) +def test_inactive_introspection_authentication(introspection_client, invalid_api_key): + response = introspection_client.get("/test", headers={"Authorization": f"Bearer {invalid_api_key}"}) + assert response.status_code == 401 + assert "Token not active" in response.json()["error"]["message"] + + +@patch("httpx.AsyncClient.post", new=mock_introspection_invalid) +def test_invalid_introspection_authentication(introspection_client, invalid_api_key): + response = introspection_client.get("/test", headers={"Authorization": f"Bearer {invalid_api_key}"}) + assert response.status_code == 401 + assert "Not JSON" in response.json()["error"]["message"] + + +@patch("httpx.AsyncClient.post", new=mock_introspection_failed) +def test_failed_introspection_authentication(introspection_client, invalid_api_key): + response = introspection_client.get("/test", headers={"Authorization": f"Bearer {invalid_api_key}"}) + assert response.status_code == 401 + assert "Token introspection failed: 500" in response.json()["error"]["message"] + + +@patch("httpx.AsyncClient.post", new=mock_introspection_active) +def test_valid_introspection_with_custom_mapping_authentication( + introspection_client_with_custom_mapping, valid_api_key +): + response = introspection_client_with_custom_mapping.get( + "/test", headers={"Authorization": f"Bearer {valid_api_key}"} + ) + assert response.status_code == 200 + assert response.json() == {"message": "Authentication successful"} diff --git a/tests/unit/server/test_quota.py b/tests/unit/server/test_quota.py new file mode 100644 index 000000000..763bf8e94 --- /dev/null +++ b/tests/unit/server/test_quota.py @@ 
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import pytest
from fastapi import FastAPI, Request
from fastapi.testclient import TestClient
from starlette.middleware.base import BaseHTTPMiddleware

from llama_stack.distribution.datatypes import QuotaConfig, QuotaPeriod
from llama_stack.distribution.server.quota import QuotaMiddleware
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig


class InjectClientIDMiddleware(BaseHTTPMiddleware):
    """
    Middleware that injects 'authenticated_client_id' to mimic AuthenticationMiddleware.
    """

    def __init__(self, app, client_id="client1"):
        super().__init__(app)
        self.client_id = client_id

    async def dispatch(self, request: Request, call_next):
        request.scope["authenticated_client_id"] = self.client_id
        return await call_next(request)


def build_quota_config(db_path) -> QuotaConfig:
    """Quota config used by every test: 1 anonymous / 2 authenticated requests per day."""
    return QuotaConfig(
        kvstore=SqliteKVStoreConfig(db_path=str(db_path)),
        anonymous_max_requests=1,
        authenticated_max_requests=2,
        period=QuotaPeriod.DAY,
    )


def build_quota_app(db_path) -> QuotaMiddleware:
    """Wrap a one-route FastAPI app in QuotaMiddleware backed by the given SQLite file."""
    inner_app = FastAPI()

    @inner_app.get("/test")
    async def test_endpoint():
        return {"message": "ok"}

    quota = build_quota_config(db_path)
    return QuotaMiddleware(
        inner_app,
        kv_config=quota.kvstore,
        anonymous_max_requests=quota.anonymous_max_requests,
        authenticated_max_requests=quota.authenticated_max_requests,
        window_seconds=86400,
    )


@pytest.fixture
def auth_app(tmp_path, request):
    """
    FastAPI app with InjectClientIDMiddleware and QuotaMiddleware for authenticated testing.
    Each test gets its own DB file.
    """
    db_path = tmp_path / f"quota_{request.node.name}.db"
    return InjectClientIDMiddleware(
        build_quota_app(db_path),
        client_id=f"client_{request.node.name}",
    )


def test_authenticated_quota_allows_up_to_limit(auth_app):
    client = TestClient(auth_app)
    assert client.get("/test").status_code == 200
    assert client.get("/test").status_code == 200


def test_authenticated_quota_blocks_after_limit(auth_app):
    client = TestClient(auth_app)
    # The in-quota requests must succeed; otherwise a middleware that rejects
    # everything would also satisfy the 429 assertion below.
    assert client.get("/test").status_code == 200
    assert client.get("/test").status_code == 200
    resp = client.get("/test")
    assert resp.status_code == 429
    assert resp.json()["error"]["message"] == "Quota exceeded"


def test_anonymous_quota_allows_up_to_limit(tmp_path, request):
    app = build_quota_app(tmp_path / f"quota_anon_{request.node.name}.db")

    client = TestClient(app)
    assert client.get("/test").status_code == 200


def test_anonymous_quota_blocks_after_limit(tmp_path, request):
    app = build_quota_app(tmp_path / f"quota_anon_{request.node.name}.db")

    client = TestClient(app)
    # The single in-quota request must succeed before the limit kicks in.
    assert client.get("/test").status_code == 200
    resp = client.get("/test")
    assert resp.status_code == 429
    assert resp.json()["error"]["message"] == "Quota exceeded"
"https://files.pythonhosted.org/packages/4c/a3/ac312faeceffd2d8f86bc6dcb5c401188ba5a01bc88e69bed97578a0dfcd/durationpy-0.9-py3-none-any.whl", hash = "sha256:e65359a7af5cedad07fb77a2dd3f390f8eb0b74cb845589fa6c057086834dd38", size = 3461 }, -] - [[package]] name = "ecdsa" version = "0.19.1" @@ -863,20 +863,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1d/9a/4114a9057db2f1462d5c8f8390ab7383925fe1ac012eaa42402ad65c2963/GitPython-3.1.44-py3-none-any.whl", hash = "sha256:9e0e10cda9bed1ee64bc9a6de50e7e38a9c9943241cd7f585f6df3ed28011110", size = 207599 }, ] -[[package]] -name = "google-auth" -version = "2.38.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cachetools" }, - { name = "pyasn1-modules" }, - { name = "rsa" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/c6/eb/d504ba1daf190af6b204a9d4714d457462b486043744901a6eeea711f913/google_auth-2.38.0.tar.gz", hash = "sha256:8285113607d3b80a3f1543b75962447ba8a09fe85783432a784fdeef6ac094c4", size = 270866 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9d/47/603554949a37bca5b7f894d51896a9c534b9eab808e2520a748e081669d0/google_auth-2.38.0-py2.py3-none-any.whl", hash = "sha256:e7dae6694313f434a2727bf2906f27ad259bae090d7aa896590d86feec3d9d4a", size = 210770 }, -] - [[package]] name = "googleapis-common-protos" version = "1.67.0" @@ -1324,28 +1310,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c9/fb/108ecd1fe961941959ad0ee4e12ee7b8b1477247f30b1fdfd83ceaf017f0/jupyter_core-5.7.2-py3-none-any.whl", hash = "sha256:4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409", size = 28965 }, ] -[[package]] -name = "kubernetes" -version = "32.0.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "certifi" }, - { name = "durationpy" }, - { name = "google-auth" }, - { name = "oauthlib" }, - { name = "python-dateutil" }, - { name = "pyyaml" }, - { name = "requests" }, - { name = "requests-oauthlib" 
}, - { name = "six" }, - { name = "urllib3" }, - { name = "websocket-client" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b7/e8/0598f0e8b4af37cd9b10d8b87386cf3173cb8045d834ab5f6ec347a758b3/kubernetes-32.0.1.tar.gz", hash = "sha256:42f43d49abd437ada79a79a16bd48a604d3471a117a8347e87db693f2ba0ba28", size = 946691 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/08/10/9f8af3e6f569685ce3af7faab51c8dd9d93b9c38eba339ca31c746119447/kubernetes-32.0.1-py2.py3-none-any.whl", hash = "sha256:35282ab8493b938b08ab5526c7ce66588232df00ef5e1dbe88a419107dc10998", size = 1988070 }, -] - [[package]] name = "levenshtein" version = "0.27.1" @@ -1429,6 +1393,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/dc/1e/408fd10217eac0e43aea0604be22b4851a09e03d761d44d4ea12089dd70e/levenshtein-0.27.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:7987ef006a3cf56a4532bd4c90c2d3b7b4ca9ad3bf8ae1ee5713c4a3bdfda913", size = 98045 }, ] +[[package]] +name = "linkify" +version = "1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/c6/246100fa3967074d9725b3716913bd495823547bde5047050d4c3462f994/linkify-1.4.tar.gz", hash = "sha256:9ba276ba179525f7262820d90f009604e51cd4f1466c1112b882ef7eda243d5e", size = 1749 } + [[package]] name = "llama-stack" version = "0.2.7" @@ -1441,7 +1411,6 @@ dependencies = [ { name = "huggingface-hub" }, { name = "jinja2" }, { name = "jsonschema" }, - { name = "kubernetes" }, { name = "llama-stack-client" }, { name = "openai" }, { name = "pillow" }, @@ -1480,7 +1449,9 @@ dev = [ { name = "uvicorn" }, ] docs = [ + { name = "linkify" }, { name = "myst-parser" }, + { name = "sphinx" }, { name = "sphinx-autobuild" }, { name = "sphinx-copybutton" }, { name = "sphinx-design" }, @@ -1488,6 +1459,7 @@ docs = [ { name = "sphinx-rtd-theme" }, { name = "sphinx-tabs" }, { name = "sphinxcontrib-mermaid" }, + { name = "sphinxcontrib-openapi" }, { name = "sphinxcontrib-redoc" }, 
{ name = "sphinxcontrib-video" }, { name = "tomli" }, @@ -1546,7 +1518,7 @@ requires-dist = [ { name = "jinja2", specifier = ">=3.1.6" }, { name = "jinja2", marker = "extra == 'codegen'", specifier = ">=3.1.6" }, { name = "jsonschema" }, - { name = "kubernetes" }, + { name = "linkify", marker = "extra == 'docs'" }, { name = "llama-stack-client", specifier = ">=0.2.7" }, { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.7" }, { name = "mcp", marker = "extra == 'test'" }, @@ -1581,6 +1553,7 @@ requires-dist = [ { name = "ruamel-yaml", marker = "extra == 'dev'" }, { name = "ruff", marker = "extra == 'dev'" }, { name = "setuptools" }, + { name = "sphinx", marker = "extra == 'docs'" }, { name = "sphinx-autobuild", marker = "extra == 'docs'" }, { name = "sphinx-copybutton", marker = "extra == 'docs'" }, { name = "sphinx-design", marker = "extra == 'docs'" }, @@ -1588,6 +1561,7 @@ requires-dist = [ { name = "sphinx-rtd-theme", marker = "extra == 'docs'" }, { name = "sphinx-tabs", marker = "extra == 'docs'" }, { name = "sphinxcontrib-mermaid", marker = "extra == 'docs'" }, + { name = "sphinxcontrib-openapi", marker = "extra == 'docs'" }, { name = "sphinxcontrib-redoc", marker = "extra == 'docs'" }, { name = "sphinxcontrib-video", marker = "extra == 'docs'" }, { name = "sqlite-vec", marker = "extra == 'unit'" }, @@ -1624,9 +1598,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/cd/6b/31c07396c5b3010668e4eb38061a96ffacb47ec4b14d8aeb64c13856c485/llama_stack_client-0.2.7.tar.gz", hash = "sha256:11aee11fdd5e0e8caad07c0cce9c4d88640938844372e7e3453a91ea0757fcb3", size = 259273, upload-time = "2025-05-16T20:31:39.221Z" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/6b/31c07396c5b3010668e4eb38061a96ffacb47ec4b14d8aeb64c13856c485/llama_stack_client-0.2.7.tar.gz", hash = "sha256:11aee11fdd5e0e8caad07c0cce9c4d88640938844372e7e3453a91ea0757fcb3", size = 259273 } 
wheels = [ - { url = "https://files.pythonhosted.org/packages/ac/69/6a5f4683afe355500df4376fdcbfb2fc1e6a0c3bcea5ff8f6114773a9acf/llama_stack_client-0.2.7-py3-none-any.whl", hash = "sha256:78b3f2abdb1770c7b1270a9c0ef58402a988401c564d2e6c83588779ac6fc38d", size = 292727, upload-time = "2025-05-16T20:31:37.587Z" }, + { url = "https://files.pythonhosted.org/packages/ac/69/6a5f4683afe355500df4376fdcbfb2fc1e6a0c3bcea5ff8f6114773a9acf/llama_stack_client-0.2.7-py3-none-any.whl", hash = "sha256:78b3f2abdb1770c7b1270a9c0ef58402a988401c564d2e6c83588779ac6fc38d", size = 292727 }, ] [[package]] @@ -1833,6 +1807,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 }, ] +[[package]] +name = "mistune" +version = "3.1.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c4/79/bda47f7dd7c3c55770478d6d02c9960c430b0cf1773b72366ff89126ea31/mistune-3.1.3.tar.gz", hash = "sha256:a7035c21782b2becb6be62f8f25d3df81ccb4d6fa477a6525b15af06539f02a0", size = 94347 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/01/4d/23c4e4f09da849e127e9f123241946c23c1e30f45a88366879e064211815/mistune-3.1.3-py3-none-any.whl", hash = "sha256:1a32314113cff28aa6432e99e522677c8587fd83e3d51c29b82a52409c842bd9", size = 53410 }, +] + [[package]] name = "mpmath" version = "1.3.0" @@ -2087,15 +2073,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/17/7f/d322a4125405920401450118dbdc52e0384026bd669939484670ce8b2ab9/numpy-2.2.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:783145835458e60fa97afac25d511d00a1eca94d4a8f3ace9fe2043003c678e4", size = 12839607 }, ] -[[package]] -name = "oauthlib" -version = "3.2.2" -source = { registry = 
"https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6d/fa/fbf4001037904031639e6bfbfc02badfc7e12f137a8afa254df6c4c8a670/oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918", size = 177352 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7e/80/cab10959dc1faead58dc8384a781dfbf93cb4d33d50988f7a69f1b7c9bbe/oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca", size = 151688 }, -] - [[package]] name = "openai" version = "1.71.0" @@ -2284,6 +2261,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772 }, ] +[[package]] +name = "picobox" +version = "4.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/b1/830714dd6778c1cb45826722b4e9bd21c94b33cca5df9ef2cc0b80c81b25/picobox-4.0.0.tar.gz", hash = "sha256:114da1b5606b2f615e8b0eb68d04198ad9de75af5adbcf5b36fe4f664ab927b6", size = 22666 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2d/c6/fd64ffd75d47c4fcf6c65808cc5c5c75e5d4357c197d3741ee1339e91257/picobox-4.0.0-py3-none-any.whl", hash = "sha256:4c27eb689fe45dabd9e64c382e04418147d0b746d155b4e80057dbb7ff82027e", size = 11641 }, +] + [[package]] name = "pillow" version = "11.1.0" @@ -2608,18 +2594,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/62/1e/a94a8d635fa3ce4cfc7f506003548d0a2447ae76fd5ca53932970fe3053f/pyasn1-0.4.8-py2.py3-none-any.whl", hash = "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d", size = 77145 }, ] -[[package]] -name = "pyasn1-modules" -version = "0.4.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyasn1" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/1d/67/6afbf0d507f73c32d21084a79946bfcfca5fbc62a72057e9c23797a737c9/pyasn1_modules-0.4.1.tar.gz", hash = "sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c", size = 310028 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/77/89/bc88a6711935ba795a679ea6ebee07e128050d6382eaa35a0a47c8032bdc/pyasn1_modules-0.4.1-py3-none-any.whl", hash = "sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd", size = 181537 }, -] - [[package]] name = "pycparser" version = "2.22" @@ -2875,9 +2849,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pytest" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ac/82/4c9ecabab13363e72d880f2fb504c5f750433b2b6f16e99f4ec21ada284c/pytest_timeout-2.4.0.tar.gz", hash = "sha256:7e68e90b01f9eff71332b25001f85c75495fc4e3a836701876183c4bcfd0540a", size = 17973, upload-time = "2025-05-05T19:44:34.99Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ac/82/4c9ecabab13363e72d880f2fb504c5f750433b2b6f16e99f4ec21ada284c/pytest_timeout-2.4.0.tar.gz", hash = "sha256:7e68e90b01f9eff71332b25001f85c75495fc4e3a836701876183c4bcfd0540a", size = 17973 } wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/b6/3127540ecdf1464a00e5a01ee60a1b09175f6913f0644ac748494d9c4b21/pytest_timeout-2.4.0-py3-none-any.whl", hash = "sha256:c42667e5cdadb151aeb5b26d114aff6bdf5a907f176a007a30b940d3d865b5c2", size = 14382, upload-time = "2025-05-05T19:44:33.502Z" }, + { url = "https://files.pythonhosted.org/packages/fa/b6/3127540ecdf1464a00e5a01ee60a1b09175f6913f0644ac748494d9c4b21/pytest_timeout-2.4.0-py3-none-any.whl", hash = "sha256:c42667e5cdadb151aeb5b26d114aff6bdf5a907f176a007a30b940d3d865b5c2", size = 14382 }, ] [[package]] @@ -3256,19 +3230,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = 
"sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 }, ] -[[package]] -name = "requests-oauthlib" -version = "2.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "oauthlib" }, - { name = "requests" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/42/f2/05f29bc3913aea15eb670be136045bf5c5bbf4b99ecb839da9b422bb2c85/requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9", size = 55650 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/5d/63d4ae3b9daea098d5d6f5da83984853c1bbacd5dc826764b249fe119d24/requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", size = 24179 }, -] - [[package]] name = "rich" version = "13.9.4" @@ -3597,6 +3558,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c6/43/65c0acbd8cc6f50195a3a1fc195c404988b15c67090e73c7a41a9f57d6bd/sphinx_design-0.6.1-py3-none-any.whl", hash = "sha256:b11f37db1a802a183d61b159d9a202314d4d2fe29c163437001324fe2f19549c", size = 2215338 }, ] +[[package]] +name = "sphinx-mdinclude" +version = "0.6.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "docutils" }, + { name = "mistune" }, + { name = "pygments" }, + { name = "sphinx" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b6/a7/c9a7888bb2187fdb06955d71e75f6f266b7e179b356ac76138d160a5b7eb/sphinx_mdinclude-0.6.2.tar.gz", hash = "sha256:447462e82cb8be61404a2204227f920769eb923d2f57608e3325f3bb88286b4c", size = 65257 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/42/3d/6b41fe1637cd53c4b10d56e0e6f396546f837973dabf9c4b2a1de44620ac/sphinx_mdinclude-0.6.2-py3-none-any.whl", hash = "sha256:648e78edb067c0e4bffc22943278d49d54a0714494743592032fa3ad82a86984", size = 16911 }, +] + [[package]] name = "sphinx-rtd-dark-mode" version = "1.3.0" @@ -3664,6 +3640,19 @@ wheels = [ { 
url = "https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = "sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8", size = 98705 }, ] +[[package]] +name = "sphinxcontrib-httpdomain" +version = "1.8.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, + { name = "sphinx" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/be/ef/82d3cfafb7febce4f7df8dcf3cde9d072350b41066e05a4f559b4e9105d0/sphinxcontrib-httpdomain-1.8.1.tar.gz", hash = "sha256:6c2dfe6ca282d75f66df333869bb0ce7331c01b475db6809ff9d107b7cdfe04b", size = 19266 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/49/aad47b8cf27a0d7703f1311aad8c368bb22866ddee1a2d2cd3f69bc45e0c/sphinxcontrib_httpdomain-1.8.1-py2.py3-none-any.whl", hash = "sha256:21eefe1270e4d9de8d717cc89ee92cc4871b8736774393bafc5e38a6bb77b1d5", size = 25513 }, +] + [[package]] name = "sphinxcontrib-jquery" version = "4.1" @@ -3698,6 +3687,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cd/c8/784b9ac6ea08aa594c1a4becbd0dbe77186785362e31fd633b8c6ae0197a/sphinxcontrib_mermaid-1.0.0-py3-none-any.whl", hash = "sha256:60b72710ea02087f212028feb09711225fbc2e343a10d34822fe787510e1caa3", size = 9597 }, ] +[[package]] +name = "sphinxcontrib-openapi" +version = "0.8.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "deepmerge" }, + { name = "jsonschema" }, + { name = "picobox" }, + { name = "pyyaml" }, + { name = "sphinx" }, + { name = "sphinx-mdinclude" }, + { name = "sphinxcontrib-httpdomain" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/a7/66a5c9aba7dbbb0c2b050f60e71402818cbf5f127ace13ed971029cc745e/sphinxcontrib-openapi-0.8.4.tar.gz", hash = "sha256:df883808a5b5e4b4113ad697185c43a3f42df3dce70453af78ba7076907e9a20", size = 71848 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/d5/c3/ee00486f38d78309a60ee0d6031b2545b22ac5f0007d841dd174abc68774/sphinxcontrib_openapi-0.8.4-py3-none-any.whl", hash = "sha256:50911c18d452d9390ee3a384ef8dc8bde6135f542ba55691f81e1fbc0b71014e", size = 34510 }, +] + [[package]] name = "sphinxcontrib-qthelp" version = "2.0.0" @@ -4323,15 +4330,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166 }, ] -[[package]] -name = "websocket-client" -version = "1.8.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e6/30/fba0d96b4b5fbf5948ed3f4681f7da2f9f64512e1d303f94b4cc174c24a5/websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da", size = 54648 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826 }, -] - [[package]] name = "websockets" version = "15.0"