Merge branch 'main' into feat/sambanova-safety

Jorge Piedrahita Ortiz, 2025-05-21 11:32:42 -05:00, committed by GitHub
commit e12df4293b (GPG key ID: B5690EEEBB952194; no known key found for this signature in database)
26 changed files with 1094 additions and 494 deletions

.github/actions/setup-runner/action.yml (new file, 22 additions)

@@ -0,0 +1,22 @@
name: Setup runner
description: Prepare a runner for the tests (install uv, python, project dependencies, etc.)
runs:
using: "composite"
steps:
- name: Install uv
uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
with:
python-version: "3.10"
activate-environment: true
version: 0.7.6
- name: Install dependencies
shell: bash
run: |
uv sync --all-extras
uv pip install ollama faiss-cpu
# always test against the latest version of the client
# TODO: this is not necessarily a good idea. we need to test against both published and latest
# to find out backwards compatibility issues.
uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main
uv pip install -e .


@@ -23,23 +23,18 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
auth-provider: [kubernetes]
auth-provider: [oauth2_token]
fail-fast: false # we want to run all tests regardless of failure
steps:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Install uv
uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
with:
python-version: "3.10"
activate-environment: true
- name: Install dependencies
uses: ./.github/actions/setup-runner
- name: Set Up Environment and Install Dependencies
- name: Build Llama Stack
run: |
uv sync --extra dev --extra test
uv pip install -e .
llama stack build --template ollama --image-type venv
- name: Install minikube
@@ -47,29 +42,53 @@ jobs:
uses: medyagh/setup-minikube@cea33675329b799adccc9526aa5daccc26cd5052 # v0.0.19
- name: Start minikube
if: ${{ matrix.auth-provider == 'kubernetes' }}
if: ${{ matrix.auth-provider == 'oauth2_token' }}
run: |
minikube start
kubectl get pods -A
- name: Configure Kube Auth
if: ${{ matrix.auth-provider == 'kubernetes' }}
if: ${{ matrix.auth-provider == 'oauth2_token' }}
run: |
kubectl create namespace llama-stack
kubectl create serviceaccount llama-stack-auth -n llama-stack
kubectl create rolebinding llama-stack-auth-rolebinding --clusterrole=admin --serviceaccount=llama-stack:llama-stack-auth -n llama-stack
kubectl create token llama-stack-auth -n llama-stack > llama-stack-auth-token
cat <<EOF | kubectl apply -f -
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: allow-anonymous-openid
rules:
- nonResourceURLs: ["/openid/v1/jwks"]
verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: allow-anonymous-openid
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: allow-anonymous-openid
subjects:
- kind: User
name: system:anonymous
apiGroup: rbac.authorization.k8s.io
EOF
- name: Set Kubernetes Config
if: ${{ matrix.auth-provider == 'kubernetes' }}
if: ${{ matrix.auth-provider == 'oauth2_token' }}
run: |
echo "KUBERNETES_API_SERVER_URL=$(kubectl config view --minify -o jsonpath='{.clusters[0].cluster.server}')" >> $GITHUB_ENV
echo "KUBERNETES_API_SERVER_URL=$(kubectl get --raw /.well-known/openid-configuration| jq -r .jwks_uri)" >> $GITHUB_ENV
echo "KUBERNETES_CA_CERT_PATH=$(kubectl config view --minify -o jsonpath='{.clusters[0].cluster.certificate-authority}')" >> $GITHUB_ENV
echo "KUBERNETES_ISSUER=$(kubectl get --raw /.well-known/openid-configuration| jq -r .issuer)" >> $GITHUB_ENV
echo "KUBERNETES_AUDIENCE=$(kubectl create token default --duration=1h | cut -d. -f2 | base64 -d | jq -r '.aud[0]')" >> $GITHUB_ENV
- name: Set Kube Auth Config and run server
env:
INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
if: ${{ matrix.auth-provider == 'kubernetes' }}
if: ${{ matrix.auth-provider == 'oauth2_token' }}
run: |
run_dir=$(mktemp -d)
cat <<'EOF' > $run_dir/run.yaml
@@ -81,10 +100,10 @@ jobs:
port: 8321
EOF
yq eval '.server.auth = {"provider_type": "${{ matrix.auth-provider }}"}' -i $run_dir/run.yaml
yq eval '.server.auth.config = {"api_server_url": "${{ env.KUBERNETES_API_SERVER_URL }}", "ca_cert_path": "${{ env.KUBERNETES_CA_CERT_PATH }}"}' -i $run_dir/run.yaml
yq eval '.server.auth.config = {"tls_cafile": "${{ env.KUBERNETES_CA_CERT_PATH }}", "issuer": "${{ env.KUBERNETES_ISSUER }}", "audience": "${{ env.KUBERNETES_AUDIENCE }}"}' -i $run_dir/run.yaml
yq eval '.server.auth.config.jwks = {"uri": "${{ env.KUBERNETES_API_SERVER_URL }}"}' -i $run_dir/run.yaml
cat $run_dir/run.yaml
source .venv/bin/activate
nohup uv run llama stack run $run_dir/run.yaml --image-type venv > server.log 2>&1 &
- name: Wait for Llama Stack server to be ready


@@ -32,24 +32,14 @@ jobs:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Install uv
uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
with:
python-version: "3.10"
activate-environment: true
- name: Install dependencies
uses: ./.github/actions/setup-runner
- name: Setup ollama
uses: ./.github/actions/setup-ollama
- name: Set Up Environment and Install Dependencies
- name: Build Llama Stack
run: |
uv sync --extra dev --extra test
uv pip install ollama faiss-cpu
# always test against the latest version of the client
# TODO: this is not necessarily a good idea. we need to test against both published and latest
# to find out backwards compatibility issues.
uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main
uv pip install -e .
llama stack build --template ollama --image-type venv
- name: Start Llama Stack server in background
@@ -57,7 +47,6 @@ jobs:
env:
INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
run: |
source .venv/bin/activate
LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv &
- name: Wait for Llama Stack server to be ready
@@ -85,6 +74,7 @@ jobs:
echo "Ollama health check failed"
exit 1
fi
- name: Check Storage and Memory Available Before Tests
if: ${{ always() }}
run: |


@@ -50,21 +50,8 @@ jobs:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: '3.10'
- name: Install uv
uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
with:
python-version: "3.10"
- name: Install LlamaStack
run: |
uv venv
source .venv/bin/activate
uv pip install -e .
- name: Install dependencies
uses: ./.github/actions/setup-runner
- name: Print build dependencies
run: |
@@ -79,7 +66,6 @@ jobs:
- name: Print dependencies in the image
if: matrix.image-type == 'venv'
run: |
source test/bin/activate
uv pip list
build-single-provider:
@@ -88,21 +74,8 @@ jobs:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: '3.10'
- name: Install uv
uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
with:
python-version: "3.10"
- name: Install LlamaStack
run: |
uv venv
source .venv/bin/activate
uv pip install -e .
- name: Install dependencies
uses: ./.github/actions/setup-runner
- name: Build a single provider
run: |
@@ -114,21 +87,8 @@ jobs:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: '3.10'
- name: Install uv
uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
with:
python-version: "3.10"
- name: Install LlamaStack
run: |
uv venv
source .venv/bin/activate
uv pip install -e .
- name: Install dependencies
uses: ./.github/actions/setup-runner
- name: Build a single provider
run: |
@@ -152,21 +112,8 @@ jobs:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: '3.10'
- name: Install uv
uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
with:
python-version: "3.10"
- name: Install LlamaStack
run: |
uv venv
source .venv/bin/activate
uv pip install -e .
- name: Install dependencies
uses: ./.github/actions/setup-runner
- name: Pin template to UBI9 base
run: |


@@ -25,15 +25,8 @@ jobs:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Install uv
uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
with:
python-version: "3.10"
- name: Set Up Environment and Install Dependencies
run: |
uv sync --extra dev --extra test
uv pip install -e .
- name: Install dependencies
uses: ./.github/actions/setup-runner
- name: Apply image type to config file
run: |
@@ -59,7 +52,6 @@ jobs:
env:
INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct"
run: |
source ci-test/bin/activate
uv run pip list
nohup uv run --active llama stack run tests/external-provider/llama-stack-provider-ollama/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 &


@@ -30,17 +30,11 @@ jobs:
- "3.12"
- "3.13"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Python ${{ matrix.python }}
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: ${{ matrix.python }}
- uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
with:
python-version: ${{ matrix.python }}
enable-cache: false
- name: Install dependencies
uses: ./.github/actions/setup-runner
- name: Run unit tests
run: |


@@ -37,16 +37,8 @@ jobs:
- name: Checkout repository
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: Set up Python
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
with:
python-version: '3.11'
- name: Install the latest version of uv
uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
- name: Sync with uv
run: uv sync --extra docs
- name: Install dependencies
uses: ./.github/actions/setup-runner
- name: Build HTML
run: |


@@ -167,14 +167,11 @@ If you have made changes to a provider's configuration in any form (introducing
If you are making changes to the documentation at [https://llama-stack.readthedocs.io/en/latest/](https://llama-stack.readthedocs.io/en/latest/), you can use the following command to build the documentation and preview your changes. You will need [Sphinx](https://www.sphinx-doc.org/en/master/) and the readthedocs theme.
```bash
cd docs
uv sync --extra docs
# This rebuilds the documentation pages.
uv run make html
uv run --with ".[docs]" make -C docs/ html
# This will start a local server (usually at http://127.0.0.1:8000) that automatically rebuilds and refreshes when you make changes to the documentation.
uv run sphinx-autobuild source build/html --write-all
uv run --with ".[docs]" sphinx-autobuild docs/source docs/build/html --write-all
```
### Update API Documentation


@@ -3,10 +3,10 @@
Here's a collection of comprehensive guides, examples, and resources for building AI applications with Llama Stack. For the complete documentation, visit our [ReadTheDocs page](https://llama-stack.readthedocs.io/en/latest/index.html).
## Render locally
From the llama-stack root directory, run the following command to render the docs locally:
```bash
pip install -r requirements.txt
cd docs
python -m sphinx_autobuild source _build
uv run --with ".[docs]" sphinx-autobuild docs/source docs/build/html --write-all
```
You can open up the docs in your browser at http://localhost:8000


@@ -1,16 +0,0 @@
linkify
myst-parser
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
sphinx==8.1.3
sphinx-copybutton
sphinx-design
sphinx-pdj-theme
sphinx-rtd-theme>=1.0.0
sphinx-tabs
sphinx_autobuild
sphinx_rtd_dark_mode
sphinxcontrib-mermaid
sphinxcontrib-openapi
sphinxcontrib-redoc
sphinxcontrib-video
tomli


@@ -53,14 +53,6 @@ myst_enable_extensions = ["colon_fence"]
html_theme = "sphinx_rtd_theme"
html_use_relative_paths = True
# html_theme = "sphinx_pdj_theme"
# html_theme_path = [sphinx_pdj_theme.get_html_theme_path()]
# html_theme = "pytorch_sphinx_theme"
# html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]
templates_path = ["_templates"]
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]


@@ -338,6 +338,48 @@ INFO: Application startup complete.
INFO: Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit)
INFO: 2401:db00:35c:2d2b:face:0:c9:0:54678 - "GET /models/list HTTP/1.1" 200 OK
```
### Listing Distributions
Using the list command, you can view all existing Llama Stack distributions, including stacks built from templates, from scratch, or using custom configuration files.
```
llama stack list -h
usage: llama stack list [-h]
list the build stacks
options:
-h, --help show this help message and exit
```
Example Usage
```
llama stack list
```
### Removing a Distribution
Use the remove command to delete a distribution you've previously built.
```
llama stack rm -h
usage: llama stack rm [-h] [--all] [name]
Remove the build stack
positional arguments:
name Name of the stack to delete (default: None)
options:
-h, --help show this help message and exit
--all, -a Delete all stacks (use with caution) (default: False)
```
Example
```
llama stack rm llamastack-test
```
To keep your environment organized and avoid clutter, consider using `llama stack list` to review old or unused distributions and `llama stack rm <name>` to delete them when they're no longer needed.
### Troubleshooting


@@ -118,11 +118,6 @@ server:
port: 8321 # Port to listen on (default: 8321)
tls_certfile: "/path/to/cert.pem" # Optional: Path to TLS certificate for HTTPS
tls_keyfile: "/path/to/key.pem" # Optional: Path to TLS key for HTTPS
auth: # Optional: Authentication configuration
provider_type: "kubernetes" # Type of auth provider
config: # Provider-specific configuration
api_server_url: "https://kubernetes.default.svc"
ca_cert_path: "/path/to/ca.crt" # Optional: Path to CA certificate
```
### Authentication Configuration
@@ -135,7 +130,7 @@ Authorization: Bearer <token>
The server supports multiple authentication providers:
#### Kubernetes Provider
#### OAuth 2.0/OpenID Connect Provider with Kubernetes
The Kubernetes cluster must be configured to use a service account for authentication.
@@ -146,14 +141,67 @@ kubectl create rolebinding llama-stack-auth-rolebinding --clusterrole=admin --se
kubectl create token llama-stack-auth -n llama-stack > llama-stack-auth-token
```
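Clients then send this token on every request. A minimal sketch of such a call, assuming a local server on the default port 8321 and reusing the `/models/list` route from the server log excerpt above (both illustrative):
```python
# Minimal sketch: call the server with the service-account token created above.
# The address and route are assumptions, not taken from this commit.
import httpx

with open("llama-stack-auth-token") as f:
    token = f.read().strip()

response = httpx.get(
    "http://localhost:8321/models/list",
    headers={"Authorization": f"Bearer {token}"},  # bearer token on every request
)
response.raise_for_status()
print(response.json())
```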
Validates tokens against the Kubernetes API server:
Make sure the `kube-apiserver` runs with `--anonymous-auth=true` to allow unauthenticated requests,
and that anonymous users are allowed to read the JWKS endpoint. If that is not already the case, you
can create a ClusterRole and ClusterRoleBinding that grant `system:anonymous` access to it:
```yaml
# allow-anonymous-openid.yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: allow-anonymous-openid
rules:
- nonResourceURLs: ["/openid/v1/jwks"]
verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: allow-anonymous-openid
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: allow-anonymous-openid
subjects:
- kind: User
name: system:anonymous
apiGroup: rbac.authorization.k8s.io
```
And then apply the configuration:
```bash
kubectl apply -f allow-anonymous-openid.yaml
```
Validates tokens against the Kubernetes API server through the OIDC provider:
```yaml
server:
auth:
provider_type: "kubernetes"
provider_type: "oauth2_token"
config:
api_server_url: "https://kubernetes.default.svc" # URL of the Kubernetes API server
ca_cert_path: "/path/to/ca.crt" # Optional: Path to CA certificate
jwks:
uri: "https://kubernetes.default.svc"
key_recheck_period: 3600
tls_cafile: "/path/to/ca.crt"
issuer: "https://kubernetes.default.svc"
audience: "https://kubernetes.default.svc"
```
To find your cluster's audience, run:
```bash
kubectl create token default --duration=1h | cut -d. -f2 | base64 -d | jq .aud
```
For the issuer, you can use the OIDC provider's URL:
```bash
kubectl get --raw /.well-known/openid-configuration| jq .issuer
```
For the tls_cafile, you can use the CA certificate of the OIDC provider:
```bash
kubectl config view --minify -o jsonpath='{.clusters[0].cluster.certificate-authority}'
```
The provider extracts user information from the JWT token:
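As a rough sketch of what that extraction does, based on the default `claims_mapping` added in this PR (e.g. `sub` -> `roles`, `groups` -> `teams`); the helper below is illustrative, not the provider's exact code:
```python
# Illustrative mapping of JWT claims to access attributes.
# Assumption: string claims are split on whitespace, list claims are used as-is.
def attributes_from_claims(claims: dict, mapping: dict[str, str]) -> dict[str, list[str]]:
    attributes: dict[str, list[str]] = {}
    for claim_key, attribute_key in mapping.items():
        if claim_key not in claims:
            continue
        value = claims[claim_key]
        values = value.split() if isinstance(value, str) else list(value)
        attributes.setdefault(attribute_key, []).extend(values)
    return attributes

# attributes_from_claims({"sub": "alice", "groups": ["ml-team"]},
#                        {"sub": "roles", "groups": "teams"})
# -> {"roles": ["alice"], "teams": ["ml-team"]}
```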
@@ -208,6 +256,80 @@ And must respond with:
If no access attributes are returned, the token is used as a namespace.
### Quota Configuration
The `quota` section allows you to enable server-side request throttling for both
authenticated and anonymous clients. This is useful for preventing abuse, enforcing
fairness across tenants, and controlling infrastructure costs without requiring
client-side rate limiting or external proxies.
Quotas are disabled by default. When enabled, each client is tracked using either:
* Their authenticated `client_id` (derived from the Bearer token), or
* Their IP address (fallback for anonymous requests)
Quota state is stored in a SQLite-backed key-value store, and rate limits are applied
within a configurable time window (currently only `day` is supported).
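Under the hood, the `QuotaMiddleware` added later in this diff buckets requests into fixed windows and keeps one counter per client per window; roughly:
```python
# Sketch of the per-window counter key, mirroring the QuotaMiddleware in this PR.
import time

window_seconds = 86400  # "day", the only supported period
key_id = "client1"      # authenticated client_id, or the client IP when anonymous

current_window = int(time.time() // window_seconds)
key = f"quota:{key_id}:{current_window}"  # KV store value at this key is the request count
```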
#### Example
```yaml
server:
quota:
kvstore:
type: sqlite
db_path: ./quotas.db
anonymous_max_requests: 100
authenticated_max_requests: 1000
period: day
```
#### Configuration Options
| Field | Description |
| ---------------------------- | -------------------------------------------------------------------------- |
| `kvstore` | Required. Backend storage config for tracking request counts. |
| `kvstore.type` | Must be `"sqlite"` for now. Other backends may be supported in the future. |
| `kvstore.db_path` | File path to the SQLite database. |
| `anonymous_max_requests` | Max requests per period for unauthenticated clients. |
| `authenticated_max_requests` | Max requests per period for authenticated clients. |
| `period` | Time window for quota enforcement. Only `"day"` is supported. |
> Note: if `authenticated_max_requests` is set but no authentication provider is
configured, the server will fall back to applying `anonymous_max_requests` to all
clients.
#### Example with Authentication Enabled
```yaml
server:
port: 8321
auth:
provider_type: custom
config:
endpoint: https://auth.example.com/validate
quota:
kvstore:
type: sqlite
db_path: ./quotas.db
anonymous_max_requests: 100
authenticated_max_requests: 1000
period: day
```
If a client exceeds their limit, the server responds with:
```http
HTTP/1.1 429 Too Many Requests
Content-Type: application/json
{
"error": {
"message": "Quota exceeded"
}
}
```
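A client can watch for this status and back off; a minimal sketch (the endpoint is illustrative):
```python
# Illustrative client-side handling of the 429 quota response.
import httpx

resp = httpx.get("http://localhost:8321/models/list")  # hypothetical endpoint
if resp.status_code == 429:
    print("Rate limited:", resp.json()["error"]["message"])  # "Quota exceeded"
```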
## Extending to handle Safety
Configuring Safety can be a little involved so it is instructive to go through an example.


@@ -0,0 +1,56 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
from pathlib import Path
from llama_stack.cli.subcommand import Subcommand
from llama_stack.cli.table import print_table
class StackListBuilds(Subcommand):
"""List built stacks in .llama/distributions directory"""
def __init__(self, subparsers: argparse._SubParsersAction):
super().__init__()
self.parser = subparsers.add_parser(
"list",
prog="llama stack list",
description="list the build stacks",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
self._add_arguments()
self.parser.set_defaults(func=self._list_stack_command)
def _get_distribution_dirs(self) -> dict[str, Path]:
"""Return a dictionary of distribution names and their paths"""
distributions = {}
dist_dir = Path.home() / ".llama" / "distributions"
if dist_dir.exists():
for stack_dir in dist_dir.iterdir():
if stack_dir.is_dir():
distributions[stack_dir.name] = stack_dir
return distributions
def _list_stack_command(self, args: argparse.Namespace) -> None:
distributions = self._get_distribution_dirs()
if not distributions:
print("No stacks found in ~/.llama/distributions")
return
headers = ["Stack Name", "Path"]
headers.extend(["Build Config", "Run Config"])
rows = []
for name, path in distributions.items():
row = [name, str(path)]
# Check for build and run config files
build_config = "Yes" if (path / f"{name}-build.yaml").exists() else "No"
run_config = "Yes" if (path / f"{name}-run.yaml").exists() else "No"
row.extend([build_config, run_config])
rows.append(row)
print_table(rows, headers, separate_rows=True)


@@ -0,0 +1,116 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import argparse
import shutil
import sys
from pathlib import Path
from termcolor import cprint
from llama_stack.cli.subcommand import Subcommand
from llama_stack.cli.table import print_table
class StackRemove(Subcommand):
"""Remove the build stack"""
def __init__(self, subparsers: argparse._SubParsersAction):
super().__init__()
self.parser = subparsers.add_parser(
"rm",
prog="llama stack rm",
description="Remove the build stack",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
self._add_arguments()
self.parser.set_defaults(func=self._remove_stack_build_command)
def _add_arguments(self) -> None:
self.parser.add_argument(
"name",
type=str,
nargs="?",
help="Name of the stack to delete",
)
self.parser.add_argument(
"--all",
"-a",
action="store_true",
help="Delete all stacks (use with caution)",
)
def _get_distribution_dirs(self) -> dict[str, Path]:
"""Return a dictionary of distribution names and their paths"""
distributions = {}
dist_dir = Path.home() / ".llama" / "distributions"
if dist_dir.exists():
for stack_dir in dist_dir.iterdir():
if stack_dir.is_dir():
distributions[stack_dir.name] = stack_dir
return distributions
def _list_stacks(self) -> None:
"""Display available stacks in a table"""
distributions = self._get_distribution_dirs()
if not distributions:
print("No stacks found in ~/.llama/distributions")
return
headers = ["Stack Name", "Path"]
rows = [[name, str(path)] for name, path in distributions.items()]
print_table(rows, headers, separate_rows=True)
def _remove_stack_build_command(self, args: argparse.Namespace) -> None:
distributions = self._get_distribution_dirs()
if args.all:
confirm = input("Are you sure you want to delete ALL stacks? [yes-i-really-want/N] ").lower()
if confirm != "yes-i-really-want":
print("Deletion cancelled.")
return
for name, path in distributions.items():
try:
shutil.rmtree(path)
print(f"Deleted stack: {name}")
except Exception as e:
cprint(
f"Failed to delete stack {name}: {e}",
color="red",
)
sys.exit(2)
if not args.name:
self._list_stacks()
if not args.name:
return
if args.name not in distributions:
self._list_stacks()
cprint(
f"Stack not found: {args.name}",
color="red",
)
return
stack_path = distributions[args.name]
confirm = input(f"Are you sure you want to delete stack '{args.name}'? [y/N] ").lower()
if confirm != "y":
print("Deletion cancelled.")
return
try:
shutil.rmtree(stack_path)
print(f"Successfully deleted stack: {args.name}")
except Exception as e:
cprint(
f"Failed to delete stack {args.name}: {e}",
color="red",
)
sys.exit(2)


@@ -7,12 +7,14 @@
import argparse
from importlib.metadata import version
from llama_stack.cli.stack.list_stacks import StackListBuilds
from llama_stack.cli.stack.utils import print_subcommand_description
from llama_stack.cli.subcommand import Subcommand
from .build import StackBuild
from .list_apis import StackListApis
from .list_providers import StackListProviders
from .remove import StackRemove
from .run import StackRun
@@ -41,5 +43,6 @@ class StackParser(Subcommand):
StackListApis.create(subparsers)
StackListProviders.create(subparsers)
StackRun.create(subparsers)
StackRemove.create(subparsers)
StackListBuilds.create(subparsers)
print_subcommand_description(self.parser, subparsers)


@@ -25,7 +25,7 @@ from llama_stack.apis.tools import Tool, ToolGroup, ToolGroupInput, ToolRuntime
from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
from llama_stack.apis.vector_io import VectorIO
from llama_stack.providers.datatypes import Api, ProviderSpec
from llama_stack.providers.utils.kvstore.config import KVStoreConfig
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
LLAMA_STACK_BUILD_CONFIG_VERSION = "2"
LLAMA_STACK_RUN_CONFIG_VERSION = "2"
@@ -220,21 +220,34 @@ class LoggingConfig(BaseModel):
class AuthProviderType(str, Enum):
"""Supported authentication provider types."""
KUBERNETES = "kubernetes"
OAUTH2_TOKEN = "oauth2_token"
CUSTOM = "custom"
class AuthenticationConfig(BaseModel):
provider_type: AuthProviderType = Field(
...,
description="Type of authentication provider (e.g., 'kubernetes', 'custom')",
description="Type of authentication provider",
)
config: dict[str, str] = Field(
config: dict[str, Any] = Field(
...,
description="Provider-specific configuration",
)
class QuotaPeriod(str, Enum):
DAY = "day"
class QuotaConfig(BaseModel):
kvstore: SqliteKVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)")
anonymous_max_requests: int = Field(default=100, description="Max requests for unauthenticated clients per period")
authenticated_max_requests: int = Field(
default=1000, description="Max requests for authenticated clients per period"
)
period: QuotaPeriod = Field(default=QuotaPeriod.DAY, description="Quota period to set")
class ServerConfig(BaseModel):
port: int = Field(
default=8321,
@@ -262,6 +275,10 @@ class ServerConfig(BaseModel):
default=None,
description="The host the server should listen on",
)
quota: QuotaConfig | None = Field(
default=None,
description="Per client quota request configuration",
)
class StackRunConfig(BaseModel):


@@ -8,7 +8,8 @@ import json
import httpx
from llama_stack.distribution.server.auth_providers import AuthProviderConfig, create_auth_provider
from llama_stack.distribution.datatypes import AuthenticationConfig
from llama_stack.distribution.server.auth_providers import create_auth_provider
from llama_stack.log import get_logger
logger = get_logger(name=__name__, category="auth")
@@ -77,7 +78,7 @@ class AuthenticationMiddleware:
access resources that don't have access_attributes defined.
"""
def __init__(self, app, auth_config: AuthProviderConfig):
def __init__(self, app, auth_config: AuthenticationConfig):
self.app = app
self.auth_provider = create_auth_provider(auth_config)
@@ -113,6 +114,10 @@ class AuthenticationMiddleware:
"roles": [token],
}
# Store the client ID in the request scope so that downstream middleware (like QuotaMiddleware)
# can identify the requester and enforce per-client rate limits.
scope["authenticated_client_id"] = token
# Store attributes in request scope
scope["user_attributes"] = user_attributes
scope["principal"] = validation_result.principal


@@ -4,18 +4,19 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import json
import ssl
import time
from abc import ABC, abstractmethod
from asyncio import Lock
from enum import Enum
from pathlib import Path
from urllib.parse import parse_qs
import httpx
from jose import jwt
from pydantic import BaseModel, Field, field_validator
from pydantic import BaseModel, Field, field_validator, model_validator
from typing_extensions import Self
from llama_stack.distribution.datatypes import AccessAttributes
from llama_stack.distribution.datatypes import AccessAttributes, AuthenticationConfig, AuthProviderType
from llama_stack.log import get_logger
logger = get_logger(name=__name__, category="auth")
@@ -73,21 +74,6 @@ class AuthRequest(BaseModel):
request: AuthRequestContext = Field(description="Context information about the request being authenticated")
class AuthProviderType(str, Enum):
"""Supported authentication provider types."""
KUBERNETES = "kubernetes"
CUSTOM = "custom"
OAUTH2_TOKEN = "oauth2_token"
class AuthProviderConfig(BaseModel):
"""Base configuration for authentication providers."""
provider_type: AuthProviderType = Field(..., description="Type of authentication provider")
config: dict[str, str] = Field(..., description="Provider-specific configuration")
class AuthProvider(ABC):
"""Abstract base class for authentication providers."""
@@ -102,83 +88,6 @@ class AuthProvider(ABC):
pass
class KubernetesAuthProviderConfig(BaseModel):
api_server_url: str
ca_cert_path: str | None = None
class KubernetesAuthProvider(AuthProvider):
"""Kubernetes authentication provider that validates tokens against the Kubernetes API server."""
def __init__(self, config: KubernetesAuthProviderConfig):
self.config = config
self._client = None
async def _get_client(self):
"""Get or create a Kubernetes client."""
if self._client is None:
# kubernetes-client has no async support, see:
# https://github.com/kubernetes-client/python/issues/323
from kubernetes import client
from kubernetes.client import ApiClient
# Configure the client
configuration = client.Configuration()
configuration.host = self.config.api_server_url
if self.config.ca_cert_path:
configuration.ssl_ca_cert = self.config.ca_cert_path
configuration.verify_ssl = bool(self.config.ca_cert_path)
# Create API client
self._client = ApiClient(configuration)
return self._client
async def validate_token(self, token: str, scope: dict | None = None) -> TokenValidationResult:
"""Validate a Kubernetes token and return access attributes."""
try:
client = await self._get_client()
# Set the token in the client
client.set_default_header("Authorization", f"Bearer {token}")
# Make a request to validate the token
# We use the /api endpoint which requires authentication
from kubernetes.client import CoreV1Api
api = CoreV1Api(client)
api.get_api_resources(_request_timeout=3.0) # Set timeout for this specific request
# If we get here, the token is valid
# Extract user info from the token claims
import base64
# Decode the token (without verification since we've already validated it)
token_parts = token.split(".")
payload = json.loads(base64.b64decode(token_parts[1] + "=" * (-len(token_parts[1]) % 4)))
# Extract user information from the token
username = payload.get("sub", "")
groups = payload.get("groups", [])
return TokenValidationResult(
principal=username,
access_attributes=AccessAttributes(
roles=[username], # Use username as a role
teams=groups, # Use Kubernetes groups as teams
),
)
except Exception as e:
logger.exception("Failed to validate Kubernetes token")
raise ValueError("Invalid or expired token") from e
async def close(self):
"""Close the HTTP client."""
if self._client:
self._client.close()
self._client = None
def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) -> AccessAttributes:
attributes = AccessAttributes()
for claim_key, attribute_key in mapping.items():
@@ -198,11 +107,24 @@ def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str])
return attributes
class OAuth2TokenAuthProviderConfig(BaseModel):
class OAuth2JWKSConfig(BaseModel):
# The JWKS URI for collecting public keys
jwks_uri: str
cache_ttl: int = 3600
uri: str
key_recheck_period: int = Field(default=3600, description="The period to recheck the JWKS URI for key updates")
class OAuth2IntrospectionConfig(BaseModel):
url: str
client_id: str
client_secret: str
send_secret_in_body: bool = False
class OAuth2TokenAuthProviderConfig(BaseModel):
audience: str = "llama-stack"
verify_tls: bool = True
tls_cafile: Path | None = None
issuer: str | None = Field(default=None, description="The OIDC issuer URL.")
claims_mapping: dict[str, str] = Field(
default_factory=lambda: {
"sub": "roles",
@@ -214,6 +136,8 @@ class OAuth2TokenAuthProviderConfig(BaseModel):
"namespace": "namespaces",
},
)
jwks: OAuth2JWKSConfig | None
introspection: OAuth2IntrospectionConfig | None = None
@classmethod
@field_validator("claims_mapping")
@@ -225,6 +149,14 @@ class OAuth2TokenAuthProviderConfig(BaseModel):
raise ValueError(f"claims_mapping value is not a valid attribute: {value}")
return v
@model_validator(mode="after")
def validate_mode(self) -> Self:
if not self.jwks and not self.introspection:
raise ValueError("One of jwks or introspection must be configured")
if self.jwks and self.introspection:
raise ValueError("At present only one of jwks or introspection should be configured")
return self
class OAuth2TokenAuthProvider(AuthProvider):
"""
@@ -240,6 +172,13 @@ class OAuth2TokenAuthProvider(AuthProvider):
self._jwks_lock = Lock()
async def validate_token(self, token: str, scope: dict | None = None) -> TokenValidationResult:
if self.config.jwks:
return await self.validate_jwt_token(token, scope)
if self.config.introspection:
return await self.introspect_token(token, scope)
raise ValueError("One of jwks or introspection must be configured")
async def validate_jwt_token(self, token: str, scope: dict | None = None) -> TokenValidationResult:
"""Validate a token using the JWT token."""
await self._refresh_jwks()
@@ -255,7 +194,7 @@ class OAuth2TokenAuthProvider(AuthProvider):
key_data,
algorithms=[algorithm],
audience=self.config.audience,
options={"verify_exp": True},
issuer=self.config.issuer,
)
except Exception as exc:
raise ValueError(f"Invalid JWT token: {token}") from exc
@@ -269,14 +208,75 @@ class OAuth2TokenAuthProvider(AuthProvider):
access_attributes=access_attributes,
)
async def introspect_token(self, token: str, scope: dict | None = None) -> TokenValidationResult:
"""Validate a token using token introspection as defined by RFC 7662."""
form = {
"token": token,
}
if self.config.introspection is None:
raise ValueError("Introspection is not configured")
if self.config.introspection.send_secret_in_body:
form["client_id"] = self.config.introspection.client_id
form["client_secret"] = self.config.introspection.client_secret
auth = None
else:
auth = (self.config.introspection.client_id, self.config.introspection.client_secret)
ssl_ctxt = None
if self.config.tls_cafile:
ssl_ctxt = ssl.create_default_context(cafile=self.config.tls_cafile.as_posix())
try:
async with httpx.AsyncClient(verify=ssl_ctxt) as client:
response = await client.post(
self.config.introspection.url,
data=form,
auth=auth,
timeout=10.0, # Add a reasonable timeout
)
if response.status_code != 200:
logger.warning(f"Token introspection failed with status code: {response.status_code}")
raise ValueError(f"Token introspection failed: {response.status_code}")
fields = response.json()
if not fields["active"]:
raise ValueError("Token not active")
principal = fields["sub"] or fields["username"]
access_attributes = get_attributes_from_claims(fields, self.config.claims_mapping)
return TokenValidationResult(
principal=principal,
access_attributes=access_attributes,
)
except httpx.TimeoutException:
logger.exception("Token introspection request timed out")
raise
except ValueError:
# Re-raise ValueError exceptions to preserve their message
raise
except Exception as e:
logger.exception("Error during token introspection")
raise ValueError("Token introspection error") from e
async def close(self):
"""Close the HTTP client."""
pass
async def _refresh_jwks(self) -> None:
"""
Refresh the JWKS cache.
This is a simple cache that expires after a certain amount of time (defined by `key_recheck_period`).
If the cache is expired, we refresh the JWKS from the JWKS URI.
Notes for Kubernetes, which doesn't fully implement the OIDC protocol:
* It doesn't have user authentication flows
* It doesn't have refresh tokens
"""
async with self._jwks_lock:
if time.time() - self._jwks_at > self.config.cache_ttl:
async with httpx.AsyncClient() as client:
res = await client.get(self.config.jwks_uri, timeout=5)
if self.config.jwks is None:
raise ValueError("JWKS is not configured")
if time.time() - self._jwks_at > self.config.jwks.key_recheck_period:
verify = self.config.tls_cafile.as_posix() if self.config.tls_cafile else self.config.verify_tls
async with httpx.AsyncClient(verify=verify) as client:
res = await client.get(self.config.jwks.uri, timeout=5)
res.raise_for_status()
jwks_data = res.json()["keys"]
updated = {}
@@ -363,13 +363,11 @@ class CustomAuthProvider(AuthProvider):
self._client = None
def create_auth_provider(config: AuthProviderConfig) -> AuthProvider:
def create_auth_provider(config: AuthenticationConfig) -> AuthProvider:
"""Factory function to create the appropriate auth provider."""
provider_type = config.provider_type.lower()
if provider_type == "kubernetes":
return KubernetesAuthProvider(KubernetesAuthProviderConfig.model_validate(config.config))
elif provider_type == "custom":
if provider_type == "custom":
return CustomAuthProvider(CustomAuthProviderConfig.model_validate(config.config))
elif provider_type == "oauth2_token":
return OAuth2TokenAuthProvider(OAuth2TokenAuthProviderConfig.model_validate(config.config))


@@ -0,0 +1,110 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import json
import time
from datetime import datetime, timedelta, timezone
from starlette.types import ASGIApp, Receive, Scope, Send
from llama_stack.log import get_logger
from llama_stack.providers.utils.kvstore.api import KVStore
from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
from llama_stack.providers.utils.kvstore.kvstore import kvstore_impl
logger = get_logger(name=__name__, category="quota")
class QuotaMiddleware:
"""
ASGI middleware that enforces separate quotas for authenticated and anonymous clients
within a configurable time window.
- For authenticated requests, it reads the client ID from the
`Authorization: Bearer <client_id>` header.
- For anonymous requests, it falls back to the IP address of the client.
Requests are counted in a KV store (e.g., SQLite), and HTTP 429 is returned
once a client exceeds its quota.
"""
def __init__(
self,
app: ASGIApp,
kv_config: KVStoreConfig,
anonymous_max_requests: int,
authenticated_max_requests: int,
window_seconds: int = 86400,
):
self.app = app
self.kv_config = kv_config
self.kv: KVStore | None = None
self.anonymous_max_requests = anonymous_max_requests
self.authenticated_max_requests = authenticated_max_requests
self.window_seconds = window_seconds
if isinstance(self.kv_config, SqliteKVStoreConfig):
logger.warning(
"QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. "
f"window_seconds={self.window_seconds}"
)
async def _get_kv(self) -> KVStore:
if self.kv is None:
self.kv = await kvstore_impl(self.kv_config)
return self.kv
async def __call__(self, scope: Scope, receive: Receive, send: Send):
if scope["type"] == "http":
# pick key & limit based on auth
auth_id = scope.get("authenticated_client_id")
if auth_id:
key_id = auth_id
limit = self.authenticated_max_requests
else:
# fallback to IP
client = scope.get("client")
key_id = client[0] if client else "anonymous"
limit = self.anonymous_max_requests
current_window = int(time.time() // self.window_seconds)
key = f"quota:{key_id}:{current_window}"
try:
kv = await self._get_kv()
prev = await kv.get(key) or "0"
count = int(prev) + 1
if int(prev) == 0:
# Set with expiration datetime when it is the first request in the window.
expiration = datetime.now(timezone.utc) + timedelta(seconds=self.window_seconds)
await kv.set(key, str(count), expiration=expiration)
else:
await kv.set(key, str(count))
except Exception:
logger.exception("Failed to access KV store for quota")
return await self._send_error(send, 500, "Quota service error")
if count > limit:
logger.warning(
"Quota exceeded for client %s: %d/%d",
key_id,
count,
limit,
)
return await self._send_error(send, 429, "Quota exceeded")
return await self.app(scope, receive, send)
async def _send_error(self, send: Send, status: int, message: str):
await send(
{
"type": "http.response.start",
"status": status,
"headers": [[b"content-type", b"application/json"]],
}
)
body = json.dumps({"error": {"message": message}}).encode()
await send({"type": "http.response.body", "body": body})


@@ -60,6 +60,7 @@ from llama_stack.providers.utils.telemetry.tracing import (
from .auth import AuthenticationMiddleware
from .endpoints import get_all_api_endpoints
from .quota import QuotaMiddleware
REPO_ROOT = Path(__file__).parent.parent.parent.parent
@@ -434,6 +435,35 @@ def main(args: argparse.Namespace | None = None):
if config.server.auth:
logger.info(f"Enabling authentication with provider: {config.server.auth.provider_type.value}")
app.add_middleware(AuthenticationMiddleware, auth_config=config.server.auth)
else:
if config.server.quota:
quota = config.server.quota
logger.warning(
"Configured authenticated_max_requests (%d) but no auth is enabled; "
"falling back to anonymous_max_requests (%d) for all the requests",
quota.authenticated_max_requests,
quota.anonymous_max_requests,
)
if config.server.quota:
logger.info("Enabling quota middleware for authenticated and anonymous clients")
quota = config.server.quota
anonymous_max_requests = quota.anonymous_max_requests
# if auth is disabled, use the anonymous max requests
authenticated_max_requests = quota.authenticated_max_requests if config.server.auth else anonymous_max_requests
kv_config = quota.kvstore
window_map = {"day": 86400}
window_seconds = window_map[quota.period.value]
app.add_middleware(
QuotaMiddleware,
kv_config=kv_config,
anonymous_max_requests=anonymous_max_requests,
authenticated_max_requests=authenticated_max_requests,
window_seconds=window_seconds,
)
try:
impls = asyncio.run(construct_stack(config))


@@ -40,7 +40,6 @@ dependencies = [
"tiktoken",
"pillow",
"h11>=0.16.0",
"kubernetes",
]
[project.optional-dependencies]
@@ -94,6 +93,7 @@ test = [
docs = [
"sphinx-autobuild",
"myst-parser",
"sphinx",
"sphinx-rtd-theme",
"sphinx_rtd_dark_mode",
"sphinx-copybutton",
@@ -103,6 +103,8 @@ docs = [
"sphinxcontrib.video",
"sphinxcontrib.mermaid",
"tomli",
"linkify",
"sphinxcontrib.openapi",
]
codegen = ["rich", "pydantic", "jinja2>=3.1.6"]
ui = [


@@ -4,19 +4,16 @@ annotated-types==0.7.0
anyio==4.8.0
attrs==25.1.0
blobfile==3.0.0
cachetools==5.5.2
certifi==2025.1.31
charset-normalizer==3.4.1
click==8.1.8
colorama==0.4.6 ; sys_platform == 'win32'
distro==1.9.0
durationpy==0.9
ecdsa==0.19.1
exceptiongroup==1.2.2 ; python_full_version < '3.11'
filelock==3.17.0
fire==0.7.0
fsspec==2024.12.0
google-auth==2.38.0
h11==0.16.0
httpcore==1.0.9
httpx==0.28.1
@@ -26,14 +23,12 @@ jinja2==3.1.6
jiter==0.8.2
jsonschema==4.23.0
jsonschema-specifications==2024.10.1
kubernetes==32.0.1
llama-stack-client==0.2.7
lxml==5.3.1
markdown-it-py==3.0.0
markupsafe==3.0.2
mdurl==0.1.2
numpy==2.2.3
oauthlib==3.2.2
openai==1.71.0
packaging==24.2
pandas==2.2.3
@@ -41,7 +36,6 @@ pillow==11.1.0
prompt-toolkit==3.0.50
pyaml==25.1.0
pyasn1==0.4.8
pyasn1-modules==0.4.1
pycryptodomex==3.21.0
pydantic==2.10.6
pydantic-core==2.27.2
@@ -54,7 +48,6 @@ pyyaml==6.0.2
referencing==0.36.2
regex==2024.11.6
requests==2.32.3
requests-oauthlib==2.0.0
rich==13.9.4
rpds-py==0.22.3
rsa==4.9
@@ -68,4 +61,3 @@ typing-extensions==4.12.2
tzdata==2025.1
urllib3==2.3.0
wcwidth==0.2.13
websocket-client==1.8.0


@@ -11,12 +11,10 @@ import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient
from llama_stack.distribution.datatypes import AccessAttributes
from llama_stack.distribution.datatypes import AuthenticationConfig
from llama_stack.distribution.server.auth import AuthenticationMiddleware
from llama_stack.distribution.server.auth_providers import (
AuthProviderConfig,
AuthProviderType,
TokenValidationResult,
get_attributes_from_claims,
)
@@ -62,7 +60,7 @@ def invalid_token():
@pytest.fixture
def http_app(mock_auth_endpoint):
app = FastAPI()
auth_config = AuthProviderConfig(
auth_config = AuthenticationConfig(
provider_type=AuthProviderType.CUSTOM,
config={"endpoint": mock_auth_endpoint},
)
@@ -78,7 +76,7 @@ def http_app(mock_auth_endpoint):
@pytest.fixture
def k8s_app():
app = FastAPI()
auth_config = AuthProviderConfig(
auth_config = AuthenticationConfig(
provider_type=AuthProviderType.KUBERNETES,
config={"api_server_url": "https://kubernetes.default.svc"},
)
@@ -118,7 +116,7 @@ def mock_scope():
@pytest.fixture
def mock_http_middleware(mock_auth_endpoint):
mock_app = AsyncMock()
auth_config = AuthProviderConfig(
auth_config = AuthenticationConfig(
provider_type=AuthProviderType.CUSTOM,
config={"endpoint": mock_auth_endpoint},
)
@@ -128,7 +126,7 @@ def mock_http_middleware(mock_auth_endpoint):
@pytest.fixture
def mock_k8s_middleware():
mock_app = AsyncMock()
auth_config = AuthProviderConfig(
auth_config = AuthenticationConfig(
provider_type=AuthProviderType.KUBERNETES,
config={"api_server_url": "https://kubernetes.default.svc"},
)
@@ -284,120 +282,19 @@ async def test_http_middleware_no_attributes(mock_http_middleware, mock_scope):
assert attributes["roles"] == ["test.jwt.token"]
# Kubernetes Tests
def test_missing_auth_header_k8s(k8s_client):
response = k8s_client.get("/test")
assert response.status_code == 401
assert "Missing or invalid Authorization header" in response.json()["error"]["message"]
def test_invalid_auth_header_format_k8s(k8s_client):
response = k8s_client.get("/test", headers={"Authorization": "InvalidFormat token123"})
assert response.status_code == 401
assert "Missing or invalid Authorization header" in response.json()["error"]["message"]
@patch("kubernetes.client.ApiClient")
def test_valid_k8s_authentication(mock_api_client, k8s_client, valid_token):
# Mock the Kubernetes client
mock_client = AsyncMock()
mock_api_client.return_value = mock_client
# Mock successful token validation
mock_client.set_default_header = AsyncMock()
# Mock the token validation to return valid access attributes
with patch("llama_stack.distribution.server.auth_providers.KubernetesAuthProvider.validate_token") as mock_validate:
mock_validate.return_value = TokenValidationResult(
principal="test-principal",
access_attributes=AccessAttributes(
roles=["admin"], teams=["ml-team"], projects=["llama-3"], namespaces=["research"]
),
)
response = k8s_client.get("/test", headers={"Authorization": f"Bearer {valid_token}"})
assert response.status_code == 200
assert response.json() == {"message": "Authentication successful"}
@patch("kubernetes.client.ApiClient")
def test_invalid_k8s_authentication(mock_api_client, k8s_client, invalid_token):
# Mock the Kubernetes client
mock_client = AsyncMock()
mock_api_client.return_value = mock_client
# Mock failed token validation by raising an exception
with patch("llama_stack.distribution.server.auth_providers.KubernetesAuthProvider.validate_token") as mock_validate:
mock_validate.side_effect = ValueError("Invalid or expired token")
response = k8s_client.get("/test", headers={"Authorization": f"Bearer {invalid_token}"})
assert response.status_code == 401
assert "Invalid or expired token" in response.json()["error"]["message"]
@pytest.mark.asyncio
async def test_k8s_middleware_with_access_attributes(mock_k8s_middleware, mock_scope):
middleware, mock_app = mock_k8s_middleware
mock_receive = AsyncMock()
mock_send = AsyncMock()
with patch("kubernetes.client.ApiClient") as mock_api_client:
mock_client = AsyncMock()
mock_api_client.return_value = mock_client
# Mock successful token validation
mock_client.set_default_header = AsyncMock()
# Mock token payload with access attributes
mock_token_parts = ["header", "eyJzdWIiOiJhZG1pbiIsImdyb3VwcyI6WyJtbC10ZWFtIl19", "signature"]
mock_scope["headers"][1] = (b"authorization", f"Bearer {'.'.join(mock_token_parts)}".encode())
await middleware(mock_scope, mock_receive, mock_send)
assert "user_attributes" in mock_scope
assert mock_scope["user_attributes"]["roles"] == ["admin"]
assert mock_scope["user_attributes"]["teams"] == ["ml-team"]
mock_app.assert_called_once_with(mock_scope, mock_receive, mock_send)
@pytest.mark.asyncio
async def test_k8s_middleware_no_attributes(mock_k8s_middleware, mock_scope):
"""Test middleware behavior with no access attributes"""
middleware, mock_app = mock_k8s_middleware
mock_receive = AsyncMock()
mock_send = AsyncMock()
with patch("kubernetes.client.ApiClient") as mock_api_client:
mock_client = AsyncMock()
mock_api_client.return_value = mock_client
# Mock successful token validation
mock_client.set_default_header = AsyncMock()
# Mock token payload without access attributes
mock_token_parts = ["header", "eyJzdWIiOiJhZG1pbiJ9", "signature"]
mock_scope["headers"][1] = (b"authorization", f"Bearer {'.'.join(mock_token_parts)}".encode())
await middleware(mock_scope, mock_receive, mock_send)
assert "user_attributes" in mock_scope
attributes = mock_scope["user_attributes"]
assert "roles" in attributes
assert attributes["roles"] == ["admin"]
mock_app.assert_called_once_with(mock_scope, mock_receive, mock_send)
# oauth2 token provider tests
@pytest.fixture
def oauth2_app():
app = FastAPI()
auth_config = AuthProviderConfig(
auth_config = AuthenticationConfig(
provider_type=AuthProviderType.OAUTH2_TOKEN,
config={
"jwks_uri": "http://mock-authz-service/token/introspect",
"cache_ttl": "3600",
"jwks": {
"uri": "http://mock-authz-service/token/introspect",
"key_recheck_period": "3600",
},
"audience": "llama-stack",
},
)
@@ -517,3 +414,159 @@ def test_get_attributes_from_claims():
# TODO: add more tests for oauth2 token provider
# oauth token introspection tests
@pytest.fixture
def mock_introspection_endpoint():
return "http://mock-authz-service/token/introspect"
@pytest.fixture
def introspection_app(mock_introspection_endpoint):
app = FastAPI()
auth_config = AuthenticationConfig(
provider_type=AuthProviderType.OAUTH2_TOKEN,
config={
"jwks": None,
"introspection": {"url": mock_introspection_endpoint, "client_id": "myclient", "client_secret": "abcdefg"},
},
)
app.add_middleware(AuthenticationMiddleware, auth_config=auth_config)
@app.get("/test")
def test_endpoint():
return {"message": "Authentication successful"}
return app
@pytest.fixture
def introspection_app_with_custom_mapping(mock_introspection_endpoint):
app = FastAPI()
auth_config = AuthenticationConfig(
provider_type=AuthProviderType.OAUTH2_TOKEN,
config={
"jwks": None,
"introspection": {
"url": mock_introspection_endpoint,
"client_id": "myclient",
"client_secret": "abcdefg",
"send_secret_in_body": "true",
},
"claims_mapping": {
"sub": "roles",
"scope": "roles",
"groups": "teams",
"aud": "namespaces",
},
},
)
app.add_middleware(AuthenticationMiddleware, auth_config=auth_config)
@app.get("/test")
def test_endpoint():
return {"message": "Authentication successful"}
return app
@pytest.fixture
def introspection_client(introspection_app):
return TestClient(introspection_app)
@pytest.fixture
def introspection_client_with_custom_mapping(introspection_app_with_custom_mapping):
return TestClient(introspection_app_with_custom_mapping)
def test_missing_auth_header_introspection(introspection_client):
response = introspection_client.get("/test")
assert response.status_code == 401
assert "Missing or invalid Authorization header" in response.json()["error"]["message"]
def test_invalid_auth_header_format_introspection(introspection_client):
response = introspection_client.get("/test", headers={"Authorization": "InvalidFormat token123"})
assert response.status_code == 401
assert "Missing or invalid Authorization header" in response.json()["error"]["message"]
async def mock_introspection_active(*args, **kwargs):
return MockResponse(
200,
{
"active": True,
"sub": "my-user",
"groups": ["group1", "group2"],
"scope": "foo bar",
"aud": ["set1", "set2"],
},
)
async def mock_introspection_inactive(*args, **kwargs):
return MockResponse(
200,
{
"active": False,
},
)
async def mock_introspection_invalid(*args, **kwargs):
class InvalidResponse:
def __init__(self, status_code):
self.status_code = status_code
def json(self):
raise ValueError("Not JSON")
return InvalidResponse(200)
async def mock_introspection_failed(*args, **kwargs):
return MockResponse(
500,
{},
)
@patch("httpx.AsyncClient.post", new=mock_introspection_active)
def test_valid_introspection_authentication(introspection_client, valid_api_key):
response = introspection_client.get("/test", headers={"Authorization": f"Bearer {valid_api_key}"})
assert response.status_code == 200
assert response.json() == {"message": "Authentication successful"}
@patch("httpx.AsyncClient.post", new=mock_introspection_inactive)
def test_inactive_introspection_authentication(introspection_client, invalid_api_key):
response = introspection_client.get("/test", headers={"Authorization": f"Bearer {invalid_api_key}"})
assert response.status_code == 401
assert "Token not active" in response.json()["error"]["message"]
@patch("httpx.AsyncClient.post", new=mock_introspection_invalid)
def test_invalid_introspection_authentication(introspection_client, invalid_api_key):
response = introspection_client.get("/test", headers={"Authorization": f"Bearer {invalid_api_key}"})
assert response.status_code == 401
assert "Not JSON" in response.json()["error"]["message"]
@patch("httpx.AsyncClient.post", new=mock_introspection_failed)
def test_failed_introspection_authentication(introspection_client, invalid_api_key):
response = introspection_client.get("/test", headers={"Authorization": f"Bearer {invalid_api_key}"})
assert response.status_code == 401
assert "Token introspection failed: 500" in response.json()["error"]["message"]
@patch("httpx.AsyncClient.post", new=mock_introspection_active)
def test_valid_introspection_with_custom_mapping_authentication(
introspection_client_with_custom_mapping, valid_api_key
):
response = introspection_client_with_custom_mapping.get(
"/test", headers={"Authorization": f"Bearer {valid_api_key}"}
)
assert response.status_code == 200
assert response.json() == {"message": "Authentication successful"}


@@ -0,0 +1,127 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import pytest
from fastapi import FastAPI, Request
from fastapi.testclient import TestClient
from starlette.middleware.base import BaseHTTPMiddleware

from llama_stack.distribution.datatypes import QuotaConfig, QuotaPeriod
from llama_stack.distribution.server.quota import QuotaMiddleware
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig


class InjectClientIDMiddleware(BaseHTTPMiddleware):
    """
    Middleware that injects 'authenticated_client_id' to mimic AuthenticationMiddleware.
    """

    def __init__(self, app, client_id="client1"):
        super().__init__(app)
        self.client_id = client_id

    async def dispatch(self, request: Request, call_next):
        request.scope["authenticated_client_id"] = self.client_id
        return await call_next(request)


def build_quota_config(db_path) -> QuotaConfig:
    return QuotaConfig(
        kvstore=SqliteKVStoreConfig(db_path=str(db_path)),
        anonymous_max_requests=1,
        authenticated_max_requests=2,
        period=QuotaPeriod.DAY,
    )


@pytest.fixture
def auth_app(tmp_path, request):
    """
    FastAPI app with InjectClientIDMiddleware and QuotaMiddleware for authenticated testing.
    Each test gets its own DB file.
    """
    inner_app = FastAPI()

    @inner_app.get("/test")
    async def test_endpoint():
        return {"message": "ok"}

    db_path = tmp_path / f"quota_{request.node.name}.db"
    quota = build_quota_config(db_path)

    app = InjectClientIDMiddleware(
        QuotaMiddleware(
            inner_app,
            kv_config=quota.kvstore,
            anonymous_max_requests=quota.anonymous_max_requests,
            authenticated_max_requests=quota.authenticated_max_requests,
            window_seconds=86400,
        ),
        client_id=f"client_{request.node.name}",
    )
    return app


def test_authenticated_quota_allows_up_to_limit(auth_app):
    client = TestClient(auth_app)

    assert client.get("/test").status_code == 200
    assert client.get("/test").status_code == 200


def test_authenticated_quota_blocks_after_limit(auth_app):
    client = TestClient(auth_app)

    client.get("/test")
    client.get("/test")
    resp = client.get("/test")

    assert resp.status_code == 429
    assert resp.json()["error"]["message"] == "Quota exceeded"


def test_anonymous_quota_allows_up_to_limit(tmp_path, request):
    inner_app = FastAPI()

    @inner_app.get("/test")
    async def test_endpoint():
        return {"message": "ok"}

    db_path = tmp_path / f"quota_anon_{request.node.name}.db"
    quota = build_quota_config(db_path)

    app = QuotaMiddleware(
        inner_app,
        kv_config=quota.kvstore,
        anonymous_max_requests=quota.anonymous_max_requests,
        authenticated_max_requests=quota.authenticated_max_requests,
        window_seconds=86400,
    )

    client = TestClient(app)
    assert client.get("/test").status_code == 200


def test_anonymous_quota_blocks_after_limit(tmp_path, request):
    inner_app = FastAPI()

    @inner_app.get("/test")
    async def test_endpoint():
        return {"message": "ok"}

    db_path = tmp_path / f"quota_anon_{request.node.name}.db"
    quota = build_quota_config(db_path)

    app = QuotaMiddleware(
        inner_app,
        kv_config=quota.kvstore,
        anonymous_max_requests=quota.anonymous_max_requests,
        authenticated_max_requests=quota.authenticated_max_requests,
        window_seconds=86400,
    )

    client = TestClient(app)
    client.get("/test")
    resp = client.get("/test")

    assert resp.status_code == 429
    assert resp.json()["error"]["message"] == "Quota exceeded"
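
These tests exercise a fixed-window quota: each caller gets a counter that resets every window_seconds, with separate limits for anonymous and authenticated traffic. A minimal sketch of that pattern, assuming a plain dict-like key-value store (illustrative only, not the shipped QuotaMiddleware, whose KV store and key layout may differ):

import time


class FixedWindowQuota:
    """Illustrative ASGI middleware: one counter per caller per time window."""

    def __init__(self, app, kv, anonymous_max_requests, authenticated_max_requests, window_seconds):
        self.app = app
        self.kv = kv  # assumed: any dict-like store; the real middleware takes a KVStore config
        self.anon_limit = anonymous_max_requests
        self.auth_limit = authenticated_max_requests
        self.window = window_seconds

    async def __call__(self, scope, receive, send):
        if scope["type"] != "http":
            return await self.app(scope, receive, send)

        client_id = scope.get("authenticated_client_id")
        limit = self.auth_limit if client_id else self.anon_limit
        caller = client_id or (scope.get("client") or ("anonymous",))[0]

        # Fixed window: requests inside the same window share one counter key.
        window_start = int(time.time() // self.window)
        key = f"quota:{caller}:{window_start}"
        count = int(self.kv.get(key, 0)) + 1
        self.kv[key] = count

        if count > limit:
            await send({"type": "http.response.start", "status": 429,
                        "headers": [(b"content-type", b"application/json")]})
            await send({"type": "http.response.body",
                        "body": b'{"error": {"message": "Quota exceeded"}}'})
            return

        await self.app(scope, receive, send)

With kv = {} and window_seconds = 86400 this reproduces the behavior asserted above: an anonymous caller's second request in the same day gets a 429 with the "Quota exceeded" message, while an authenticated caller is allowed two.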

186
uv.lock generated
View file

@ -628,6 +628,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d5/50/83c593b07763e1161326b3b8c6686f0f4b0f24d5526546bee538c89837d6/decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186", size = 9073 },
]
[[package]]
name = "deepmerge"
version = "2.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/a8/3a/b0ba594708f1ad0bc735884b3ad854d3ca3bdc1d741e56e40bbda6263499/deepmerge-2.0.tar.gz", hash = "sha256:5c3d86081fbebd04dd5de03626a0607b809a98fb6ccba5770b62466fe940ff20", size = 19890 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2d/82/e5d2c1c67d19841e9edc74954c827444ae826978499bde3dfc1d007c8c11/deepmerge-2.0-py3-none-any.whl", hash = "sha256:6de9ce507115cff0bed95ff0ce9ecc31088ef50cbdf09bc90a09349a318b3d00", size = 13475 },
]
[[package]]
name = "deprecated"
version = "1.2.18"
@ -676,15 +685,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408 },
]
[[package]]
name = "durationpy"
version = "0.9"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/31/e9/f49c4e7fccb77fa5c43c2480e09a857a78b41e7331a75e128ed5df45c56b/durationpy-0.9.tar.gz", hash = "sha256:fd3feb0a69a0057d582ef643c355c40d2fa1c942191f914d12203b1a01ac722a", size = 3186 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/4c/a3/ac312faeceffd2d8f86bc6dcb5c401188ba5a01bc88e69bed97578a0dfcd/durationpy-0.9-py3-none-any.whl", hash = "sha256:e65359a7af5cedad07fb77a2dd3f390f8eb0b74cb845589fa6c057086834dd38", size = 3461 },
]
[[package]]
name = "ecdsa"
version = "0.19.1"
@ -863,20 +863,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/1d/9a/4114a9057db2f1462d5c8f8390ab7383925fe1ac012eaa42402ad65c2963/GitPython-3.1.44-py3-none-any.whl", hash = "sha256:9e0e10cda9bed1ee64bc9a6de50e7e38a9c9943241cd7f585f6df3ed28011110", size = 207599 },
]
[[package]]
name = "google-auth"
version = "2.38.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "cachetools" },
{ name = "pyasn1-modules" },
{ name = "rsa" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c6/eb/d504ba1daf190af6b204a9d4714d457462b486043744901a6eeea711f913/google_auth-2.38.0.tar.gz", hash = "sha256:8285113607d3b80a3f1543b75962447ba8a09fe85783432a784fdeef6ac094c4", size = 270866 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/9d/47/603554949a37bca5b7f894d51896a9c534b9eab808e2520a748e081669d0/google_auth-2.38.0-py2.py3-none-any.whl", hash = "sha256:e7dae6694313f434a2727bf2906f27ad259bae090d7aa896590d86feec3d9d4a", size = 210770 },
]
[[package]]
name = "googleapis-common-protos"
version = "1.67.0"
@ -1324,28 +1310,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c9/fb/108ecd1fe961941959ad0ee4e12ee7b8b1477247f30b1fdfd83ceaf017f0/jupyter_core-5.7.2-py3-none-any.whl", hash = "sha256:4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409", size = 28965 },
]
[[package]]
name = "kubernetes"
version = "32.0.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "certifi" },
{ name = "durationpy" },
{ name = "google-auth" },
{ name = "oauthlib" },
{ name = "python-dateutil" },
{ name = "pyyaml" },
{ name = "requests" },
{ name = "requests-oauthlib" },
{ name = "six" },
{ name = "urllib3" },
{ name = "websocket-client" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b7/e8/0598f0e8b4af37cd9b10d8b87386cf3173cb8045d834ab5f6ec347a758b3/kubernetes-32.0.1.tar.gz", hash = "sha256:42f43d49abd437ada79a79a16bd48a604d3471a117a8347e87db693f2ba0ba28", size = 946691 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/08/10/9f8af3e6f569685ce3af7faab51c8dd9d93b9c38eba339ca31c746119447/kubernetes-32.0.1-py2.py3-none-any.whl", hash = "sha256:35282ab8493b938b08ab5526c7ce66588232df00ef5e1dbe88a419107dc10998", size = 1988070 },
]
[[package]]
name = "levenshtein"
version = "0.27.1"
@ -1429,6 +1393,12 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/dc/1e/408fd10217eac0e43aea0604be22b4851a09e03d761d44d4ea12089dd70e/levenshtein-0.27.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:7987ef006a3cf56a4532bd4c90c2d3b7b4ca9ad3bf8ae1ee5713c4a3bdfda913", size = 98045 },
]
[[package]]
name = "linkify"
version = "1.4"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/65/c6/246100fa3967074d9725b3716913bd495823547bde5047050d4c3462f994/linkify-1.4.tar.gz", hash = "sha256:9ba276ba179525f7262820d90f009604e51cd4f1466c1112b882ef7eda243d5e", size = 1749 }
[[package]]
name = "llama-stack"
version = "0.2.7"
@ -1441,7 +1411,6 @@ dependencies = [
{ name = "huggingface-hub" },
{ name = "jinja2" },
{ name = "jsonschema" },
{ name = "kubernetes" },
{ name = "llama-stack-client" },
{ name = "openai" },
{ name = "pillow" },
@ -1480,7 +1449,9 @@ dev = [
{ name = "uvicorn" },
]
docs = [
{ name = "linkify" },
{ name = "myst-parser" },
{ name = "sphinx" },
{ name = "sphinx-autobuild" },
{ name = "sphinx-copybutton" },
{ name = "sphinx-design" },
@ -1488,6 +1459,7 @@ docs = [
{ name = "sphinx-rtd-theme" },
{ name = "sphinx-tabs" },
{ name = "sphinxcontrib-mermaid" },
{ name = "sphinxcontrib-openapi" },
{ name = "sphinxcontrib-redoc" },
{ name = "sphinxcontrib-video" },
{ name = "tomli" },
@ -1546,7 +1518,7 @@ requires-dist = [
{ name = "jinja2", specifier = ">=3.1.6" },
{ name = "jinja2", marker = "extra == 'codegen'", specifier = ">=3.1.6" },
{ name = "jsonschema" },
{ name = "kubernetes" },
{ name = "linkify", marker = "extra == 'docs'" },
{ name = "llama-stack-client", specifier = ">=0.2.7" },
{ name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.7" },
{ name = "mcp", marker = "extra == 'test'" },
@ -1581,6 +1553,7 @@ requires-dist = [
{ name = "ruamel-yaml", marker = "extra == 'dev'" },
{ name = "ruff", marker = "extra == 'dev'" },
{ name = "setuptools" },
{ name = "sphinx", marker = "extra == 'docs'" },
{ name = "sphinx-autobuild", marker = "extra == 'docs'" },
{ name = "sphinx-copybutton", marker = "extra == 'docs'" },
{ name = "sphinx-design", marker = "extra == 'docs'" },
@ -1588,6 +1561,7 @@ requires-dist = [
{ name = "sphinx-rtd-theme", marker = "extra == 'docs'" },
{ name = "sphinx-tabs", marker = "extra == 'docs'" },
{ name = "sphinxcontrib-mermaid", marker = "extra == 'docs'" },
{ name = "sphinxcontrib-openapi", marker = "extra == 'docs'" },
{ name = "sphinxcontrib-redoc", marker = "extra == 'docs'" },
{ name = "sphinxcontrib-video", marker = "extra == 'docs'" },
{ name = "sqlite-vec", marker = "extra == 'unit'" },
@ -1624,9 +1598,9 @@ dependencies = [
{ name = "tqdm" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/cd/6b/31c07396c5b3010668e4eb38061a96ffacb47ec4b14d8aeb64c13856c485/llama_stack_client-0.2.7.tar.gz", hash = "sha256:11aee11fdd5e0e8caad07c0cce9c4d88640938844372e7e3453a91ea0757fcb3", size = 259273, upload-time = "2025-05-16T20:31:39.221Z" }
sdist = { url = "https://files.pythonhosted.org/packages/cd/6b/31c07396c5b3010668e4eb38061a96ffacb47ec4b14d8aeb64c13856c485/llama_stack_client-0.2.7.tar.gz", hash = "sha256:11aee11fdd5e0e8caad07c0cce9c4d88640938844372e7e3453a91ea0757fcb3", size = 259273 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ac/69/6a5f4683afe355500df4376fdcbfb2fc1e6a0c3bcea5ff8f6114773a9acf/llama_stack_client-0.2.7-py3-none-any.whl", hash = "sha256:78b3f2abdb1770c7b1270a9c0ef58402a988401c564d2e6c83588779ac6fc38d", size = 292727, upload-time = "2025-05-16T20:31:37.587Z" },
{ url = "https://files.pythonhosted.org/packages/ac/69/6a5f4683afe355500df4376fdcbfb2fc1e6a0c3bcea5ff8f6114773a9acf/llama_stack_client-0.2.7-py3-none-any.whl", hash = "sha256:78b3f2abdb1770c7b1270a9c0ef58402a988401c564d2e6c83588779ac6fc38d", size = 292727 },
]
[[package]]
@ -1833,6 +1807,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 },
]
[[package]]
name = "mistune"
version = "3.1.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "typing-extensions", marker = "python_full_version < '3.11'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c4/79/bda47f7dd7c3c55770478d6d02c9960c430b0cf1773b72366ff89126ea31/mistune-3.1.3.tar.gz", hash = "sha256:a7035c21782b2becb6be62f8f25d3df81ccb4d6fa477a6525b15af06539f02a0", size = 94347 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/01/4d/23c4e4f09da849e127e9f123241946c23c1e30f45a88366879e064211815/mistune-3.1.3-py3-none-any.whl", hash = "sha256:1a32314113cff28aa6432e99e522677c8587fd83e3d51c29b82a52409c842bd9", size = 53410 },
]
[[package]]
name = "mpmath"
version = "1.3.0"
@ -2087,15 +2073,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/17/7f/d322a4125405920401450118dbdc52e0384026bd669939484670ce8b2ab9/numpy-2.2.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:783145835458e60fa97afac25d511d00a1eca94d4a8f3ace9fe2043003c678e4", size = 12839607 },
]
[[package]]
name = "oauthlib"
version = "3.2.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/6d/fa/fbf4001037904031639e6bfbfc02badfc7e12f137a8afa254df6c4c8a670/oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918", size = 177352 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/7e/80/cab10959dc1faead58dc8384a781dfbf93cb4d33d50988f7a69f1b7c9bbe/oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca", size = 151688 },
]
[[package]]
name = "openai"
version = "1.71.0"
@ -2284,6 +2261,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772 },
]
[[package]]
name = "picobox"
version = "4.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/0f/b1/830714dd6778c1cb45826722b4e9bd21c94b33cca5df9ef2cc0b80c81b25/picobox-4.0.0.tar.gz", hash = "sha256:114da1b5606b2f615e8b0eb68d04198ad9de75af5adbcf5b36fe4f664ab927b6", size = 22666 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2d/c6/fd64ffd75d47c4fcf6c65808cc5c5c75e5d4357c197d3741ee1339e91257/picobox-4.0.0-py3-none-any.whl", hash = "sha256:4c27eb689fe45dabd9e64c382e04418147d0b746d155b4e80057dbb7ff82027e", size = 11641 },
]
[[package]]
name = "pillow"
version = "11.1.0"
@ -2608,18 +2594,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/62/1e/a94a8d635fa3ce4cfc7f506003548d0a2447ae76fd5ca53932970fe3053f/pyasn1-0.4.8-py2.py3-none-any.whl", hash = "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d", size = 77145 },
]
[[package]]
name = "pyasn1-modules"
version = "0.4.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pyasn1" },
]
sdist = { url = "https://files.pythonhosted.org/packages/1d/67/6afbf0d507f73c32d21084a79946bfcfca5fbc62a72057e9c23797a737c9/pyasn1_modules-0.4.1.tar.gz", hash = "sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c", size = 310028 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/77/89/bc88a6711935ba795a679ea6ebee07e128050d6382eaa35a0a47c8032bdc/pyasn1_modules-0.4.1-py3-none-any.whl", hash = "sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd", size = 181537 },
]
[[package]]
name = "pycparser"
version = "2.22"
@ -2875,9 +2849,9 @@ source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "pytest" },
]
sdist = { url = "https://files.pythonhosted.org/packages/ac/82/4c9ecabab13363e72d880f2fb504c5f750433b2b6f16e99f4ec21ada284c/pytest_timeout-2.4.0.tar.gz", hash = "sha256:7e68e90b01f9eff71332b25001f85c75495fc4e3a836701876183c4bcfd0540a", size = 17973, upload-time = "2025-05-05T19:44:34.99Z" }
sdist = { url = "https://files.pythonhosted.org/packages/ac/82/4c9ecabab13363e72d880f2fb504c5f750433b2b6f16e99f4ec21ada284c/pytest_timeout-2.4.0.tar.gz", hash = "sha256:7e68e90b01f9eff71332b25001f85c75495fc4e3a836701876183c4bcfd0540a", size = 17973 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/fa/b6/3127540ecdf1464a00e5a01ee60a1b09175f6913f0644ac748494d9c4b21/pytest_timeout-2.4.0-py3-none-any.whl", hash = "sha256:c42667e5cdadb151aeb5b26d114aff6bdf5a907f176a007a30b940d3d865b5c2", size = 14382, upload-time = "2025-05-05T19:44:33.502Z" },
{ url = "https://files.pythonhosted.org/packages/fa/b6/3127540ecdf1464a00e5a01ee60a1b09175f6913f0644ac748494d9c4b21/pytest_timeout-2.4.0-py3-none-any.whl", hash = "sha256:c42667e5cdadb151aeb5b26d114aff6bdf5a907f176a007a30b940d3d865b5c2", size = 14382 },
]
[[package]]
@ -3256,19 +3230,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 },
]
[[package]]
name = "requests-oauthlib"
version = "2.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "oauthlib" },
{ name = "requests" },
]
sdist = { url = "https://files.pythonhosted.org/packages/42/f2/05f29bc3913aea15eb670be136045bf5c5bbf4b99ecb839da9b422bb2c85/requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9", size = 55650 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/3b/5d/63d4ae3b9daea098d5d6f5da83984853c1bbacd5dc826764b249fe119d24/requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", size = 24179 },
]
[[package]]
name = "rich"
version = "13.9.4"
@ -3597,6 +3558,21 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c6/43/65c0acbd8cc6f50195a3a1fc195c404988b15c67090e73c7a41a9f57d6bd/sphinx_design-0.6.1-py3-none-any.whl", hash = "sha256:b11f37db1a802a183d61b159d9a202314d4d2fe29c163437001324fe2f19549c", size = 2215338 },
]
[[package]]
name = "sphinx-mdinclude"
version = "0.6.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "docutils" },
{ name = "mistune" },
{ name = "pygments" },
{ name = "sphinx" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b6/a7/c9a7888bb2187fdb06955d71e75f6f266b7e179b356ac76138d160a5b7eb/sphinx_mdinclude-0.6.2.tar.gz", hash = "sha256:447462e82cb8be61404a2204227f920769eb923d2f57608e3325f3bb88286b4c", size = 65257 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/42/3d/6b41fe1637cd53c4b10d56e0e6f396546f837973dabf9c4b2a1de44620ac/sphinx_mdinclude-0.6.2-py3-none-any.whl", hash = "sha256:648e78edb067c0e4bffc22943278d49d54a0714494743592032fa3ad82a86984", size = 16911 },
]
[[package]]
name = "sphinx-rtd-dark-mode"
version = "1.3.0"
@ -3664,6 +3640,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = "sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8", size = 98705 },
]
[[package]]
name = "sphinxcontrib-httpdomain"
version = "1.8.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "six" },
{ name = "sphinx" },
]
sdist = { url = "https://files.pythonhosted.org/packages/be/ef/82d3cfafb7febce4f7df8dcf3cde9d072350b41066e05a4f559b4e9105d0/sphinxcontrib-httpdomain-1.8.1.tar.gz", hash = "sha256:6c2dfe6ca282d75f66df333869bb0ce7331c01b475db6809ff9d107b7cdfe04b", size = 19266 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/02/49/aad47b8cf27a0d7703f1311aad8c368bb22866ddee1a2d2cd3f69bc45e0c/sphinxcontrib_httpdomain-1.8.1-py2.py3-none-any.whl", hash = "sha256:21eefe1270e4d9de8d717cc89ee92cc4871b8736774393bafc5e38a6bb77b1d5", size = 25513 },
]
[[package]]
name = "sphinxcontrib-jquery"
version = "4.1"
@ -3698,6 +3687,24 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/cd/c8/784b9ac6ea08aa594c1a4becbd0dbe77186785362e31fd633b8c6ae0197a/sphinxcontrib_mermaid-1.0.0-py3-none-any.whl", hash = "sha256:60b72710ea02087f212028feb09711225fbc2e343a10d34822fe787510e1caa3", size = 9597 },
]
[[package]]
name = "sphinxcontrib-openapi"
version = "0.8.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "deepmerge" },
{ name = "jsonschema" },
{ name = "picobox" },
{ name = "pyyaml" },
{ name = "sphinx" },
{ name = "sphinx-mdinclude" },
{ name = "sphinxcontrib-httpdomain" },
]
sdist = { url = "https://files.pythonhosted.org/packages/c0/a7/66a5c9aba7dbbb0c2b050f60e71402818cbf5f127ace13ed971029cc745e/sphinxcontrib-openapi-0.8.4.tar.gz", hash = "sha256:df883808a5b5e4b4113ad697185c43a3f42df3dce70453af78ba7076907e9a20", size = 71848 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d5/c3/ee00486f38d78309a60ee0d6031b2545b22ac5f0007d841dd174abc68774/sphinxcontrib_openapi-0.8.4-py3-none-any.whl", hash = "sha256:50911c18d452d9390ee3a384ef8dc8bde6135f542ba55691f81e1fbc0b71014e", size = 34510 },
]
[[package]]
name = "sphinxcontrib-qthelp"
version = "2.0.0"
@ -4323,15 +4330,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166 },
]
[[package]]
name = "websocket-client"
version = "1.8.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/e6/30/fba0d96b4b5fbf5948ed3f4681f7da2f9f64512e1d303f94b4cc174c24a5/websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da", size = 54648 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826 },
]
[[package]]
name = "websockets"
version = "15.0"