From 091d8c48f217b413fa267a3c0412c2967be601cd Mon Sep 17 00:00:00 2001 From: grs Date: Tue, 20 May 2025 22:45:11 -0400 Subject: [PATCH 1/7] feat: add additional auth provider that uses oauth token introspection (#2187) # What does this PR do? This adds an alternative option to the oauth_token auth provider that can be used with existing authorization services which support token introspection as defined in RFC 7662. This could be useful where token revocation needs to be handled or where opaque tokens (or other non jwt formatted tokens) are used ## Test Plan Tested against keycloak Signed-off-by: Gordon Sim --- llama_stack/distribution/datatypes.py | 2 +- .../distribution/server/auth_providers.py | 100 +++++++++-- tests/unit/server/test_auth.py | 162 +++++++++++++++++- 3 files changed, 251 insertions(+), 13 deletions(-) diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index 446a88ca0..be5629ba1 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -229,7 +229,7 @@ class AuthenticationConfig(BaseModel): ..., description="Type of authentication provider (e.g., 'kubernetes', 'custom')", ) - config: dict[str, str] = Field( + config: dict[str, Any] = Field( ..., description="Provider-specific configuration", ) diff --git a/llama_stack/distribution/server/auth_providers.py b/llama_stack/distribution/server/auth_providers.py index b73fded58..baab75eca 100644 --- a/llama_stack/distribution/server/auth_providers.py +++ b/llama_stack/distribution/server/auth_providers.py @@ -5,15 +5,18 @@ # the root directory of this source tree. 
import json +import ssl import time from abc import ABC, abstractmethod from asyncio import Lock from enum import Enum +from typing import Any from urllib.parse import parse_qs import httpx from jose import jwt -from pydantic import BaseModel, Field, field_validator +from pydantic import BaseModel, Field, field_validator, model_validator +from typing_extensions import Self from llama_stack.distribution.datatypes import AccessAttributes from llama_stack.log import get_logger @@ -85,7 +88,7 @@ class AuthProviderConfig(BaseModel): """Base configuration for authentication providers.""" provider_type: AuthProviderType = Field(..., description="Type of authentication provider") - config: dict[str, str] = Field(..., description="Provider-specific configuration") + config: dict[str, Any] = Field(..., description="Provider-specific configuration") class AuthProvider(ABC): @@ -198,10 +201,21 @@ def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) return attributes -class OAuth2TokenAuthProviderConfig(BaseModel): +class OAuth2JWKSConfig(BaseModel): # The JWKS URI for collecting public keys - jwks_uri: str + uri: str cache_ttl: int = 3600 + + +class OAuth2IntrospectionConfig(BaseModel): + url: str + client_id: str + client_secret: str + send_secret_in_body: bool = False + tls_cafile: str | None = None + + +class OAuth2TokenAuthProviderConfig(BaseModel): audience: str = "llama-stack" claims_mapping: dict[str, str] = Field( default_factory=lambda: { @@ -214,6 +228,8 @@ class OAuth2TokenAuthProviderConfig(BaseModel): "namespace": "namespaces", }, ) + jwks: OAuth2JWKSConfig | None + introspection: OAuth2IntrospectionConfig | None = None @classmethod @field_validator("claims_mapping") @@ -225,6 +241,14 @@ class OAuth2TokenAuthProviderConfig(BaseModel): raise ValueError(f"claims_mapping value is not a valid attribute: {value}") return v + @model_validator(mode="after") + def validate_mode(self) -> Self: + if not self.jwks and not self.introspection: + 
raise ValueError("One of jwks or introspection must be configured") + if self.jwks and self.introspection: + raise ValueError("At present only one of jwks or introspection should be configured") + return self + class OAuth2TokenAuthProvider(AuthProvider): """ @@ -240,8 +264,17 @@ class OAuth2TokenAuthProvider(AuthProvider): self._jwks_lock = Lock() async def validate_token(self, token: str, scope: dict | None = None) -> TokenValidationResult: + if self.config.jwks: + return await self.validate_jwt_token(token, self.config.jwks, scope) + if self.config.introspection: + return await self.introspect_token(token, self.config.introspection, scope) + raise ValueError("One of jwks or introspection must be configured") + + async def validate_jwt_token( + self, token: str, config: OAuth2JWKSConfig, scope: dict | None = None + ) -> TokenValidationResult: """Validate a token using the JWT token.""" - await self._refresh_jwks() + await self._refresh_jwks(config) try: header = jwt.get_unverified_header(token) @@ -269,14 +302,61 @@ class OAuth2TokenAuthProvider(AuthProvider): access_attributes=access_attributes, ) - async def close(self): - """Close the HTTP client.""" + async def introspect_token( + self, token: str, config: OAuth2IntrospectionConfig, scope: dict | None = None + ) -> TokenValidationResult: + """Validate a token using token introspection as defined by RFC 7662.""" + form = { + "token": token, + } + if config.send_secret_in_body: + form["client_id"] = config.client_id + form["client_secret"] = config.client_secret + auth = None + else: + auth = (config.client_id, config.client_secret) + ssl_ctxt = None + if config.tls_cafile: + ssl_ctxt = ssl.create_default_context(cafile=config.tls_cafile) + try: + async with httpx.AsyncClient(verify=ssl_ctxt) as client: + response = await client.post( + config.url, + data=form, + auth=auth, + timeout=10.0, # Add a reasonable timeout + ) + if response.status_code != 200: + logger.warning(f"Token introspection failed with status 
code: {response.status_code}") + raise ValueError(f"Token introspection failed: {response.status_code}") - async def _refresh_jwks(self) -> None: + fields = response.json() + if not fields["active"]: + raise ValueError("Token not active") + principal = fields["sub"] or fields["username"] + access_attributes = get_attributes_from_claims(fields, self.config.claims_mapping) + return TokenValidationResult( + principal=principal, + access_attributes=access_attributes, + ) + except httpx.TimeoutException: + logger.exception("Token introspection request timed out") + raise + except ValueError: + # Re-raise ValueError exceptions to preserve their message + raise + except Exception as e: + logger.exception("Error during token introspection") + raise ValueError("Token introspection error") from e + + async def close(self): + pass + + async def _refresh_jwks(self, config: OAuth2JWKSConfig) -> None: async with self._jwks_lock: - if time.time() - self._jwks_at > self.config.cache_ttl: + if time.time() - self._jwks_at > config.cache_ttl: async with httpx.AsyncClient() as client: - res = await client.get(self.config.jwks_uri, timeout=5) + res = await client.get(config.uri, timeout=5) res.raise_for_status() jwks_data = res.json()["keys"] updated = {} diff --git a/tests/unit/server/test_auth.py b/tests/unit/server/test_auth.py index f15ca9de4..56458c0e7 100644 --- a/tests/unit/server/test_auth.py +++ b/tests/unit/server/test_auth.py @@ -396,8 +396,10 @@ def oauth2_app(): auth_config = AuthProviderConfig( provider_type=AuthProviderType.OAUTH2_TOKEN, config={ - "jwks_uri": "http://mock-authz-service/token/introspect", - "cache_ttl": "3600", + "jwks": { + "uri": "http://mock-authz-service/token/introspect", + "cache_ttl": "3600", + }, "audience": "llama-stack", }, ) @@ -517,3 +519,159 @@ def test_get_attributes_from_claims(): # TODO: add more tests for oauth2 token provider + + +# oauth token introspection tests +@pytest.fixture +def mock_introspection_endpoint(): + return 
"http://mock-authz-service/token/introspect" + + +@pytest.fixture +def introspection_app(mock_introspection_endpoint): + app = FastAPI() + auth_config = AuthProviderConfig( + provider_type=AuthProviderType.OAUTH2_TOKEN, + config={ + "jwks": None, + "introspection": {"url": mock_introspection_endpoint, "client_id": "myclient", "client_secret": "abcdefg"}, + }, + ) + app.add_middleware(AuthenticationMiddleware, auth_config=auth_config) + + @app.get("/test") + def test_endpoint(): + return {"message": "Authentication successful"} + + return app + + +@pytest.fixture +def introspection_app_with_custom_mapping(mock_introspection_endpoint): + app = FastAPI() + auth_config = AuthProviderConfig( + provider_type=AuthProviderType.OAUTH2_TOKEN, + config={ + "jwks": None, + "introspection": { + "url": mock_introspection_endpoint, + "client_id": "myclient", + "client_secret": "abcdefg", + "send_secret_in_body": "true", + }, + "claims_mapping": { + "sub": "roles", + "scope": "roles", + "groups": "teams", + "aud": "namespaces", + }, + }, + ) + app.add_middleware(AuthenticationMiddleware, auth_config=auth_config) + + @app.get("/test") + def test_endpoint(): + return {"message": "Authentication successful"} + + return app + + +@pytest.fixture +def introspection_client(introspection_app): + return TestClient(introspection_app) + + +@pytest.fixture +def introspection_client_with_custom_mapping(introspection_app_with_custom_mapping): + return TestClient(introspection_app_with_custom_mapping) + + +def test_missing_auth_header_introspection(introspection_client): + response = introspection_client.get("/test") + assert response.status_code == 401 + assert "Missing or invalid Authorization header" in response.json()["error"]["message"] + + +def test_invalid_auth_header_format_introspection(introspection_client): + response = introspection_client.get("/test", headers={"Authorization": "InvalidFormat token123"}) + assert response.status_code == 401 + assert "Missing or invalid Authorization 
header" in response.json()["error"]["message"] + + +async def mock_introspection_active(*args, **kwargs): + return MockResponse( + 200, + { + "active": True, + "sub": "my-user", + "groups": ["group1", "group2"], + "scope": "foo bar", + "aud": ["set1", "set2"], + }, + ) + + +async def mock_introspection_inactive(*args, **kwargs): + return MockResponse( + 200, + { + "active": False, + }, + ) + + +async def mock_introspection_invalid(*args, **kwargs): + class InvalidResponse: + def __init__(self, status_code): + self.status_code = status_code + + def json(self): + raise ValueError("Not JSON") + + return InvalidResponse(200) + + +async def mock_introspection_failed(*args, **kwargs): + return MockResponse( + 500, + {}, + ) + + +@patch("httpx.AsyncClient.post", new=mock_introspection_active) +def test_valid_introspection_authentication(introspection_client, valid_api_key): + response = introspection_client.get("/test", headers={"Authorization": f"Bearer {valid_api_key}"}) + assert response.status_code == 200 + assert response.json() == {"message": "Authentication successful"} + + +@patch("httpx.AsyncClient.post", new=mock_introspection_inactive) +def test_inactive_introspection_authentication(introspection_client, invalid_api_key): + response = introspection_client.get("/test", headers={"Authorization": f"Bearer {invalid_api_key}"}) + assert response.status_code == 401 + assert "Token not active" in response.json()["error"]["message"] + + +@patch("httpx.AsyncClient.post", new=mock_introspection_invalid) +def test_invalid_introspection_authentication(introspection_client, invalid_api_key): + response = introspection_client.get("/test", headers={"Authorization": f"Bearer {invalid_api_key}"}) + assert response.status_code == 401 + assert "Not JSON" in response.json()["error"]["message"] + + +@patch("httpx.AsyncClient.post", new=mock_introspection_failed) +def test_failed_introspection_authentication(introspection_client, invalid_api_key): + response = 
introspection_client.get("/test", headers={"Authorization": f"Bearer {invalid_api_key}"}) + assert response.status_code == 401 + assert "Token introspection failed: 500" in response.json()["error"]["message"] + + +@patch("httpx.AsyncClient.post", new=mock_introspection_active) +def test_valid_introspection_with_custom_mapping_authentication( + introspection_client_with_custom_mapping, valid_api_key +): + response = introspection_client_with_custom_mapping.get( + "/test", headers={"Authorization": f"Bearer {valid_api_key}"} + ) + assert response.status_code == 200 + assert response.json() == {"message": "Authentication successful"} From 5a3d777b20ea19870cc4ffec70af31055f1aacbc Mon Sep 17 00:00:00 2001 From: Abhishek koserwal Date: Wed, 21 May 2025 13:55:51 +0530 Subject: [PATCH 2/7] feat: add llama stack rm command (#2127) # What does this PR do? [Provide a short summary of what this PR does and why. Link to relevant issues if applicable.] ``` llama stack rm llamastack-test ``` [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) #225 ## Test Plan [Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.*] [//]: # (## Documentation) --- docs/source/distributions/building_distro.md | 42 +++++++ llama_stack/cli/stack/list_stacks.py | 56 +++++++++ llama_stack/cli/stack/remove.py | 116 +++++++++++++++++++ llama_stack/cli/stack/stack.py | 5 +- 4 files changed, 218 insertions(+), 1 deletion(-) create mode 100644 llama_stack/cli/stack/list_stacks.py create mode 100644 llama_stack/cli/stack/remove.py diff --git a/docs/source/distributions/building_distro.md b/docs/source/distributions/building_distro.md index d9b73c910..0dbabf8aa 100644 --- a/docs/source/distributions/building_distro.md +++ b/docs/source/distributions/building_distro.md @@ -338,6 +338,48 @@ INFO: Application startup complete. 
INFO: Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit) INFO: 2401:db00:35c:2d2b:face:0:c9:0:54678 - "GET /models/list HTTP/1.1" 200 OK ``` +### Listing Distributions +Using the list command, you can view all existing Llama Stack distributions, including stacks built from templates, from scratch, or using custom configuration files. + +``` +llama stack list -h +usage: llama stack list [-h] + +list the build stacks + +options: + -h, --help show this help message and exit +``` + +Example Usage + +``` +llama stack list +``` + +### Removing a Distribution +Use the remove command to delete a distribution you've previously built. + +``` +llama stack rm -h +usage: llama stack rm [-h] [--all] [name] + +Remove the build stack + +positional arguments: + name Name of the stack to delete (default: None) + +options: + -h, --help show this help message and exit + --all, -a Delete all stacks (use with caution) (default: False) +``` + +Example +``` +llama stack rm llamastack-test +``` + +To keep your environment organized and avoid clutter, consider using `llama stack list` to review old or unused distributions and `llama stack rm ` to delete them when they’re no longer needed. ### Troubleshooting diff --git a/llama_stack/cli/stack/list_stacks.py b/llama_stack/cli/stack/list_stacks.py new file mode 100644 index 000000000..2ea0fdeea --- /dev/null +++ b/llama_stack/cli/stack/list_stacks.py @@ -0,0 +1,56 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +import argparse +from pathlib import Path + +from llama_stack.cli.subcommand import Subcommand +from llama_stack.cli.table import print_table + + +class StackListBuilds(Subcommand): + """List built stacks in .llama/distributions directory""" + + def __init__(self, subparsers: argparse._SubParsersAction): + super().__init__() + self.parser = subparsers.add_parser( + "list", + prog="llama stack list", + description="list the build stacks", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + self._add_arguments() + self.parser.set_defaults(func=self._list_stack_command) + + def _get_distribution_dirs(self) -> dict[str, Path]: + """Return a dictionary of distribution names and their paths""" + distributions = {} + dist_dir = Path.home() / ".llama" / "distributions" + + if dist_dir.exists(): + for stack_dir in dist_dir.iterdir(): + if stack_dir.is_dir(): + distributions[stack_dir.name] = stack_dir + return distributions + + def _list_stack_command(self, args: argparse.Namespace) -> None: + distributions = self._get_distribution_dirs() + + if not distributions: + print("No stacks found in ~/.llama/distributions") + return + + headers = ["Stack Name", "Path"] + headers.extend(["Build Config", "Run Config"]) + rows = [] + for name, path in distributions.items(): + row = [name, str(path)] + # Check for build and run config files + build_config = "Yes" if (path / f"{name}-build.yaml").exists() else "No" + run_config = "Yes" if (path / f"{name}-run.yaml").exists() else "No" + row.extend([build_config, run_config]) + rows.append(row) + print_table(rows, headers, separate_rows=True) diff --git a/llama_stack/cli/stack/remove.py b/llama_stack/cli/stack/remove.py new file mode 100644 index 000000000..be7c49a5d --- /dev/null +++ b/llama_stack/cli/stack/remove.py @@ -0,0 +1,116 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. 
+# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import argparse +import shutil +import sys +from pathlib import Path + +from termcolor import cprint + +from llama_stack.cli.subcommand import Subcommand +from llama_stack.cli.table import print_table + + +class StackRemove(Subcommand): + """Remove the build stack""" + + def __init__(self, subparsers: argparse._SubParsersAction): + super().__init__() + self.parser = subparsers.add_parser( + "rm", + prog="llama stack rm", + description="Remove the build stack", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + self._add_arguments() + self.parser.set_defaults(func=self._remove_stack_build_command) + + def _add_arguments(self) -> None: + self.parser.add_argument( + "name", + type=str, + nargs="?", + help="Name of the stack to delete", + ) + self.parser.add_argument( + "--all", + "-a", + action="store_true", + help="Delete all stacks (use with caution)", + ) + + def _get_distribution_dirs(self) -> dict[str, Path]: + """Return a dictionary of distribution names and their paths""" + distributions = {} + dist_dir = Path.home() / ".llama" / "distributions" + + if dist_dir.exists(): + for stack_dir in dist_dir.iterdir(): + if stack_dir.is_dir(): + distributions[stack_dir.name] = stack_dir + return distributions + + def _list_stacks(self) -> None: + """Display available stacks in a table""" + distributions = self._get_distribution_dirs() + if not distributions: + print("No stacks found in ~/.llama/distributions") + return + + headers = ["Stack Name", "Path"] + rows = [[name, str(path)] for name, path in distributions.items()] + print_table(rows, headers, separate_rows=True) + + def _remove_stack_build_command(self, args: argparse.Namespace) -> None: + distributions = self._get_distribution_dirs() + + if args.all: + confirm = input("Are you sure you want to delete ALL stacks? 
[yes-i-really-want/N] ").lower() + if confirm != "yes-i-really-want": + print("Deletion cancelled.") + return + + for name, path in distributions.items(): + try: + shutil.rmtree(path) + print(f"Deleted stack: {name}") + except Exception as e: + cprint( + f"Failed to delete stack {name}: {e}", + color="red", + ) + sys.exit(2) + + if not args.name: + self._list_stacks() + if not args.name: + return + + if args.name not in distributions: + self._list_stacks() + cprint( + f"Stack not found: {args.name}", + color="red", + ) + return + + stack_path = distributions[args.name] + + confirm = input(f"Are you sure you want to delete stack '{args.name}'? [y/N] ").lower() + if confirm != "y": + print("Deletion cancelled.") + return + + try: + shutil.rmtree(stack_path) + print(f"Successfully deleted stack: {args.name}") + except Exception as e: + cprint( + f"Failed to delete stack {args.name}: {e}", + color="red", + ) + sys.exit(2) diff --git a/llama_stack/cli/stack/stack.py b/llama_stack/cli/stack/stack.py index ccf1a5ffc..3aff78e23 100644 --- a/llama_stack/cli/stack/stack.py +++ b/llama_stack/cli/stack/stack.py @@ -7,12 +7,14 @@ import argparse from importlib.metadata import version +from llama_stack.cli.stack.list_stacks import StackListBuilds from llama_stack.cli.stack.utils import print_subcommand_description from llama_stack.cli.subcommand import Subcommand from .build import StackBuild from .list_apis import StackListApis from .list_providers import StackListProviders +from .remove import StackRemove from .run import StackRun @@ -41,5 +43,6 @@ class StackParser(Subcommand): StackListApis.create(subparsers) StackListProviders.create(subparsers) StackRun.create(subparsers) - + StackRemove.create(subparsers) + StackListBuilds.create(subparsers) print_subcommand_description(self.parser, subparsers) From 2890243107c74a7a88b82595db49e9540d0a0561 Mon Sep 17 00:00:00 2001 From: liangwen12year <36004580+liangwen12year@users.noreply.github.com> Date: Wed, 21 May 2025 04:58:45 -0400 
Subject: [PATCH 3/7] =?UTF-8?q?feat(quota):=20add=20server=E2=80=91side=20?= =?UTF-8?q?per=E2=80=91client=20request=20quotas=20(requires=20auth)=20(#2?= =?UTF-8?q?096)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? feat(quota): add server‑side per‑client request quotas (requires auth) Unrestricted usage can lead to runaway costs and fragmented client-side workarounds. This commit introduces a native quota mechanism to the server, giving operators a unified, centrally managed throttle for per-client requests—without needing extra proxies or custom client logic. This helps contain cloud-compute expenses, enables fine-grained usage control, and simplifies deployment and monitoring of Llama Stack services. Quotas are fully opt-in and have no effect unless explicitly configured. Notice that Quotas are fully opt-in and require authentication to be enabled. The 'sqlite' is the only supported quota `type` at this time, any other `type` will be rejected. And the only supported `period` is 'day'. 
Highlights: - Adds `QuotaMiddleware` to enforce per-client request quotas: - Uses `Authorization: Bearer ` (from AuthenticationMiddleware) - Tracks usage via a SQLite-based KV store - Returns 429 when the quota is exceeded - Extends `ServerConfig` with a `quota` section (type + config) - Enforces strict coupling: quotas require authentication or the server will fail to start Behavior changes: - Quotas are disabled by default unless explicitly configured - SQLite defaults to `./quotas.db` if no DB path is set - The server requires authentication when quotas are enabled To enable per-client request quotas in `run.yaml`, add: ``` server: port: 8321 auth: provider_type: "custom" config: endpoint: "https://auth.example.com/validate" quota: type: sqlite config: db_path: ./quotas.db limit: max_requests: 1000 period: day [//]: # (If resolving an issue, uncomment and update the line below) Closes #2093 ## Test Plan [Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.*] [//]: # (## Documentation) Signed-off-by: Wen Liang Co-authored-by: Wen Liang --- docs/source/distributions/configuration.md | 74 ++++++++++++ llama_stack/distribution/datatypes.py | 19 ++- llama_stack/distribution/server/auth.py | 4 + llama_stack/distribution/server/quota.py | 110 ++++++++++++++++++ llama_stack/distribution/server/server.py | 30 +++++ tests/unit/server/test_quota.py | 127 +++++++++++++++++++++ 6 files changed, 363 insertions(+), 1 deletion(-) create mode 100644 llama_stack/distribution/server/quota.py create mode 100644 tests/unit/server/test_quota.py diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md index b62227a84..7a42f503a 100644 --- a/docs/source/distributions/configuration.md +++ b/docs/source/distributions/configuration.md @@ -208,6 +208,80 @@ And must respond with: If no access attributes are returned, the token is used as a namespace. 
+### Quota Configuration + +The `quota` section allows you to enable server-side request throttling for both +authenticated and anonymous clients. This is useful for preventing abuse, enforcing +fairness across tenants, and controlling infrastructure costs without requiring +client-side rate limiting or external proxies. + +Quotas are disabled by default. When enabled, each client is tracked using either: + +* Their authenticated `client_id` (derived from the Bearer token), or +* Their IP address (fallback for anonymous requests) + +Quota state is stored in a SQLite-backed key-value store, and rate limits are applied +within a configurable time window (currently only `day` is supported). + +#### Example + +```yaml +server: + quota: + kvstore: + type: sqlite + db_path: ./quotas.db + anonymous_max_requests: 100 + authenticated_max_requests: 1000 + period: day +``` + +#### Configuration Options + +| Field | Description | +| ---------------------------- | -------------------------------------------------------------------------- | +| `kvstore` | Required. Backend storage config for tracking request counts. | +| `kvstore.type` | Must be `"sqlite"` for now. Other backends may be supported in the future. | +| `kvstore.db_path` | File path to the SQLite database. | +| `anonymous_max_requests` | Max requests per period for unauthenticated clients. | +| `authenticated_max_requests` | Max requests per period for authenticated clients. | +| `period` | Time window for quota enforcement. Only `"day"` is supported. | + +> Note: if `authenticated_max_requests` is set but no authentication provider is +configured, the server will fall back to applying `anonymous_max_requests` to all +clients. 
+ +#### Example with Authentication Enabled + +```yaml +server: + port: 8321 + auth: + provider_type: custom + config: + endpoint: https://auth.example.com/validate + quota: + kvstore: + type: sqlite + db_path: ./quotas.db + anonymous_max_requests: 100 + authenticated_max_requests: 1000 + period: day +``` + +If a client exceeds their limit, the server responds with: + +```http +HTTP/1.1 429 Too Many Requests +Content-Type: application/json + +{ + "error": { + "message": "Quota exceeded" + } +} +``` + ## Extending to handle Safety Configuring Safety can be a little involved so it is instructive to go through an example. diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index be5629ba1..ca3664828 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -25,7 +25,7 @@ from llama_stack.apis.tools import Tool, ToolGroup, ToolGroupInput, ToolRuntime from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput from llama_stack.apis.vector_io import VectorIO from llama_stack.providers.datatypes import Api, ProviderSpec -from llama_stack.providers.utils.kvstore.config import KVStoreConfig +from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig LLAMA_STACK_BUILD_CONFIG_VERSION = "2" LLAMA_STACK_RUN_CONFIG_VERSION = "2" @@ -235,6 +235,19 @@ class AuthenticationConfig(BaseModel): ) +class QuotaPeriod(str, Enum): + DAY = "day" + + +class QuotaConfig(BaseModel): + kvstore: SqliteKVStoreConfig = Field(description="Config for KV store backend (SQLite only for now)") + anonymous_max_requests: int = Field(default=100, description="Max requests for unauthenticated clients per period") + authenticated_max_requests: int = Field( + default=1000, description="Max requests for authenticated clients per period" + ) + period: QuotaPeriod = Field(default=QuotaPeriod.DAY, description="Quota period to set") + + class ServerConfig(BaseModel): port: int = Field( default=8321, 
@@ -262,6 +275,10 @@ class ServerConfig(BaseModel): default=None, description="The host the server should listen on", ) + quota: QuotaConfig | None = Field( + default=None, + description="Per client quota request configuration", + ) class StackRunConfig(BaseModel): diff --git a/llama_stack/distribution/server/auth.py b/llama_stack/distribution/server/auth.py index 83436c51f..67acffe3e 100644 --- a/llama_stack/distribution/server/auth.py +++ b/llama_stack/distribution/server/auth.py @@ -113,6 +113,10 @@ class AuthenticationMiddleware: "roles": [token], } + # Store the client ID in the request scope so that downstream middleware (like QuotaMiddleware) + # can identify the requester and enforce per-client rate limits. + scope["authenticated_client_id"] = token + # Store attributes in request scope scope["user_attributes"] = user_attributes scope["principal"] = validation_result.principal diff --git a/llama_stack/distribution/server/quota.py b/llama_stack/distribution/server/quota.py new file mode 100644 index 000000000..ddbffae64 --- /dev/null +++ b/llama_stack/distribution/server/quota.py @@ -0,0 +1,110 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import json +import time +from datetime import datetime, timedelta, timezone + +from starlette.types import ASGIApp, Receive, Scope, Send + +from llama_stack.log import get_logger +from llama_stack.providers.utils.kvstore.api import KVStore +from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig +from llama_stack.providers.utils.kvstore.kvstore import kvstore_impl + +logger = get_logger(name=__name__, category="quota") + + +class QuotaMiddleware: + """ + ASGI middleware that enforces separate quotas for authenticated and anonymous clients + within a configurable time window. 
+
+    - For authenticated requests, it reads the client ID from the
+      `Authorization: Bearer <token>` header.
+    - For anonymous requests, it falls back to the IP address of the client.
+    Requests are counted in a KV store (e.g., SQLite), and HTTP 429 is returned
+    once a client exceeds its quota.
+    """
+
+    def __init__(
+        self,
+        app: ASGIApp,
+        kv_config: KVStoreConfig,
+        anonymous_max_requests: int,
+        authenticated_max_requests: int,
+        window_seconds: int = 86400,
+    ):
+        self.app = app
+        self.kv_config = kv_config
+        self.kv: KVStore | None = None
+        self.anonymous_max_requests = anonymous_max_requests
+        self.authenticated_max_requests = authenticated_max_requests
+        self.window_seconds = window_seconds
+
+        if isinstance(self.kv_config, SqliteKVStoreConfig):
+            logger.warning(
+                "QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. "
+                f"window_seconds={self.window_seconds}"
+            )
+
+    async def _get_kv(self) -> KVStore:
+        if self.kv is None:
+            self.kv = await kvstore_impl(self.kv_config)
+        return self.kv
+
+    async def __call__(self, scope: Scope, receive: Receive, send: Send):
+        if scope["type"] == "http":
+            # pick key & limit based on auth
+            auth_id = scope.get("authenticated_client_id")
+            if auth_id:
+                key_id = auth_id
+                limit = self.authenticated_max_requests
+            else:
+                # fallback to IP
+                client = scope.get("client")
+                key_id = client[0] if client else "anonymous"
+                limit = self.anonymous_max_requests
+
+            current_window = int(time.time() // self.window_seconds)
+            key = f"quota:{key_id}:{current_window}"
+
+            try:
+                kv = await self._get_kv()
+                prev = await kv.get(key) or "0"
+                count = int(prev) + 1
+
+                if int(prev) == 0:
+                    # Set with expiration datetime when it is the first request in the window.
+ expiration = datetime.now(timezone.utc) + timedelta(seconds=self.window_seconds) + await kv.set(key, str(count), expiration=expiration) + else: + await kv.set(key, str(count)) + except Exception: + logger.exception("Failed to access KV store for quota") + return await self._send_error(send, 500, "Quota service error") + + if count > limit: + logger.warning( + "Quota exceeded for client %s: %d/%d", + key_id, + count, + limit, + ) + return await self._send_error(send, 429, "Quota exceeded") + + return await self.app(scope, receive, send) + + async def _send_error(self, send: Send, status: int, message: str): + await send( + { + "type": "http.response.start", + "status": status, + "headers": [[b"content-type", b"application/json"]], + } + ) + body = json.dumps({"error": {"message": message}}).encode() + await send({"type": "http.response.body", "body": body}) diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index e25bf0817..52f2b71b0 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -60,6 +60,7 @@ from llama_stack.providers.utils.telemetry.tracing import ( from .auth import AuthenticationMiddleware from .endpoints import get_all_api_endpoints +from .quota import QuotaMiddleware REPO_ROOT = Path(__file__).parent.parent.parent.parent @@ -434,6 +435,35 @@ def main(args: argparse.Namespace | None = None): if config.server.auth: logger.info(f"Enabling authentication with provider: {config.server.auth.provider_type.value}") app.add_middleware(AuthenticationMiddleware, auth_config=config.server.auth) + else: + if config.server.quota: + quota = config.server.quota + logger.warning( + "Configured authenticated_max_requests (%d) but no auth is enabled; " + "falling back to anonymous_max_requests (%d) for all the requests", + quota.authenticated_max_requests, + quota.anonymous_max_requests, + ) + + if config.server.quota: + logger.info("Enabling quota middleware for 
authenticated and anonymous clients") + + quota = config.server.quota + anonymous_max_requests = quota.anonymous_max_requests + # if auth is disabled, use the anonymous max requests + authenticated_max_requests = quota.authenticated_max_requests if config.server.auth else anonymous_max_requests + + kv_config = quota.kvstore + window_map = {"day": 86400} + window_seconds = window_map[quota.period.value] + + app.add_middleware( + QuotaMiddleware, + kv_config=kv_config, + anonymous_max_requests=anonymous_max_requests, + authenticated_max_requests=authenticated_max_requests, + window_seconds=window_seconds, + ) try: impls = asyncio.run(construct_stack(config)) diff --git a/tests/unit/server/test_quota.py b/tests/unit/server/test_quota.py new file mode 100644 index 000000000..763bf8e94 --- /dev/null +++ b/tests/unit/server/test_quota.py @@ -0,0 +1,127 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import pytest +from fastapi import FastAPI, Request +from fastapi.testclient import TestClient +from starlette.middleware.base import BaseHTTPMiddleware + +from llama_stack.distribution.datatypes import QuotaConfig, QuotaPeriod +from llama_stack.distribution.server.quota import QuotaMiddleware +from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig + + +class InjectClientIDMiddleware(BaseHTTPMiddleware): + """ + Middleware that injects 'authenticated_client_id' to mimic AuthenticationMiddleware. 
+ """ + + def __init__(self, app, client_id="client1"): + super().__init__(app) + self.client_id = client_id + + async def dispatch(self, request: Request, call_next): + request.scope["authenticated_client_id"] = self.client_id + return await call_next(request) + + +def build_quota_config(db_path) -> QuotaConfig: + return QuotaConfig( + kvstore=SqliteKVStoreConfig(db_path=str(db_path)), + anonymous_max_requests=1, + authenticated_max_requests=2, + period=QuotaPeriod.DAY, + ) + + +@pytest.fixture +def auth_app(tmp_path, request): + """ + FastAPI app with InjectClientIDMiddleware and QuotaMiddleware for authenticated testing. + Each test gets its own DB file. + """ + inner_app = FastAPI() + + @inner_app.get("/test") + async def test_endpoint(): + return {"message": "ok"} + + db_path = tmp_path / f"quota_{request.node.name}.db" + quota = build_quota_config(db_path) + + app = InjectClientIDMiddleware( + QuotaMiddleware( + inner_app, + kv_config=quota.kvstore, + anonymous_max_requests=quota.anonymous_max_requests, + authenticated_max_requests=quota.authenticated_max_requests, + window_seconds=86400, + ), + client_id=f"client_{request.node.name}", + ) + return app + + +def test_authenticated_quota_allows_up_to_limit(auth_app): + client = TestClient(auth_app) + assert client.get("/test").status_code == 200 + assert client.get("/test").status_code == 200 + + +def test_authenticated_quota_blocks_after_limit(auth_app): + client = TestClient(auth_app) + client.get("/test") + client.get("/test") + resp = client.get("/test") + assert resp.status_code == 429 + assert resp.json()["error"]["message"] == "Quota exceeded" + + +def test_anonymous_quota_allows_up_to_limit(tmp_path, request): + inner_app = FastAPI() + + @inner_app.get("/test") + async def test_endpoint(): + return {"message": "ok"} + + db_path = tmp_path / f"quota_anon_{request.node.name}.db" + quota = build_quota_config(db_path) + + app = QuotaMiddleware( + inner_app, + kv_config=quota.kvstore, + 
anonymous_max_requests=quota.anonymous_max_requests,
+        authenticated_max_requests=quota.authenticated_max_requests,
+        window_seconds=86400,
+    )
+
+    client = TestClient(app)
+    assert client.get("/test").status_code == 200
+
+
+def test_anonymous_quota_blocks_after_limit(tmp_path, request):
+    inner_app = FastAPI()
+
+    @inner_app.get("/test")
+    async def test_endpoint():
+        return {"message": "ok"}
+
+    db_path = tmp_path / f"quota_anon_{request.node.name}.db"
+    quota = build_quota_config(db_path)
+
+    app = QuotaMiddleware(
+        inner_app,
+        kv_config=quota.kvstore,
+        anonymous_max_requests=quota.anonymous_max_requests,
+        authenticated_max_requests=quota.authenticated_max_requests,
+        window_seconds=86400,
+    )
+
+    client = TestClient(app)
+    client.get("/test")
+    resp = client.get("/test")
+    assert resp.status_code == 429
+    assert resp.json()["error"]["message"] == "Quota exceeded"

From c25acedbcd910c9643269f655b058906ac53a0b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Han?=
Date: Wed, 21 May 2025 16:23:54 +0200
Subject: [PATCH 4/7] chore: remove k8s auth in favor of k8s jwks endpoint (#2216)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

# What does this PR do?

Kubernetes since 1.20 exposes a JWKS endpoint that we can use with our
recent oauth2 implementation.

The CI test has been kept intact for validation.
Signed-off-by: Sébastien Han --- .github/workflows/integration-auth-tests.yml | 39 ++++- docs/source/distributions/configuration.md | 68 ++++++-- llama_stack/distribution/datatypes.py | 4 +- llama_stack/distribution/server/auth.py | 5 +- .../distribution/server/auth_providers.py | 162 +++++------------- pyproject.toml | 1 - requirements.txt | 8 - tests/unit/server/test_auth.py | 121 +------------ uv.lock | 98 +---------- 9 files changed, 147 insertions(+), 359 deletions(-) diff --git a/.github/workflows/integration-auth-tests.yml b/.github/workflows/integration-auth-tests.yml index 82a76ad32..994bd1dec 100644 --- a/.github/workflows/integration-auth-tests.yml +++ b/.github/workflows/integration-auth-tests.yml @@ -23,7 +23,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - auth-provider: [kubernetes] + auth-provider: [oauth2_token] fail-fast: false # we want to run all tests regardless of failure steps: @@ -47,29 +47,53 @@ jobs: uses: medyagh/setup-minikube@cea33675329b799adccc9526aa5daccc26cd5052 # v0.0.19 - name: Start minikube - if: ${{ matrix.auth-provider == 'kubernetes' }} + if: ${{ matrix.auth-provider == 'oauth2_token' }} run: | minikube start kubectl get pods -A - name: Configure Kube Auth - if: ${{ matrix.auth-provider == 'kubernetes' }} + if: ${{ matrix.auth-provider == 'oauth2_token' }} run: | kubectl create namespace llama-stack kubectl create serviceaccount llama-stack-auth -n llama-stack kubectl create rolebinding llama-stack-auth-rolebinding --clusterrole=admin --serviceaccount=llama-stack:llama-stack-auth -n llama-stack kubectl create token llama-stack-auth -n llama-stack > llama-stack-auth-token + cat <> $GITHUB_ENV + echo "KUBERNETES_API_SERVER_URL=$(kubectl get --raw /.well-known/openid-configuration| jq -r .jwks_uri)" >> $GITHUB_ENV echo "KUBERNETES_CA_CERT_PATH=$(kubectl config view --minify -o jsonpath='{.clusters[0].cluster.certificate-authority}')" >> $GITHUB_ENV + echo "KUBERNETES_ISSUER=$(kubectl get --raw 
/.well-known/openid-configuration| jq -r .issuer)" >> $GITHUB_ENV + echo "KUBERNETES_AUDIENCE=$(kubectl create token default --duration=1h | cut -d. -f2 | base64 -d | jq -r '.aud[0]')" >> $GITHUB_ENV - name: Set Kube Auth Config and run server env: INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" - if: ${{ matrix.auth-provider == 'kubernetes' }} + if: ${{ matrix.auth-provider == 'oauth2_token' }} run: | run_dir=$(mktemp -d) cat <<'EOF' > $run_dir/run.yaml @@ -81,7 +105,8 @@ jobs: port: 8321 EOF yq eval '.server.auth = {"provider_type": "${{ matrix.auth-provider }}"}' -i $run_dir/run.yaml - yq eval '.server.auth.config = {"api_server_url": "${{ env.KUBERNETES_API_SERVER_URL }}", "ca_cert_path": "${{ env.KUBERNETES_CA_CERT_PATH }}"}' -i $run_dir/run.yaml + yq eval '.server.auth.config = {"tls_cafile": "${{ env.KUBERNETES_CA_CERT_PATH }}", "issuer": "${{ env.KUBERNETES_ISSUER }}", "audience": "${{ env.KUBERNETES_AUDIENCE }}"}' -i $run_dir/run.yaml + yq eval '.server.auth.config.jwks = {"uri": "${{ env.KUBERNETES_API_SERVER_URL }}"}' -i $run_dir/run.yaml cat $run_dir/run.yaml source .venv/bin/activate diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md index 7a42f503a..77b52a621 100644 --- a/docs/source/distributions/configuration.md +++ b/docs/source/distributions/configuration.md @@ -118,11 +118,6 @@ server: port: 8321 # Port to listen on (default: 8321) tls_certfile: "/path/to/cert.pem" # Optional: Path to TLS certificate for HTTPS tls_keyfile: "/path/to/key.pem" # Optional: Path to TLS key for HTTPS - auth: # Optional: Authentication configuration - provider_type: "kubernetes" # Type of auth provider - config: # Provider-specific configuration - api_server_url: "https://kubernetes.default.svc" - ca_cert_path: "/path/to/ca.crt" # Optional: Path to CA certificate ``` ### Authentication Configuration @@ -135,7 +130,7 @@ Authorization: Bearer The server supports multiple authentication providers: -#### Kubernetes 
Provider +#### OAuth 2.0/OpenID Connect Provider with Kubernetes The Kubernetes cluster must be configured to use a service account for authentication. @@ -146,14 +141,67 @@ kubectl create rolebinding llama-stack-auth-rolebinding --clusterrole=admin --se kubectl create token llama-stack-auth -n llama-stack > llama-stack-auth-token ``` -Validates tokens against the Kubernetes API server: +Make sure the `kube-apiserver` runs with `--anonymous-auth=true` to allow unauthenticated requests +and that the correct RoleBinding is created to allow the service account to access the necessary +resources. If that is not the case, you can create a RoleBinding for the service account to access +the necessary resources: + +```yaml +# allow-anonymous-openid.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: allow-anonymous-openid +rules: +- nonResourceURLs: ["/openid/v1/jwks"] + verbs: ["get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: allow-anonymous-openid +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: allow-anonymous-openid +subjects: +- kind: User + name: system:anonymous + apiGroup: rbac.authorization.k8s.io +``` + +And then apply the configuration: +```bash +kubectl apply -f allow-anonymous-openid.yaml +``` + +Validates tokens against the Kubernetes API server through the OIDC provider: ```yaml server: auth: - provider_type: "kubernetes" + provider_type: "oauth2_token" config: - api_server_url: "https://kubernetes.default.svc" # URL of the Kubernetes API server - ca_cert_path: "/path/to/ca.crt" # Optional: Path to CA certificate + jwks: + uri: "https://kubernetes.default.svc" + cache_ttl: 3600 + tls_cafile: "/path/to/ca.crt" + issuer: "https://kubernetes.default.svc" + audience: "https://kubernetes.default.svc" +``` + +To find your cluster's audience, run: +```bash +kubectl create token default --duration=1h | cut -d. 
-f2 | base64 -d | jq .aud +``` + +For the issuer, you can use the OIDC provider's URL: +```bash +kubectl get --raw /.well-known/openid-configuration| jq .issuer +``` + +For the tls_cafile, you can use the CA certificate of the OIDC provider: +```bash +kubectl config view --minify -o jsonpath='{.clusters[0].cluster.certificate-authority}' ``` The provider extracts user information from the JWT token: diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index ca3664828..eb790ad93 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -220,14 +220,14 @@ class LoggingConfig(BaseModel): class AuthProviderType(str, Enum): """Supported authentication provider types.""" - KUBERNETES = "kubernetes" + OAUTH2_TOKEN = "oauth2_token" CUSTOM = "custom" class AuthenticationConfig(BaseModel): provider_type: AuthProviderType = Field( ..., - description="Type of authentication provider (e.g., 'kubernetes', 'custom')", + description="Type of authentication provider", ) config: dict[str, Any] = Field( ..., diff --git a/llama_stack/distribution/server/auth.py b/llama_stack/distribution/server/auth.py index 67acffe3e..fb26b49a7 100644 --- a/llama_stack/distribution/server/auth.py +++ b/llama_stack/distribution/server/auth.py @@ -8,7 +8,8 @@ import json import httpx -from llama_stack.distribution.server.auth_providers import AuthProviderConfig, create_auth_provider +from llama_stack.distribution.datatypes import AuthenticationConfig +from llama_stack.distribution.server.auth_providers import create_auth_provider from llama_stack.log import get_logger logger = get_logger(name=__name__, category="auth") @@ -77,7 +78,7 @@ class AuthenticationMiddleware: access resources that don't have access_attributes defined. 
""" - def __init__(self, app, auth_config: AuthProviderConfig): + def __init__(self, app, auth_config: AuthenticationConfig): self.app = app self.auth_provider = create_auth_provider(auth_config) diff --git a/llama_stack/distribution/server/auth_providers.py b/llama_stack/distribution/server/auth_providers.py index baab75eca..39f258c3b 100644 --- a/llama_stack/distribution/server/auth_providers.py +++ b/llama_stack/distribution/server/auth_providers.py @@ -4,13 +4,11 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import json import ssl import time from abc import ABC, abstractmethod from asyncio import Lock -from enum import Enum -from typing import Any +from pathlib import Path from urllib.parse import parse_qs import httpx @@ -18,7 +16,7 @@ from jose import jwt from pydantic import BaseModel, Field, field_validator, model_validator from typing_extensions import Self -from llama_stack.distribution.datatypes import AccessAttributes +from llama_stack.distribution.datatypes import AccessAttributes, AuthenticationConfig, AuthProviderType from llama_stack.log import get_logger logger = get_logger(name=__name__, category="auth") @@ -76,21 +74,6 @@ class AuthRequest(BaseModel): request: AuthRequestContext = Field(description="Context information about the request being authenticated") -class AuthProviderType(str, Enum): - """Supported authentication provider types.""" - - KUBERNETES = "kubernetes" - CUSTOM = "custom" - OAUTH2_TOKEN = "oauth2_token" - - -class AuthProviderConfig(BaseModel): - """Base configuration for authentication providers.""" - - provider_type: AuthProviderType = Field(..., description="Type of authentication provider") - config: dict[str, Any] = Field(..., description="Provider-specific configuration") - - class AuthProvider(ABC): """Abstract base class for authentication providers.""" @@ -105,83 +88,6 @@ class AuthProvider(ABC): pass -class 
KubernetesAuthProviderConfig(BaseModel): - api_server_url: str - ca_cert_path: str | None = None - - -class KubernetesAuthProvider(AuthProvider): - """Kubernetes authentication provider that validates tokens against the Kubernetes API server.""" - - def __init__(self, config: KubernetesAuthProviderConfig): - self.config = config - self._client = None - - async def _get_client(self): - """Get or create a Kubernetes client.""" - if self._client is None: - # kubernetes-client has not async support, see: - # https://github.com/kubernetes-client/python/issues/323 - from kubernetes import client - from kubernetes.client import ApiClient - - # Configure the client - configuration = client.Configuration() - configuration.host = self.config.api_server_url - if self.config.ca_cert_path: - configuration.ssl_ca_cert = self.config.ca_cert_path - configuration.verify_ssl = bool(self.config.ca_cert_path) - - # Create API client - self._client = ApiClient(configuration) - return self._client - - async def validate_token(self, token: str, scope: dict | None = None) -> TokenValidationResult: - """Validate a Kubernetes token and return access attributes.""" - try: - client = await self._get_client() - - # Set the token in the client - client.set_default_header("Authorization", f"Bearer {token}") - - # Make a request to validate the token - # We use the /api endpoint which requires authentication - from kubernetes.client import CoreV1Api - - api = CoreV1Api(client) - api.get_api_resources(_request_timeout=3.0) # Set timeout for this specific request - - # If we get here, the token is valid - # Extract user info from the token claims - import base64 - - # Decode the token (without verification since we've already validated it) - token_parts = token.split(".") - payload = json.loads(base64.b64decode(token_parts[1] + "=" * (-len(token_parts[1]) % 4))) - - # Extract user information from the token - username = payload.get("sub", "") - groups = payload.get("groups", []) - - return 
TokenValidationResult( - principal=username, - access_attributes=AccessAttributes( - roles=[username], # Use username as a role - teams=groups, # Use Kubernetes groups as teams - ), - ) - - except Exception as e: - logger.exception("Failed to validate Kubernetes token") - raise ValueError("Invalid or expired token") from e - - async def close(self): - """Close the HTTP client.""" - if self._client: - self._client.close() - self._client = None - - def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) -> AccessAttributes: attributes = AccessAttributes() for claim_key, attribute_key in mapping.items(): @@ -212,11 +118,13 @@ class OAuth2IntrospectionConfig(BaseModel): client_id: str client_secret: str send_secret_in_body: bool = False - tls_cafile: str | None = None class OAuth2TokenAuthProviderConfig(BaseModel): audience: str = "llama-stack" + verify_tls: bool = True + tls_cafile: Path | None = None + issuer: str | None = Field(default=None, description="The OIDC issuer URL.") claims_mapping: dict[str, str] = Field( default_factory=lambda: { "sub": "roles", @@ -265,16 +173,14 @@ class OAuth2TokenAuthProvider(AuthProvider): async def validate_token(self, token: str, scope: dict | None = None) -> TokenValidationResult: if self.config.jwks: - return await self.validate_jwt_token(token, self.config.jwks, scope) + return await self.validate_jwt_token(token, scope) if self.config.introspection: - return await self.introspect_token(token, self.config.introspection, scope) + return await self.introspect_token(token, scope) raise ValueError("One of jwks or introspection must be configured") - async def validate_jwt_token( - self, token: str, config: OAuth2JWKSConfig, scope: dict | None = None - ) -> TokenValidationResult: + async def validate_jwt_token(self, token: str, scope: dict | None = None) -> TokenValidationResult: """Validate a token using the JWT token.""" - await self._refresh_jwks(config) + await self._refresh_jwks() try: header = 
jwt.get_unverified_header(token) @@ -288,7 +194,7 @@ class OAuth2TokenAuthProvider(AuthProvider): key_data, algorithms=[algorithm], audience=self.config.audience, - options={"verify_exp": True}, + issuer=self.config.issuer, ) except Exception as exc: raise ValueError(f"Invalid JWT token: {token}") from exc @@ -302,26 +208,27 @@ class OAuth2TokenAuthProvider(AuthProvider): access_attributes=access_attributes, ) - async def introspect_token( - self, token: str, config: OAuth2IntrospectionConfig, scope: dict | None = None - ) -> TokenValidationResult: + async def introspect_token(self, token: str, scope: dict | None = None) -> TokenValidationResult: """Validate a token using token introspection as defined by RFC 7662.""" form = { "token": token, } - if config.send_secret_in_body: - form["client_id"] = config.client_id - form["client_secret"] = config.client_secret + if self.config.introspection is None: + raise ValueError("Introspection is not configured") + + if self.config.introspection.send_secret_in_body: + form["client_id"] = self.config.introspection.client_id + form["client_secret"] = self.config.introspection.client_secret auth = None else: - auth = (config.client_id, config.client_secret) + auth = (self.config.introspection.client_id, self.config.introspection.client_secret) ssl_ctxt = None - if config.tls_cafile: - ssl_ctxt = ssl.create_default_context(cafile=config.tls_cafile) + if self.config.tls_cafile: + ssl_ctxt = ssl.create_default_context(cafile=self.config.tls_cafile.as_posix()) try: async with httpx.AsyncClient(verify=ssl_ctxt) as client: response = await client.post( - config.url, + self.config.introspection.url, data=form, auth=auth, timeout=10.0, # Add a reasonable timeout @@ -352,11 +259,24 @@ class OAuth2TokenAuthProvider(AuthProvider): async def close(self): pass - async def _refresh_jwks(self, config: OAuth2JWKSConfig) -> None: + async def _refresh_jwks(self) -> None: + """ + Refresh the JWKS cache. 
+ + This is a simple cache that expires after a certain amount of time (defined by `cache_ttl`). + If the cache is expired, we refresh the JWKS from the JWKS URI. + + Notes: for Kubernetes which doesn't fully implement the OIDC protocol: + * It doesn't have user authentication flows + * It doesn't have refresh tokens + """ async with self._jwks_lock: - if time.time() - self._jwks_at > config.cache_ttl: - async with httpx.AsyncClient() as client: - res = await client.get(config.uri, timeout=5) + if self.config.jwks is None: + raise ValueError("JWKS is not configured") + if time.time() - self._jwks_at > self.config.jwks.cache_ttl: + verify = self.config.tls_cafile.as_posix() if self.config.tls_cafile else self.config.verify_tls + async with httpx.AsyncClient(verify=verify) as client: + res = await client.get(self.config.jwks.uri, timeout=5) res.raise_for_status() jwks_data = res.json()["keys"] updated = {} @@ -443,13 +363,11 @@ class CustomAuthProvider(AuthProvider): self._client = None -def create_auth_provider(config: AuthProviderConfig) -> AuthProvider: +def create_auth_provider(config: AuthenticationConfig) -> AuthProvider: """Factory function to create the appropriate auth provider.""" provider_type = config.provider_type.lower() - if provider_type == "kubernetes": - return KubernetesAuthProvider(KubernetesAuthProviderConfig.model_validate(config.config)) - elif provider_type == "custom": + if provider_type == "custom": return CustomAuthProvider(CustomAuthProviderConfig.model_validate(config.config)) elif provider_type == "oauth2_token": return OAuth2TokenAuthProvider(OAuth2TokenAuthProviderConfig.model_validate(config.config)) diff --git a/pyproject.toml b/pyproject.toml index a41830e64..8b922bafb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,6 @@ dependencies = [ "tiktoken", "pillow", "h11>=0.16.0", - "kubernetes", ] [project.optional-dependencies] diff --git a/requirements.txt b/requirements.txt index 6dfcc1024..2fe72c803 100644 --- 
a/requirements.txt +++ b/requirements.txt @@ -4,19 +4,16 @@ annotated-types==0.7.0 anyio==4.8.0 attrs==25.1.0 blobfile==3.0.0 -cachetools==5.5.2 certifi==2025.1.31 charset-normalizer==3.4.1 click==8.1.8 colorama==0.4.6 ; sys_platform == 'win32' distro==1.9.0 -durationpy==0.9 ecdsa==0.19.1 exceptiongroup==1.2.2 ; python_full_version < '3.11' filelock==3.17.0 fire==0.7.0 fsspec==2024.12.0 -google-auth==2.38.0 h11==0.16.0 httpcore==1.0.9 httpx==0.28.1 @@ -26,14 +23,12 @@ jinja2==3.1.6 jiter==0.8.2 jsonschema==4.23.0 jsonschema-specifications==2024.10.1 -kubernetes==32.0.1 llama-stack-client==0.2.7 lxml==5.3.1 markdown-it-py==3.0.0 markupsafe==3.0.2 mdurl==0.1.2 numpy==2.2.3 -oauthlib==3.2.2 openai==1.71.0 packaging==24.2 pandas==2.2.3 @@ -41,7 +36,6 @@ pillow==11.1.0 prompt-toolkit==3.0.50 pyaml==25.1.0 pyasn1==0.4.8 -pyasn1-modules==0.4.1 pycryptodomex==3.21.0 pydantic==2.10.6 pydantic-core==2.27.2 @@ -54,7 +48,6 @@ pyyaml==6.0.2 referencing==0.36.2 regex==2024.11.6 requests==2.32.3 -requests-oauthlib==2.0.0 rich==13.9.4 rpds-py==0.22.3 rsa==4.9 @@ -68,4 +61,3 @@ typing-extensions==4.12.2 tzdata==2025.1 urllib3==2.3.0 wcwidth==0.2.13 -websocket-client==1.8.0 diff --git a/tests/unit/server/test_auth.py b/tests/unit/server/test_auth.py index 56458c0e7..94c486f18 100644 --- a/tests/unit/server/test_auth.py +++ b/tests/unit/server/test_auth.py @@ -11,12 +11,10 @@ import pytest from fastapi import FastAPI from fastapi.testclient import TestClient -from llama_stack.distribution.datatypes import AccessAttributes +from llama_stack.distribution.datatypes import AuthenticationConfig from llama_stack.distribution.server.auth import AuthenticationMiddleware from llama_stack.distribution.server.auth_providers import ( - AuthProviderConfig, AuthProviderType, - TokenValidationResult, get_attributes_from_claims, ) @@ -62,7 +60,7 @@ def invalid_token(): @pytest.fixture def http_app(mock_auth_endpoint): app = FastAPI() - auth_config = AuthProviderConfig( + auth_config = 
AuthenticationConfig( provider_type=AuthProviderType.CUSTOM, config={"endpoint": mock_auth_endpoint}, ) @@ -78,7 +76,7 @@ def http_app(mock_auth_endpoint): @pytest.fixture def k8s_app(): app = FastAPI() - auth_config = AuthProviderConfig( + auth_config = AuthenticationConfig( provider_type=AuthProviderType.KUBERNETES, config={"api_server_url": "https://kubernetes.default.svc"}, ) @@ -118,7 +116,7 @@ def mock_scope(): @pytest.fixture def mock_http_middleware(mock_auth_endpoint): mock_app = AsyncMock() - auth_config = AuthProviderConfig( + auth_config = AuthenticationConfig( provider_type=AuthProviderType.CUSTOM, config={"endpoint": mock_auth_endpoint}, ) @@ -128,7 +126,7 @@ def mock_http_middleware(mock_auth_endpoint): @pytest.fixture def mock_k8s_middleware(): mock_app = AsyncMock() - auth_config = AuthProviderConfig( + auth_config = AuthenticationConfig( provider_type=AuthProviderType.KUBERNETES, config={"api_server_url": "https://kubernetes.default.svc"}, ) @@ -284,116 +282,13 @@ async def test_http_middleware_no_attributes(mock_http_middleware, mock_scope): assert attributes["roles"] == ["test.jwt.token"] -# Kubernetes Tests -def test_missing_auth_header_k8s(k8s_client): - response = k8s_client.get("/test") - assert response.status_code == 401 - assert "Missing or invalid Authorization header" in response.json()["error"]["message"] - - -def test_invalid_auth_header_format_k8s(k8s_client): - response = k8s_client.get("/test", headers={"Authorization": "InvalidFormat token123"}) - assert response.status_code == 401 - assert "Missing or invalid Authorization header" in response.json()["error"]["message"] - - -@patch("kubernetes.client.ApiClient") -def test_valid_k8s_authentication(mock_api_client, k8s_client, valid_token): - # Mock the Kubernetes client - mock_client = AsyncMock() - mock_api_client.return_value = mock_client - - # Mock successful token validation - mock_client.set_default_header = AsyncMock() - - # Mock the token validation to return valid access 
attributes - with patch("llama_stack.distribution.server.auth_providers.KubernetesAuthProvider.validate_token") as mock_validate: - mock_validate.return_value = TokenValidationResult( - principal="test-principal", - access_attributes=AccessAttributes( - roles=["admin"], teams=["ml-team"], projects=["llama-3"], namespaces=["research"] - ), - ) - response = k8s_client.get("/test", headers={"Authorization": f"Bearer {valid_token}"}) - assert response.status_code == 200 - assert response.json() == {"message": "Authentication successful"} - - -@patch("kubernetes.client.ApiClient") -def test_invalid_k8s_authentication(mock_api_client, k8s_client, invalid_token): - # Mock the Kubernetes client - mock_client = AsyncMock() - mock_api_client.return_value = mock_client - - # Mock failed token validation by raising an exception - with patch("llama_stack.distribution.server.auth_providers.KubernetesAuthProvider.validate_token") as mock_validate: - mock_validate.side_effect = ValueError("Invalid or expired token") - response = k8s_client.get("/test", headers={"Authorization": f"Bearer {invalid_token}"}) - assert response.status_code == 401 - assert "Invalid or expired token" in response.json()["error"]["message"] - - -@pytest.mark.asyncio -async def test_k8s_middleware_with_access_attributes(mock_k8s_middleware, mock_scope): - middleware, mock_app = mock_k8s_middleware - mock_receive = AsyncMock() - mock_send = AsyncMock() - - with patch("kubernetes.client.ApiClient") as mock_api_client: - mock_client = AsyncMock() - mock_api_client.return_value = mock_client - - # Mock successful token validation - mock_client.set_default_header = AsyncMock() - - # Mock token payload with access attributes - mock_token_parts = ["header", "eyJzdWIiOiJhZG1pbiIsImdyb3VwcyI6WyJtbC10ZWFtIl19", "signature"] - mock_scope["headers"][1] = (b"authorization", f"Bearer {'.'.join(mock_token_parts)}".encode()) - - await middleware(mock_scope, mock_receive, mock_send) - - assert "user_attributes" in 
mock_scope - assert mock_scope["user_attributes"]["roles"] == ["admin"] - assert mock_scope["user_attributes"]["teams"] == ["ml-team"] - - mock_app.assert_called_once_with(mock_scope, mock_receive, mock_send) - - -@pytest.mark.asyncio -async def test_k8s_middleware_no_attributes(mock_k8s_middleware, mock_scope): - """Test middleware behavior with no access attributes""" - middleware, mock_app = mock_k8s_middleware - mock_receive = AsyncMock() - mock_send = AsyncMock() - - with patch("kubernetes.client.ApiClient") as mock_api_client: - mock_client = AsyncMock() - mock_api_client.return_value = mock_client - - # Mock successful token validation - mock_client.set_default_header = AsyncMock() - - # Mock token payload without access attributes - mock_token_parts = ["header", "eyJzdWIiOiJhZG1pbiJ9", "signature"] - mock_scope["headers"][1] = (b"authorization", f"Bearer {'.'.join(mock_token_parts)}".encode()) - - await middleware(mock_scope, mock_receive, mock_send) - - assert "user_attributes" in mock_scope - attributes = mock_scope["user_attributes"] - assert "roles" in attributes - assert attributes["roles"] == ["admin"] - - mock_app.assert_called_once_with(mock_scope, mock_receive, mock_send) - - # oauth2 token provider tests @pytest.fixture def oauth2_app(): app = FastAPI() - auth_config = AuthProviderConfig( + auth_config = AuthenticationConfig( provider_type=AuthProviderType.OAUTH2_TOKEN, config={ "jwks": { @@ -530,7 +425,7 @@ def mock_introspection_endpoint(): @pytest.fixture def introspection_app(mock_introspection_endpoint): app = FastAPI() - auth_config = AuthProviderConfig( + auth_config = AuthenticationConfig( provider_type=AuthProviderType.OAUTH2_TOKEN, config={ "jwks": None, @@ -549,7 +444,7 @@ def introspection_app(mock_introspection_endpoint): @pytest.fixture def introspection_app_with_custom_mapping(mock_introspection_endpoint): app = FastAPI() - auth_config = AuthProviderConfig( + auth_config = AuthenticationConfig( 
provider_type=AuthProviderType.OAUTH2_TOKEN, config={ "jwks": None, diff --git a/uv.lock b/uv.lock index c30e2c4c1..a987ddc9e 100644 --- a/uv.lock +++ b/uv.lock @@ -676,15 +676,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408 }, ] -[[package]] -name = "durationpy" -version = "0.9" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/31/e9/f49c4e7fccb77fa5c43c2480e09a857a78b41e7331a75e128ed5df45c56b/durationpy-0.9.tar.gz", hash = "sha256:fd3feb0a69a0057d582ef643c355c40d2fa1c942191f914d12203b1a01ac722a", size = 3186 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4c/a3/ac312faeceffd2d8f86bc6dcb5c401188ba5a01bc88e69bed97578a0dfcd/durationpy-0.9-py3-none-any.whl", hash = "sha256:e65359a7af5cedad07fb77a2dd3f390f8eb0b74cb845589fa6c057086834dd38", size = 3461 }, -] - [[package]] name = "ecdsa" version = "0.19.1" @@ -863,20 +854,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1d/9a/4114a9057db2f1462d5c8f8390ab7383925fe1ac012eaa42402ad65c2963/GitPython-3.1.44-py3-none-any.whl", hash = "sha256:9e0e10cda9bed1ee64bc9a6de50e7e38a9c9943241cd7f585f6df3ed28011110", size = 207599 }, ] -[[package]] -name = "google-auth" -version = "2.38.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cachetools" }, - { name = "pyasn1-modules" }, - { name = "rsa" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/c6/eb/d504ba1daf190af6b204a9d4714d457462b486043744901a6eeea711f913/google_auth-2.38.0.tar.gz", hash = "sha256:8285113607d3b80a3f1543b75962447ba8a09fe85783432a784fdeef6ac094c4", size = 270866 } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/9d/47/603554949a37bca5b7f894d51896a9c534b9eab808e2520a748e081669d0/google_auth-2.38.0-py2.py3-none-any.whl", hash = "sha256:e7dae6694313f434a2727bf2906f27ad259bae090d7aa896590d86feec3d9d4a", size = 210770 }, -] - [[package]] name = "googleapis-common-protos" version = "1.67.0" @@ -1324,28 +1301,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c9/fb/108ecd1fe961941959ad0ee4e12ee7b8b1477247f30b1fdfd83ceaf017f0/jupyter_core-5.7.2-py3-none-any.whl", hash = "sha256:4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409", size = 28965 }, ] -[[package]] -name = "kubernetes" -version = "32.0.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "certifi" }, - { name = "durationpy" }, - { name = "google-auth" }, - { name = "oauthlib" }, - { name = "python-dateutil" }, - { name = "pyyaml" }, - { name = "requests" }, - { name = "requests-oauthlib" }, - { name = "six" }, - { name = "urllib3" }, - { name = "websocket-client" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b7/e8/0598f0e8b4af37cd9b10d8b87386cf3173cb8045d834ab5f6ec347a758b3/kubernetes-32.0.1.tar.gz", hash = "sha256:42f43d49abd437ada79a79a16bd48a604d3471a117a8347e87db693f2ba0ba28", size = 946691 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/08/10/9f8af3e6f569685ce3af7faab51c8dd9d93b9c38eba339ca31c746119447/kubernetes-32.0.1-py2.py3-none-any.whl", hash = "sha256:35282ab8493b938b08ab5526c7ce66588232df00ef5e1dbe88a419107dc10998", size = 1988070 }, -] - [[package]] name = "levenshtein" version = "0.27.1" @@ -1441,7 +1396,6 @@ dependencies = [ { name = "huggingface-hub" }, { name = "jinja2" }, { name = "jsonschema" }, - { name = "kubernetes" }, { name = "llama-stack-client" }, { name = "openai" }, { name = "pillow" }, @@ -1546,7 +1500,6 @@ requires-dist = [ { name = "jinja2", specifier = ">=3.1.6" }, { name = "jinja2", marker = "extra == 'codegen'", specifier = ">=3.1.6" }, { name = 
"jsonschema" }, - { name = "kubernetes" }, { name = "llama-stack-client", specifier = ">=0.2.7" }, { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.7" }, { name = "mcp", marker = "extra == 'test'" }, @@ -1624,9 +1577,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/cd/6b/31c07396c5b3010668e4eb38061a96ffacb47ec4b14d8aeb64c13856c485/llama_stack_client-0.2.7.tar.gz", hash = "sha256:11aee11fdd5e0e8caad07c0cce9c4d88640938844372e7e3453a91ea0757fcb3", size = 259273, upload-time = "2025-05-16T20:31:39.221Z" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/6b/31c07396c5b3010668e4eb38061a96ffacb47ec4b14d8aeb64c13856c485/llama_stack_client-0.2.7.tar.gz", hash = "sha256:11aee11fdd5e0e8caad07c0cce9c4d88640938844372e7e3453a91ea0757fcb3", size = 259273 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ac/69/6a5f4683afe355500df4376fdcbfb2fc1e6a0c3bcea5ff8f6114773a9acf/llama_stack_client-0.2.7-py3-none-any.whl", hash = "sha256:78b3f2abdb1770c7b1270a9c0ef58402a988401c564d2e6c83588779ac6fc38d", size = 292727, upload-time = "2025-05-16T20:31:37.587Z" }, + { url = "https://files.pythonhosted.org/packages/ac/69/6a5f4683afe355500df4376fdcbfb2fc1e6a0c3bcea5ff8f6114773a9acf/llama_stack_client-0.2.7-py3-none-any.whl", hash = "sha256:78b3f2abdb1770c7b1270a9c0ef58402a988401c564d2e6c83588779ac6fc38d", size = 292727 }, ] [[package]] @@ -2087,15 +2040,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/17/7f/d322a4125405920401450118dbdc52e0384026bd669939484670ce8b2ab9/numpy-2.2.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:783145835458e60fa97afac25d511d00a1eca94d4a8f3ace9fe2043003c678e4", size = 12839607 }, ] -[[package]] -name = "oauthlib" -version = "3.2.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/6d/fa/fbf4001037904031639e6bfbfc02badfc7e12f137a8afa254df6c4c8a670/oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918", size = 177352 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7e/80/cab10959dc1faead58dc8384a781dfbf93cb4d33d50988f7a69f1b7c9bbe/oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca", size = 151688 }, -] - [[package]] name = "openai" version = "1.71.0" @@ -2608,18 +2552,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/62/1e/a94a8d635fa3ce4cfc7f506003548d0a2447ae76fd5ca53932970fe3053f/pyasn1-0.4.8-py2.py3-none-any.whl", hash = "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d", size = 77145 }, ] -[[package]] -name = "pyasn1-modules" -version = "0.4.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyasn1" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/1d/67/6afbf0d507f73c32d21084a79946bfcfca5fbc62a72057e9c23797a737c9/pyasn1_modules-0.4.1.tar.gz", hash = "sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c", size = 310028 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/77/89/bc88a6711935ba795a679ea6ebee07e128050d6382eaa35a0a47c8032bdc/pyasn1_modules-0.4.1-py3-none-any.whl", hash = "sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd", size = 181537 }, -] - [[package]] name = "pycparser" version = "2.22" @@ -2875,9 +2807,9 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pytest" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ac/82/4c9ecabab13363e72d880f2fb504c5f750433b2b6f16e99f4ec21ada284c/pytest_timeout-2.4.0.tar.gz", hash = "sha256:7e68e90b01f9eff71332b25001f85c75495fc4e3a836701876183c4bcfd0540a", size = 17973, upload-time = "2025-05-05T19:44:34.99Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/ac/82/4c9ecabab13363e72d880f2fb504c5f750433b2b6f16e99f4ec21ada284c/pytest_timeout-2.4.0.tar.gz", hash = "sha256:7e68e90b01f9eff71332b25001f85c75495fc4e3a836701876183c4bcfd0540a", size = 17973 } wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/b6/3127540ecdf1464a00e5a01ee60a1b09175f6913f0644ac748494d9c4b21/pytest_timeout-2.4.0-py3-none-any.whl", hash = "sha256:c42667e5cdadb151aeb5b26d114aff6bdf5a907f176a007a30b940d3d865b5c2", size = 14382, upload-time = "2025-05-05T19:44:33.502Z" }, + { url = "https://files.pythonhosted.org/packages/fa/b6/3127540ecdf1464a00e5a01ee60a1b09175f6913f0644ac748494d9c4b21/pytest_timeout-2.4.0-py3-none-any.whl", hash = "sha256:c42667e5cdadb151aeb5b26d114aff6bdf5a907f176a007a30b940d3d865b5c2", size = 14382 }, ] [[package]] @@ -3256,19 +3188,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 }, ] -[[package]] -name = "requests-oauthlib" -version = "2.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "oauthlib" }, - { name = "requests" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/42/f2/05f29bc3913aea15eb670be136045bf5c5bbf4b99ecb839da9b422bb2c85/requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9", size = 55650 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/5d/63d4ae3b9daea098d5d6f5da83984853c1bbacd5dc826764b249fe119d24/requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", size = 24179 }, -] - [[package]] name = "rich" version = "13.9.4" @@ -4323,15 +4242,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166 }, ] -[[package]] -name = "websocket-client" -version = "1.8.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e6/30/fba0d96b4b5fbf5948ed3f4681f7da2f9f64512e1d303f94b4cc174c24a5/websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da", size = 54648 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826 }, -] - [[package]] name = "websockets" version = "15.0" From 1862de4be51fa3697d54525c65aebe9edc6c8514 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Wed, 21 May 2025 17:30:23 +0200 Subject: [PATCH 5/7] chore: clarify cache_ttl to be key_recheck_period (#2220) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? The cache_ttl config value is not in fact tied to the lifetime of any of the keys, it represents the time interval between for our key cache refresher. 
Signed-off-by: Sébastien Han --- docs/source/distributions/configuration.md | 2 +- llama_stack/distribution/server/auth_providers.py | 6 +++--- tests/unit/server/test_auth.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/source/distributions/configuration.md b/docs/source/distributions/configuration.md index 77b52a621..de99b6576 100644 --- a/docs/source/distributions/configuration.md +++ b/docs/source/distributions/configuration.md @@ -183,7 +183,7 @@ server: config: jwks: uri: "https://kubernetes.default.svc" - cache_ttl: 3600 + key_recheck_period: 3600 tls_cafile: "/path/to/ca.crt" issuer: "https://kubernetes.default.svc" audience: "https://kubernetes.default.svc" diff --git a/llama_stack/distribution/server/auth_providers.py b/llama_stack/distribution/server/auth_providers.py index 39f258c3b..723a65b77 100644 --- a/llama_stack/distribution/server/auth_providers.py +++ b/llama_stack/distribution/server/auth_providers.py @@ -110,7 +110,7 @@ def get_attributes_from_claims(claims: dict[str, str], mapping: dict[str, str]) class OAuth2JWKSConfig(BaseModel): # The JWKS URI for collecting public keys uri: str - cache_ttl: int = 3600 + key_recheck_period: int = Field(default=3600, description="The period to recheck the JWKS URI for key updates") class OAuth2IntrospectionConfig(BaseModel): @@ -263,7 +263,7 @@ class OAuth2TokenAuthProvider(AuthProvider): """ Refresh the JWKS cache. - This is a simple cache that expires after a certain amount of time (defined by `cache_ttl`). + This is a simple cache that expires after a certain amount of time (defined by `key_recheck_period`). If the cache is expired, we refresh the JWKS from the JWKS URI. 
Notes: for Kubernetes which doesn't fully implement the OIDC protocol: @@ -273,7 +273,7 @@ class OAuth2TokenAuthProvider(AuthProvider): async with self._jwks_lock: if self.config.jwks is None: raise ValueError("JWKS is not configured") - if time.time() - self._jwks_at > self.config.jwks.cache_ttl: + if time.time() - self._jwks_at > self.config.jwks.key_recheck_period: verify = self.config.tls_cafile.as_posix() if self.config.tls_cafile else self.config.verify_tls async with httpx.AsyncClient(verify=verify) as client: res = await client.get(self.config.jwks.uri, timeout=5) diff --git a/tests/unit/server/test_auth.py b/tests/unit/server/test_auth.py index 94c486f18..408acb88a 100644 --- a/tests/unit/server/test_auth.py +++ b/tests/unit/server/test_auth.py @@ -293,7 +293,7 @@ def oauth2_app(): config={ "jwks": { "uri": "http://mock-authz-service/token/introspect", - "cache_ttl": "3600", + "key_recheck_period": "3600", }, "audience": "llama-stack", }, From 6a62e783b905e57c15be351ade856c33752c0dd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Wed, 21 May 2025 17:31:14 +0200 Subject: [PATCH 6/7] chore: refactor workflow writting (#2225) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? Use a composite action to avoid similar steps repetitions and centralization of the defaults. 
Signed-off-by: Sébastien Han --- .github/actions/setup-runner/action.yml | 22 ++++++ .github/workflows/integration-auth-tests.yml | 12 +--- .github/workflows/integration-tests.yml | 18 ++--- .github/workflows/providers-build.yml | 69 +++---------------- .github/workflows/test-external-providers.yml | 12 +--- .github/workflows/unit-tests.yml | 14 ++-- .github/workflows/update-readthedocs.yml | 12 +--- 7 files changed, 45 insertions(+), 114 deletions(-) create mode 100644 .github/actions/setup-runner/action.yml diff --git a/.github/actions/setup-runner/action.yml b/.github/actions/setup-runner/action.yml new file mode 100644 index 000000000..972dcbdae --- /dev/null +++ b/.github/actions/setup-runner/action.yml @@ -0,0 +1,22 @@ +name: Setup runner +description: Prepare a runner for the tests (install uv, python, project dependencies, etc.) +runs: + using: "composite" + steps: + - name: Install uv + uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1 + with: + python-version: "3.10" + activate-environment: true + version: 0.7.6 + + - name: Install dependencies + shell: bash + run: | + uv sync --all-extras + uv pip install ollama faiss-cpu + # always test against the latest version of the client + # TODO: this is not necessarily a good idea. we need to test against both published and latest + # to find out backwards compatibility issues. + uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main + uv pip install -e . 
diff --git a/.github/workflows/integration-auth-tests.yml b/.github/workflows/integration-auth-tests.yml index 994bd1dec..25f696c9e 100644 --- a/.github/workflows/integration-auth-tests.yml +++ b/.github/workflows/integration-auth-tests.yml @@ -30,16 +30,11 @@ jobs: - name: Checkout repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Install uv - uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1 - with: - python-version: "3.10" - activate-environment: true + - name: Install dependencies + uses: ./.github/actions/setup-runner - - name: Set Up Environment and Install Dependencies + - name: Build Llama Stack run: | - uv sync --extra dev --extra test - uv pip install -e . llama stack build --template ollama --image-type venv - name: Install minikube @@ -109,7 +104,6 @@ jobs: yq eval '.server.auth.config.jwks = {"uri": "${{ env.KUBERNETES_API_SERVER_URL }}"}' -i $run_dir/run.yaml cat $run_dir/run.yaml - source .venv/bin/activate nohup uv run llama stack run $run_dir/run.yaml --image-type venv > server.log 2>&1 & - name: Wait for Llama Stack server to be ready diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index da41e2185..2414522a7 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -32,24 +32,14 @@ jobs: - name: Checkout repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Install uv - uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1 - with: - python-version: "3.10" - activate-environment: true + - name: Install dependencies + uses: ./.github/actions/setup-runner - name: Setup ollama uses: ./.github/actions/setup-ollama - - name: Set Up Environment and Install Dependencies + - name: Build Llama Stack run: | - uv sync --extra dev --extra test - uv pip install ollama faiss-cpu - # always test against the latest version of the client - # TODO: 
this is not necessarily a good idea. we need to test against both published and latest - # to find out backwards compatibility issues. - uv pip install git+https://github.com/meta-llama/llama-stack-client-python.git@main - uv pip install -e . llama stack build --template ollama --image-type venv - name: Start Llama Stack server in background @@ -57,7 +47,6 @@ jobs: env: INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" run: | - source .venv/bin/activate LLAMA_STACK_LOG_FILE=server.log nohup uv run llama stack run ./llama_stack/templates/ollama/run.yaml --image-type venv & - name: Wait for Llama Stack server to be ready @@ -85,6 +74,7 @@ jobs: echo "Ollama health check failed" exit 1 fi + - name: Check Storage and Memory Available Before Tests if: ${{ always() }} run: | diff --git a/.github/workflows/providers-build.yml b/.github/workflows/providers-build.yml index 3c1682833..cf53459b9 100644 --- a/.github/workflows/providers-build.yml +++ b/.github/workflows/providers-build.yml @@ -50,21 +50,8 @@ jobs: - name: Checkout repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 - with: - python-version: '3.10' - - - name: Install uv - uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1 - with: - python-version: "3.10" - - - name: Install LlamaStack - run: | - uv venv - source .venv/bin/activate - uv pip install -e . 
+ - name: Install dependencies + uses: ./.github/actions/setup-runner - name: Print build dependencies run: | @@ -79,7 +66,6 @@ jobs: - name: Print dependencies in the image if: matrix.image-type == 'venv' run: | - source test/bin/activate uv pip list build-single-provider: @@ -88,21 +74,8 @@ jobs: - name: Checkout repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 - with: - python-version: '3.10' - - - name: Install uv - uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1 - with: - python-version: "3.10" - - - name: Install LlamaStack - run: | - uv venv - source .venv/bin/activate - uv pip install -e . + - name: Install dependencies + uses: ./.github/actions/setup-runner - name: Build a single provider run: | @@ -114,21 +87,8 @@ jobs: - name: Checkout repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 - with: - python-version: '3.10' - - - name: Install uv - uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1 - with: - python-version: "3.10" - - - name: Install LlamaStack - run: | - uv venv - source .venv/bin/activate - uv pip install -e . + - name: Install dependencies + uses: ./.github/actions/setup-runner - name: Build a single provider run: | @@ -152,21 +112,8 @@ jobs: - name: Checkout repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 - with: - python-version: '3.10' - - - name: Install uv - uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1 - with: - python-version: "3.10" - - - name: Install LlamaStack - run: | - uv venv - source .venv/bin/activate - uv pip install -e . 
+ - name: Install dependencies + uses: ./.github/actions/setup-runner - name: Pin template to UBI9 base run: | diff --git a/.github/workflows/test-external-providers.yml b/.github/workflows/test-external-providers.yml index 2e18fc5eb..06ab7cf3c 100644 --- a/.github/workflows/test-external-providers.yml +++ b/.github/workflows/test-external-providers.yml @@ -25,15 +25,8 @@ jobs: - name: Checkout repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Install uv - uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1 - with: - python-version: "3.10" - - - name: Set Up Environment and Install Dependencies - run: | - uv sync --extra dev --extra test - uv pip install -e . + - name: Install dependencies + uses: ./.github/actions/setup-runner - name: Apply image type to config file run: | @@ -59,7 +52,6 @@ jobs: env: INFERENCE_MODEL: "meta-llama/Llama-3.2-3B-Instruct" run: | - source ci-test/bin/activate uv run pip list nohup uv run --active llama stack run tests/external-provider/llama-stack-provider-ollama/run.yaml --image-type ${{ matrix.image-type }} > server.log 2>&1 & diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index d2dd34e05..fc0459f0f 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -30,17 +30,11 @@ jobs: - "3.12" - "3.13" steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + - name: Checkout repository + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Set up Python ${{ matrix.python }} - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 - with: - python-version: ${{ matrix.python }} - - - uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1 - with: - python-version: ${{ matrix.python }} - enable-cache: false + - name: Install dependencies + uses: ./.github/actions/setup-runner - name: Run unit tests run: | diff 
--git a/.github/workflows/update-readthedocs.yml b/.github/workflows/update-readthedocs.yml index 04e23bca9..981332a77 100644 --- a/.github/workflows/update-readthedocs.yml +++ b/.github/workflows/update-readthedocs.yml @@ -37,16 +37,8 @@ jobs: - name: Checkout repository uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0 - with: - python-version: '3.11' - - - name: Install the latest version of uv - uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1 - - - name: Sync with uv - run: uv sync --extra docs + - name: Install dependencies + uses: ./.github/actions/setup-runner - name: Build HTML run: | From 85b5f3172b0cf3eb7febcd20cd4df4a60c3c39ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Wed, 21 May 2025 17:35:27 +0200 Subject: [PATCH 7/7] docs: misc cleanup (#2223) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? * remove requirements.txt to use pyproject.toml as the source of truth * update relevant docs Signed-off-by: Sébastien Han --- CONTRIBUTING.md | 7 +--- docs/readme.md | 6 +-- docs/requirements.txt | 16 -------- docs/source/conf.py | 8 ---- pyproject.toml | 3 ++ uv.lock | 88 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 96 insertions(+), 32 deletions(-) delete mode 100644 docs/requirements.txt diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d7c3e3e2f..8f71a6ba1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -167,14 +167,11 @@ If you have made changes to a provider's configuration in any form (introducing If you are making changes to the documentation at [https://llama-stack.readthedocs.io/en/latest/](https://llama-stack.readthedocs.io/en/latest/), you can use the following command to build the documentation and preview your changes. 
You will need [Sphinx](https://www.sphinx-doc.org/en/master/) and the readthedocs theme. ```bash -cd docs -uv sync --extra docs - # This rebuilds the documentation pages. -uv run make html +uv run --with ".[docs]" make -C docs/ html # This will start a local server (usually at http://127.0.0.1:8000) that automatically rebuilds and refreshes when you make changes to the documentation. -uv run sphinx-autobuild source build/html --write-all +uv run --with ".[docs]" sphinx-autobuild docs/source docs/build/html --write-all ``` ### Update API Documentation diff --git a/docs/readme.md b/docs/readme.md index b88a4738d..d84dbe6eb 100644 --- a/docs/readme.md +++ b/docs/readme.md @@ -3,10 +3,10 @@ Here's a collection of comprehensive guides, examples, and resources for building AI applications with Llama Stack. For the complete documentation, visit our [ReadTheDocs page](https://llama-stack.readthedocs.io/en/latest/index.html). ## Render locally + +From the llama-stack root directory, run the following command to render the docs locally: ```bash -pip install -r requirements.txt -cd docs -python -m sphinx_autobuild source _build +uv run --with ".[docs]" sphinx-autobuild docs/source docs/build/html --write-all ``` You can open up the docs in your browser at http://localhost:8000 diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index 6cd45c33b..000000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,16 +0,0 @@ -linkify -myst-parser --e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme -sphinx==8.1.3 -sphinx-copybutton -sphinx-design -sphinx-pdj-theme -sphinx-rtd-theme>=1.0.0 -sphinx-tabs -sphinx_autobuild -sphinx_rtd_dark_mode -sphinxcontrib-mermaid -sphinxcontrib-openapi -sphinxcontrib-redoc -sphinxcontrib-video -tomli diff --git a/docs/source/conf.py b/docs/source/conf.py index 501a923dd..43e8dbdd5 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -53,14 +53,6 @@ myst_enable_extensions = 
["colon_fence"] html_theme = "sphinx_rtd_theme" html_use_relative_paths = True - -# html_theme = "sphinx_pdj_theme" -# html_theme_path = [sphinx_pdj_theme.get_html_theme_path()] - -# html_theme = "pytorch_sphinx_theme" -# html_theme_path = [pytorch_sphinx_theme.get_html_theme_path()] - - templates_path = ["_templates"] exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] diff --git a/pyproject.toml b/pyproject.toml index 8b922bafb..ce44479ca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,6 +93,7 @@ test = [ docs = [ "sphinx-autobuild", "myst-parser", + "sphinx", "sphinx-rtd-theme", "sphinx_rtd_dark_mode", "sphinx-copybutton", @@ -102,6 +103,8 @@ docs = [ "sphinxcontrib.video", "sphinxcontrib.mermaid", "tomli", + "linkify", + "sphinxcontrib.openapi", ] codegen = ["rich", "pydantic", "jinja2>=3.1.6"] ui = [ diff --git a/uv.lock b/uv.lock index a987ddc9e..6d091193b 100644 --- a/uv.lock +++ b/uv.lock @@ -628,6 +628,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d5/50/83c593b07763e1161326b3b8c6686f0f4b0f24d5526546bee538c89837d6/decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186", size = 9073 }, ] +[[package]] +name = "deepmerge" +version = "2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a8/3a/b0ba594708f1ad0bc735884b3ad854d3ca3bdc1d741e56e40bbda6263499/deepmerge-2.0.tar.gz", hash = "sha256:5c3d86081fbebd04dd5de03626a0607b809a98fb6ccba5770b62466fe940ff20", size = 19890 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2d/82/e5d2c1c67d19841e9edc74954c827444ae826978499bde3dfc1d007c8c11/deepmerge-2.0-py3-none-any.whl", hash = "sha256:6de9ce507115cff0bed95ff0ce9ecc31088ef50cbdf09bc90a09349a318b3d00", size = 13475 }, +] + [[package]] name = "deprecated" version = "1.2.18" @@ -1384,6 +1393,12 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/dc/1e/408fd10217eac0e43aea0604be22b4851a09e03d761d44d4ea12089dd70e/levenshtein-0.27.1-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:7987ef006a3cf56a4532bd4c90c2d3b7b4ca9ad3bf8ae1ee5713c4a3bdfda913", size = 98045 }, ] +[[package]] +name = "linkify" +version = "1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/c6/246100fa3967074d9725b3716913bd495823547bde5047050d4c3462f994/linkify-1.4.tar.gz", hash = "sha256:9ba276ba179525f7262820d90f009604e51cd4f1466c1112b882ef7eda243d5e", size = 1749 } + [[package]] name = "llama-stack" version = "0.2.7" @@ -1434,7 +1449,9 @@ dev = [ { name = "uvicorn" }, ] docs = [ + { name = "linkify" }, { name = "myst-parser" }, + { name = "sphinx" }, { name = "sphinx-autobuild" }, { name = "sphinx-copybutton" }, { name = "sphinx-design" }, @@ -1442,6 +1459,7 @@ docs = [ { name = "sphinx-rtd-theme" }, { name = "sphinx-tabs" }, { name = "sphinxcontrib-mermaid" }, + { name = "sphinxcontrib-openapi" }, { name = "sphinxcontrib-redoc" }, { name = "sphinxcontrib-video" }, { name = "tomli" }, @@ -1500,6 +1518,7 @@ requires-dist = [ { name = "jinja2", specifier = ">=3.1.6" }, { name = "jinja2", marker = "extra == 'codegen'", specifier = ">=3.1.6" }, { name = "jsonschema" }, + { name = "linkify", marker = "extra == 'docs'" }, { name = "llama-stack-client", specifier = ">=0.2.7" }, { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.7" }, { name = "mcp", marker = "extra == 'test'" }, @@ -1534,6 +1553,7 @@ requires-dist = [ { name = "ruamel-yaml", marker = "extra == 'dev'" }, { name = "ruff", marker = "extra == 'dev'" }, { name = "setuptools" }, + { name = "sphinx", marker = "extra == 'docs'" }, { name = "sphinx-autobuild", marker = "extra == 'docs'" }, { name = "sphinx-copybutton", marker = "extra == 'docs'" }, { name = "sphinx-design", marker = "extra == 'docs'" }, @@ -1541,6 +1561,7 @@ requires-dist = [ { name = 
"sphinx-rtd-theme", marker = "extra == 'docs'" }, { name = "sphinx-tabs", marker = "extra == 'docs'" }, { name = "sphinxcontrib-mermaid", marker = "extra == 'docs'" }, + { name = "sphinxcontrib-openapi", marker = "extra == 'docs'" }, { name = "sphinxcontrib-redoc", marker = "extra == 'docs'" }, { name = "sphinxcontrib-video", marker = "extra == 'docs'" }, { name = "sqlite-vec", marker = "extra == 'unit'" }, @@ -1786,6 +1807,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979 }, ] +[[package]] +name = "mistune" +version = "3.1.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c4/79/bda47f7dd7c3c55770478d6d02c9960c430b0cf1773b72366ff89126ea31/mistune-3.1.3.tar.gz", hash = "sha256:a7035c21782b2becb6be62f8f25d3df81ccb4d6fa477a6525b15af06539f02a0", size = 94347 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/01/4d/23c4e4f09da849e127e9f123241946c23c1e30f45a88366879e064211815/mistune-3.1.3-py3-none-any.whl", hash = "sha256:1a32314113cff28aa6432e99e522677c8587fd83e3d51c29b82a52409c842bd9", size = 53410 }, +] + [[package]] name = "mpmath" version = "1.3.0" @@ -2228,6 +2261,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772 }, ] +[[package]] +name = "picobox" +version = "4.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/b1/830714dd6778c1cb45826722b4e9bd21c94b33cca5df9ef2cc0b80c81b25/picobox-4.0.0.tar.gz", hash = 
"sha256:114da1b5606b2f615e8b0eb68d04198ad9de75af5adbcf5b36fe4f664ab927b6", size = 22666 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2d/c6/fd64ffd75d47c4fcf6c65808cc5c5c75e5d4357c197d3741ee1339e91257/picobox-4.0.0-py3-none-any.whl", hash = "sha256:4c27eb689fe45dabd9e64c382e04418147d0b746d155b4e80057dbb7ff82027e", size = 11641 }, +] + [[package]] name = "pillow" version = "11.1.0" @@ -3516,6 +3558,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c6/43/65c0acbd8cc6f50195a3a1fc195c404988b15c67090e73c7a41a9f57d6bd/sphinx_design-0.6.1-py3-none-any.whl", hash = "sha256:b11f37db1a802a183d61b159d9a202314d4d2fe29c163437001324fe2f19549c", size = 2215338 }, ] +[[package]] +name = "sphinx-mdinclude" +version = "0.6.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "docutils" }, + { name = "mistune" }, + { name = "pygments" }, + { name = "sphinx" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b6/a7/c9a7888bb2187fdb06955d71e75f6f266b7e179b356ac76138d160a5b7eb/sphinx_mdinclude-0.6.2.tar.gz", hash = "sha256:447462e82cb8be61404a2204227f920769eb923d2f57608e3325f3bb88286b4c", size = 65257 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/42/3d/6b41fe1637cd53c4b10d56e0e6f396546f837973dabf9c4b2a1de44620ac/sphinx_mdinclude-0.6.2-py3-none-any.whl", hash = "sha256:648e78edb067c0e4bffc22943278d49d54a0714494743592032fa3ad82a86984", size = 16911 }, +] + [[package]] name = "sphinx-rtd-dark-mode" version = "1.3.0" @@ -3583,6 +3640,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = "sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8", size = 98705 }, ] +[[package]] +name = "sphinxcontrib-httpdomain" +version = "1.8.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "six" }, + { name = "sphinx" }, +] +sdist = 
{ url = "https://files.pythonhosted.org/packages/be/ef/82d3cfafb7febce4f7df8dcf3cde9d072350b41066e05a4f559b4e9105d0/sphinxcontrib-httpdomain-1.8.1.tar.gz", hash = "sha256:6c2dfe6ca282d75f66df333869bb0ce7331c01b475db6809ff9d107b7cdfe04b", size = 19266 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/49/aad47b8cf27a0d7703f1311aad8c368bb22866ddee1a2d2cd3f69bc45e0c/sphinxcontrib_httpdomain-1.8.1-py2.py3-none-any.whl", hash = "sha256:21eefe1270e4d9de8d717cc89ee92cc4871b8736774393bafc5e38a6bb77b1d5", size = 25513 }, +] + [[package]] name = "sphinxcontrib-jquery" version = "4.1" @@ -3617,6 +3687,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cd/c8/784b9ac6ea08aa594c1a4becbd0dbe77186785362e31fd633b8c6ae0197a/sphinxcontrib_mermaid-1.0.0-py3-none-any.whl", hash = "sha256:60b72710ea02087f212028feb09711225fbc2e343a10d34822fe787510e1caa3", size = 9597 }, ] +[[package]] +name = "sphinxcontrib-openapi" +version = "0.8.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "deepmerge" }, + { name = "jsonschema" }, + { name = "picobox" }, + { name = "pyyaml" }, + { name = "sphinx" }, + { name = "sphinx-mdinclude" }, + { name = "sphinxcontrib-httpdomain" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c0/a7/66a5c9aba7dbbb0c2b050f60e71402818cbf5f127ace13ed971029cc745e/sphinxcontrib-openapi-0.8.4.tar.gz", hash = "sha256:df883808a5b5e4b4113ad697185c43a3f42df3dce70453af78ba7076907e9a20", size = 71848 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/c3/ee00486f38d78309a60ee0d6031b2545b22ac5f0007d841dd174abc68774/sphinxcontrib_openapi-0.8.4-py3-none-any.whl", hash = "sha256:50911c18d452d9390ee3a384ef8dc8bde6135f542ba55691f81e1fbc0b71014e", size = 34510 }, +] + [[package]] name = "sphinxcontrib-qthelp" version = "2.0.0"