mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-21 16:07:16 +00:00
**This PR changes configurations in a backward incompatible way.**

Run configs today repeat full SQLite/Postgres snippets everywhere a store is needed, which means duplicated credentials, extra connection pools, and lots of drift between files. This PR introduces named storage backends so the stack and providers can share a single catalog and reference those backends by name.

## Key Changes

- Add `storage.backends` to `StackRunConfig`, register each KV/SQL backend once at startup, and validate that references point to the right family.
- Move server stores under `storage.stores` with lightweight references (backend + namespace/table) instead of full configs.
- Update every provider/config/doc to use the new reference style; docs/codegen now surface the simplified YAML.

## Migration

Before:

```yaml
metadata_store:
  type: sqlite
  db_path: ~/.llama/distributions/foo/registry.db
inference_store:
  type: postgres
  host: ${env.POSTGRES_HOST}
  port: ${env.POSTGRES_PORT}
  db: ${env.POSTGRES_DB}
  user: ${env.POSTGRES_USER}
  password: ${env.POSTGRES_PASSWORD}
conversations_store:
  type: postgres
  host: ${env.POSTGRES_HOST}
  port: ${env.POSTGRES_PORT}
  db: ${env.POSTGRES_DB}
  user: ${env.POSTGRES_USER}
  password: ${env.POSTGRES_PASSWORD}
```

After:

```yaml
storage:
  backends:
    kv_default:
      type: kv_sqlite
      db_path: ~/.llama/distributions/foo/kvstore.db
    sql_default:
      type: sql_postgres
      host: ${env.POSTGRES_HOST}
      port: ${env.POSTGRES_PORT}
      db: ${env.POSTGRES_DB}
      user: ${env.POSTGRES_USER}
      password: ${env.POSTGRES_PASSWORD}
  stores:
    metadata:
      backend: kv_default
      namespace: registry
    inference:
      backend: sql_default
      table_name: inference_store
      max_write_queue_size: 10000
      num_writers: 4
    conversations:
      backend: sql_default
      table_name: openai_conversations
```

Provider configs follow the same pattern—for example, a Chroma vector adapter switches from:

```yaml
providers:
  vector_io:
  - provider_id: chromadb
    provider_type: remote::chromadb
    config:
      url: ${env.CHROMADB_URL}
      kvstore:
        type: sqlite
        db_path: ~/.llama/distributions/foo/chroma.db
```

to:

```yaml
providers:
  vector_io:
  - provider_id: chromadb
    provider_type: remote::chromadb
    config:
      url: ${env.CHROMADB_URL}
      persistence:
        backend: kv_default
        namespace: vector_io::chroma_remote
```

Once the backends are declared, everything else just points at them, so rotating credentials or swapping to Postgres happens in one place and the stack reuses a single connection pool.
110 lines
4.2 KiB
Python
110 lines
4.2 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
|
|
import json
|
|
import time
|
|
from datetime import UTC, datetime, timedelta
|
|
|
|
from starlette.types import ASGIApp, Receive, Scope, Send
|
|
|
|
from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendType
|
|
from llama_stack.log import get_logger
|
|
from llama_stack.providers.utils.kvstore.api import KVStore
|
|
from llama_stack.providers.utils.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl
|
|
|
|
logger = get_logger(name=__name__, category="core::server")
|
|
|
|
|
|
class QuotaMiddleware:
|
|
"""
|
|
ASGI middleware that enforces separate quotas for authenticated and anonymous clients
|
|
within a configurable time window.
|
|
|
|
- For authenticated requests, it reads the client ID from the
|
|
`Authorization: Bearer <client_id>` header.
|
|
- For anonymous requests, it falls back to the IP address of the client.
|
|
Requests are counted in a KV store (e.g., SQLite), and HTTP 429 is returned
|
|
once a client exceeds its quota.
|
|
"""
|
|
|
|
def __init__(
|
|
self,
|
|
app: ASGIApp,
|
|
kv_config: KVStoreReference,
|
|
anonymous_max_requests: int,
|
|
authenticated_max_requests: int,
|
|
window_seconds: int = 86400,
|
|
):
|
|
self.app = app
|
|
self.kv_config = kv_config
|
|
self.kv: KVStore | None = None
|
|
self.anonymous_max_requests = anonymous_max_requests
|
|
self.authenticated_max_requests = authenticated_max_requests
|
|
self.window_seconds = window_seconds
|
|
|
|
async def _get_kv(self) -> KVStore:
|
|
if self.kv is None:
|
|
self.kv = await kvstore_impl(self.kv_config)
|
|
backend_config = _KVSTORE_BACKENDS.get(self.kv_config.backend)
|
|
if backend_config and backend_config.type == StorageBackendType.KV_SQLITE:
|
|
logger.warning(
|
|
"QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. "
|
|
f"window_seconds={self.window_seconds}"
|
|
)
|
|
return self.kv
|
|
|
|
async def __call__(self, scope: Scope, receive: Receive, send: Send):
|
|
if scope["type"] == "http":
|
|
# pick key & limit based on auth
|
|
auth_id = scope.get("authenticated_client_id")
|
|
if auth_id:
|
|
key_id = auth_id
|
|
limit = self.authenticated_max_requests
|
|
else:
|
|
# fallback to IP
|
|
client = scope.get("client")
|
|
key_id = client[0] if client else "anonymous"
|
|
limit = self.anonymous_max_requests
|
|
|
|
current_window = int(time.time() // self.window_seconds)
|
|
key = f"quota:{key_id}:{current_window}"
|
|
|
|
try:
|
|
kv = await self._get_kv()
|
|
prev = await kv.get(key) or "0"
|
|
count = int(prev) + 1
|
|
|
|
if int(prev) == 0:
|
|
# Set with expiration datetime when it is the first request in the window.
|
|
expiration = datetime.now(UTC) + timedelta(seconds=self.window_seconds)
|
|
await kv.set(key, str(count), expiration=expiration)
|
|
else:
|
|
await kv.set(key, str(count))
|
|
except Exception:
|
|
logger.exception("Failed to access KV store for quota")
|
|
return await self._send_error(send, 500, "Quota service error")
|
|
|
|
if count > limit:
|
|
logger.warning(
|
|
"Quota exceeded for client %s: %d/%d",
|
|
key_id,
|
|
count,
|
|
limit,
|
|
)
|
|
return await self._send_error(send, 429, "Quota exceeded")
|
|
|
|
return await self.app(scope, receive, send)
|
|
|
|
async def _send_error(self, send: Send, status: int, message: str):
|
|
await send(
|
|
{
|
|
"type": "http.response.start",
|
|
"status": status,
|
|
"headers": [[b"content-type", b"application/json"]],
|
|
}
|
|
)
|
|
body = json.dumps({"error": {"message": message}}).encode()
|
|
await send({"type": "http.response.body", "body": body})
|