simplified some, walked back some decisions

2025-12-12 20:12:33 +00:00 · 2025-10-17 10:05:07 -07:00 · 2025-10-17 10:05:07 -07:00 · 636764c2a1
commit 636764c2a1
parent af7472cdb0
90 changed files with 887 additions and 570 deletions
--- a/llama_stack/core/conversations/conversations.py
+++ b/llama_stack/core/conversations/conversations.py
@@ -55,10 +55,10 @@ class ConversationServiceImpl(Conversations):
        self.deps = deps
        self.policy = config.policy

-        # Use conversations store reference from storage config
-        conversations_ref = config.run_config.storage.conversations
+        # Use conversations store reference from run config
+        conversations_ref = config.run_config.conversations_store
        if not conversations_ref:
-            raise ValueError("storage.conversations must be configured in run config")
+            raise ValueError("conversations_store must be configured in run config")

        base_sql_store = sqlstore_impl(conversations_ref)
        self.sql_store = AuthorizedSqlStore(base_sql_store, self.policy)
--- a/llama_stack/core/datatypes.py
+++ b/llama_stack/core/datatypes.py
@@ -26,7 +26,13 @@ from llama_stack.apis.tools import ToolGroup, ToolGroupInput, ToolRuntime
 from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput
 from llama_stack.apis.vector_io import VectorIO
 from llama_stack.core.access_control.datatypes import AccessRule
-from llama_stack.core.storage.datatypes import KVStoreReference, StorageConfig
+from llama_stack.core.storage.datatypes import (
+    InferenceStoreReference,
+    KVStoreReference,
+    SqlStoreReference,
+    StorageBackendType,
+    StorageConfig,
+)
 from llama_stack.providers.datatypes import Api, ProviderSpec

 LLAMA_STACK_BUILD_CONFIG_VERSION = 2
@@ -464,10 +470,19 @@ can be instantiated multiple times (with different configs) if necessary.
 """,
    )
    storage: StorageConfig = Field(
-        description="""
-Storage backend configurations. Each backend is named, and can be referenced by various components
-throughout the Stack (both by its core as well as providers).
-""",
+        description="Catalog of named storage backends available to the stack",
+    )
+    metadata_store: KVStoreReference | None = Field(
+        default=None,
+        description="Reference to the KV store backend used by the distribution registry (kv_* backend).",
+    )
+    inference_store: InferenceStoreReference | None = Field(
+        default=None,
+        description="Reference to the SQL store backend used by the inference API (sql_* backend).",
+    )
+    conversations_store: SqlStoreReference | None = Field(
+        default=None,
+        description="Reference to the SQL store backend used by the conversations API (sql_* backend).",
    )

    # registry of "resources" in the distribution
@@ -507,6 +522,47 @@ throughout the Stack (both by its core as well as providers).
            return Path(v)
        return v

+    @model_validator(mode="after")
+    def validate_storage_references(self) -> "StackRunConfig":
+        backend_map = self.storage.backends if self.storage else {}
+        kv_backends = {
+            name
+            for name, cfg in backend_map.items()
+            if cfg.type
+            in {
+                StorageBackendType.KV_REDIS,
+                StorageBackendType.KV_SQLITE,
+                StorageBackendType.KV_POSTGRES,
+                StorageBackendType.KV_MONGODB,
+            }
+        }
+        sql_backends = {
+            name
+            for name, cfg in backend_map.items()
+            if cfg.type in {StorageBackendType.SQL_SQLITE, StorageBackendType.SQL_POSTGRES}
+        }
+
+        def _ensure_backend(reference, expected_set, store_name: str) -> None:
+            if reference is None:
+                return
+            backend_name = reference.backend
+            if backend_name not in backend_map:
+                raise ValueError(
+                    f"{store_name} references unknown backend '{backend_name}'. "
+                    f"Available backends: {sorted(backend_map)}"
+                )
+            if backend_name not in expected_set:
+                raise ValueError(
+                    f"{store_name} references backend '{backend_name}' of type "
+                    f"'{backend_map[backend_name].type.value}', but a backend of type "
+                    f"{'kv_*' if expected_set is kv_backends else 'sql_*'} is required."
+                )
+
+        _ensure_backend(self.metadata_store, kv_backends, "metadata_store")
+        _ensure_backend(self.inference_store, sql_backends, "inference_store")
+        _ensure_backend(self.conversations_store, sql_backends, "conversations_store")
+        return self
+

 class BuildConfig(BaseModel):
    version: int = LLAMA_STACK_BUILD_CONFIG_VERSION
--- a/llama_stack/core/prompts/prompts.py
+++ b/llama_stack/core/prompts/prompts.py
@@ -41,11 +41,10 @@ class PromptServiceImpl(Prompts):

    async def initialize(self) -> None:
        # Use metadata store backend with prompts-specific namespace
-        metadata_ref = self.config.run_config.storage.metadata
-        prompts_ref = KVStoreReference(
-            namespace="prompts",
-            backend=metadata_ref.backend if metadata_ref else None,
-        )
+        metadata_ref = self.config.run_config.metadata_store
+        if not metadata_ref:
+            raise ValueError("metadata_store must be configured in run config")
+        prompts_ref = KVStoreReference(namespace="prompts", backend=metadata_ref.backend)
        self.kvstore = await kvstore_impl(prompts_ref)

    def _get_default_key(self, prompt_id: str) -> str:
--- a/llama_stack/core/routers/__init__.py
+++ b/llama_stack/core/routers/__init__.py
@@ -80,9 +80,9 @@ async def get_auto_router_impl(

    # TODO: move pass configs to routers instead
    if api == Api.inference:
-        inference_ref = run_config.storage.inference
+        inference_ref = run_config.inference_store
        if not inference_ref:
-            raise ValueError("storage.inference must be configured in run config")
+            raise ValueError("inference_store must be configured in run config")

        inference_store = InferenceStore(
            reference=inference_ref,
--- a/llama_stack/core/server/quota.py
+++ b/llama_stack/core/server/quota.py
@@ -10,10 +10,10 @@ from datetime import UTC, datetime, timedelta

 from starlette.types import ASGIApp, Receive, Scope, Send

+from llama_stack.core.storage.datatypes import KVStoreReference, StorageBackendType
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.kvstore.api import KVStore
-from llama_stack.providers.utils.kvstore.config import KVStoreConfig, SqliteKVStoreConfig
-from llama_stack.providers.utils.kvstore.kvstore import kvstore_impl
+from llama_stack.providers.utils.kvstore.kvstore import _KVSTORE_BACKENDS, kvstore_impl

 logger = get_logger(name=__name__, category="core::server")

@@ -33,7 +33,7 @@ class QuotaMiddleware:
    def __init__(
        self,
        app: ASGIApp,
-        kv_config: KVStoreConfig,
+        kv_config: KVStoreReference,
        anonymous_max_requests: int,
        authenticated_max_requests: int,
        window_seconds: int = 86400,
@@ -45,15 +45,15 @@ class QuotaMiddleware:
        self.authenticated_max_requests = authenticated_max_requests
        self.window_seconds = window_seconds

-        if isinstance(self.kv_config, SqliteKVStoreConfig):
-            logger.warning(
-                "QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. "
-                f"window_seconds={self.window_seconds}"
-            )
-
    async def _get_kv(self) -> KVStore:
        if self.kv is None:
            self.kv = await kvstore_impl(self.kv_config)
+            backend_config = _KVSTORE_BACKENDS.get(self.kv_config.backend)
+            if backend_config and backend_config.type == StorageBackendType.KV_SQLITE:
+                logger.warning(
+                    "QuotaMiddleware: Using SQLite backend. Expiry/TTL is not enforced; cleanup is manual. "
+                    f"window_seconds={self.window_seconds}"
+                )
        return self.kv

    async def __call__(self, scope: Scope, receive: Receive, send: Send):
--- a/llama_stack/core/stack.py
+++ b/llama_stack/core/stack.py
@@ -368,7 +368,9 @@ class Stack:
                logger.info(f"API recording enabled: mode={os.environ.get('LLAMA_STACK_TEST_INFERENCE_MODE')}")

        _initialize_storage(self.run_config)
-        dist_registry, _ = await create_dist_registry(self.run_config.storage, self.run_config.image_name)
+        if not self.run_config.metadata_store:
+            raise ValueError("metadata_store must be configured with a kv_* backend")
+        dist_registry, _ = await create_dist_registry(self.run_config.metadata_store, self.run_config.image_name)
        policy = self.run_config.server.auth.access_policy if self.run_config.server.auth else []

        internal_impls = {}
--- a/llama_stack/core/storage/datatypes.py
+++ b/llama_stack/core/storage/datatypes.py
@@ -27,10 +27,6 @@ class CommonConfig(BaseModel):
        default=None,
        description="All keys will be prefixed with this namespace",
    )
-    default: bool = Field(
-        default=False,
-        description="Mark this KV store as the default choice when a reference omits the backend name",
-    )


 class RedisKVStoreConfig(CommonConfig):
@@ -143,13 +139,6 @@ class MongoDBKVStoreConfig(CommonConfig):
        }


-class CommonSqlStoreConfig(BaseModel):
-    default: bool = Field(
-        default=False,
-        description="Mark this SQL store as the default choice when a reference omits the backend name",
-    )
-
-
 class SqlAlchemySqlStoreConfig(BaseModel):
    @property
    @abstractmethod
@@ -161,7 +150,7 @@ class SqlAlchemySqlStoreConfig(BaseModel):
        return ["sqlalchemy[asyncio]"]


-class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig, CommonSqlStoreConfig):
+class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig):
    type: Literal[StorageBackendType.SQL_SQLITE] = StorageBackendType.SQL_SQLITE
    db_path: str = Field(
        description="Database path, e.g. ~/.llama/distributions/ollama/sqlstore.db",
@@ -219,9 +208,8 @@ class SqlStoreReference(BaseModel):
        description="Name of the table to use for the SqlStore",
    )

-    backend: str | None = Field(
-        description="Name of backend from persistence.backends, a default will be used if not specified",
-        default=None,
+    backend: str = Field(
+        description="Name of backend from storage.backends",
    )


@@ -233,9 +221,8 @@ class KVStoreReference(BaseModel):
        description="Key prefix for KVStore backends",
    )

-    backend: str | None = Field(
-        description="Name of backend from persistence.backends, a default will be used if not specified",
-        default=None,
+    backend: str = Field(
+        description="Name of backend from storage.backends",
    )


@@ -263,21 +250,11 @@ class InferenceStoreReference(SqlStoreReference):
    )


+class ResponsesStoreReference(InferenceStoreReference):
+    """Responses store configuration with queue tuning."""
+
+
 class StorageConfig(BaseModel):
    backends: dict[str, StorageBackendConfig] = Field(
        description="Named backend configurations (e.g., 'default', 'cache')",
    )
-
-    # these are stores used natively by the Stack
-    metadata: KVStoreReference | None = Field(
-        default=None,
-        description="Metadata store configuration (uses KVStore backend)",
-    )
-    inference: InferenceStoreReference | None = Field(
-        default=None,
-        description="Inference store configuration (uses SqlStore backend)",
-    )
-    conversations: SqlStoreReference | None = Field(
-        default=None,
-        description="Conversations store configuration (uses SqlStore backend)",
-    )
--- a/llama_stack/core/store/registry.py
+++ b/llama_stack/core/store/registry.py
@@ -11,7 +11,7 @@ from typing import Protocol
 import pydantic

 from llama_stack.core.datatypes import RoutableObjectWithProvider
-from llama_stack.core.storage.datatypes import KVStoreReference, StorageConfig
+from llama_stack.core.storage.datatypes import KVStoreReference
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl

@@ -190,17 +190,10 @@ class CachedDiskDistributionRegistry(DiskDistributionRegistry):


 async def create_dist_registry(
-    storage: StorageConfig,
-    image_name: str,
+    metadata_store: KVStoreReference, image_name: str
 ) -> tuple[CachedDiskDistributionRegistry, KVStore]:
    # instantiate kvstore for storing and retrieving distribution metadata
-    # Use metadata store backend with registry-specific namespace
-    metadata_ref = storage.metadata
-    registry_ref = KVStoreReference(
-        namespace="registry",
-        backend=metadata_ref.backend if metadata_ref else None,
-    )
-    dist_kvstore = await kvstore_impl(registry_ref)
+    dist_kvstore = await kvstore_impl(metadata_store)
    dist_registry = CachedDiskDistributionRegistry(dist_kvstore)
    await dist_registry.initialize()
    return dist_registry, dist_kvstore