From 2bba56a0a842092dbcc253ab1483d741729d0da6 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sun, 19 Oct 2025 08:06:19 -0700 Subject: [PATCH] group all server stores under storage.stores --- .../k8s-benchmark/stack-configmap.yaml | 40 +-- .../k8s-benchmark/stack_run_config.yaml | 39 ++- docs/docs/distributions/configuration.mdx | 30 +- .../distributions/k8s/stack-configmap.yaml | 201 ++++++++++---- .../distributions/k8s/stack_run_config.yaml | 39 ++- llama_stack/cli/stack/_build.py | 29 +- llama_stack/core/configure.py | 31 +++ .../core/conversations/conversations.py | 4 +- llama_stack/core/datatypes.py | 28 +- llama_stack/core/prompts/prompts.py | 4 +- llama_stack/core/routers/__init__.py | 4 +- llama_stack/core/stack.py | 7 +- llama_stack/core/storage/datatypes.py | 27 +- llama_stack/distributions/ci-tests/run.yaml | 23 +- .../distributions/dell/run-with-safety.yaml | 23 +- llama_stack/distributions/dell/run.yaml | 23 +- .../meta-reference-gpu/run-with-safety.yaml | 23 +- .../distributions/meta-reference-gpu/run.yaml | 23 +- .../distributions/nvidia/run-with-safety.yaml | 23 +- llama_stack/distributions/nvidia/run.yaml | 23 +- .../distributions/open-benchmark/run.yaml | 23 +- .../distributions/postgres-demo/run.yaml | 23 +- .../distributions/starter-gpu/run.yaml | 23 +- llama_stack/distributions/starter/run.yaml | 23 +- llama_stack/distributions/template.py | 36 +-- llama_stack/distributions/watsonx/run.yaml | 23 +- .../test_persistence_integration.py | 30 +- tests/unit/cli/test_stack_config.py | 42 +++ .../unit/conversations/test_conversations.py | 35 ++- tests/unit/core/test_storage_references.py | 45 +-- tests/unit/distribution/test_distribution.py | 25 +- tests/unit/prompts/prompts/conftest.py | 11 +- .../agent/test_meta_reference_agent.py | 39 ++- tests/unit/server/test_resolver.py | 26 +- .../utils/inference/test_inference_store.py | 261 +++++++++--------- 35 files changed, 806 insertions(+), 503 deletions(-) diff --git a/benchmarking/k8s-benchmark/stack-configmap.yaml b/benchmarking/k8s-benchmark/stack-configmap.yaml index bb8a48d65..e1ca170f5 100644 --- a/benchmarking/k8s-benchmark/stack-configmap.yaml +++ b/benchmarking/k8s-benchmark/stack-configmap.yaml @@ -98,21 +98,30 @@ data: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} - metadata_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - table_name: llamastack_kvstore - inference_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} + storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + stores: + metadata: + backend: kv_default + namespace: registry + inference: + backend: sql_default + table_name: inference_store models: - metadata:
embedding_dimension: 768 @@ -137,5 +146,4 @@ data: port: 8323 kind: ConfigMap metadata: - creationTimestamp: null name: llama-stack-config diff --git a/benchmarking/k8s-benchmark/stack_run_config.yaml b/benchmarking/k8s-benchmark/stack_run_config.yaml index e2fbfd7a4..2ccaa21aa 100644 --- a/benchmarking/k8s-benchmark/stack_run_config.yaml +++ b/benchmarking/k8s-benchmark/stack_run_config.yaml @@ -95,21 +95,30 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} -metadata_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - table_name: llamastack_kvstore -inference_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} +storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + stores: + metadata: + backend: kv_default + namespace: registry + inference: + backend: sql_default + table_name: inference_store models: - metadata: embedding_dimension: 768 diff --git a/docs/docs/distributions/configuration.mdx b/docs/docs/distributions/configuration.mdx index 81243c97b..bf3156865 100644 --- a/docs/docs/distributions/configuration.mdx +++ b/docs/docs/distributions/configuration.mdx @@ -44,18 +44,32 @@ providers: - provider_id: meta-reference provider_type: inline::meta-reference config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db + persistence: + agent_state: + backend: kv_default + namespace: agents + responses: + backend: sql_default + table_name: responses telemetry: - provider_id: meta-reference provider_type: inline::meta-reference config: {} -metadata_store: - namespace: null - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db +storage: + backends: + kv_default: + type: kv_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/kvstore.db + sql_default: + type: sql_sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/sqlstore.db + stores: + metadata: + backend: kv_default + namespace: registry + inference: + backend: sql_default + table_name: inference_store models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/docs/docs/distributions/k8s/stack-configmap.yaml b/docs/docs/distributions/k8s/stack-configmap.yaml index 3dbb0da97..c71ab05d8 100644 --- a/docs/docs/distributions/k8s/stack-configmap.yaml +++ b/docs/docs/distributions/k8s/stack-configmap.yaml @@ -1,56 +1,155 @@ apiVersion: v1 data: -  stack_run_config.yaml: "version: '2'\nimage_name: kubernetes-demo\napis:\n- agents\n- - inference\n- files\n- safety\n- telemetry\n- tool_runtime\n- vector_io\nproviders:\n - \ inference:\n - provider_id: vllm-inference\n
provider_type: remote::vllm\n - \ config:\n url: ${env.VLLM_URL:=http://localhost:8000/v1}\n max_tokens: - ${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n tls_verify: - ${env.VLLM_TLS_VERIFY:=true}\n - provider_id: vllm-safety\n provider_type: - remote::vllm\n config:\n url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1}\n - \ max_tokens: ${env.VLLM_MAX_TOKENS:=4096}\n api_token: ${env.VLLM_API_TOKEN:=fake}\n - \ tls_verify: ${env.VLLM_TLS_VERIFY:=true}\n - provider_id: sentence-transformers\n - \ provider_type: inline::sentence-transformers\n config: {}\n vector_io:\n - \ - provider_id: ${env.ENABLE_CHROMADB:+chromadb}\n provider_type: remote::chromadb\n - \ config:\n url: ${env.CHROMADB_URL:=}\n kvstore:\n type: postgres\n - \ host: ${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n - \ db: ${env.POSTGRES_DB:=llamastack}\n user: ${env.POSTGRES_USER:=llamastack}\n - \ password: ${env.POSTGRES_PASSWORD:=llamastack}\n files:\n - provider_id: - meta-reference-files\n provider_type: inline::localfs\n config:\n storage_dir: - ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}\n metadata_store:\n - \ type: sqlite\n db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db - \ \n safety:\n - provider_id: llama-guard\n provider_type: inline::llama-guard\n - \ config:\n excluded_categories: []\n agents:\n - provider_id: meta-reference\n - \ provider_type: inline::meta-reference\n config:\n persistence_store:\n - \ type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n port: - ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user: - ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n - \ responses_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n - \ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n - \ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n - \ telemetry:\n - provider_id: meta-reference\n provider_type: inline::meta-reference\n - \ config:\n service_name: \"${env.OTEL_SERVICE_NAME:=\\u200B}\"\n sinks: - ${env.TELEMETRY_SINKS:=console}\n tool_runtime:\n - provider_id: brave-search\n - \ provider_type: remote::brave-search\n config:\n api_key: ${env.BRAVE_SEARCH_API_KEY:+}\n - \ max_results: 3\n - provider_id: tavily-search\n provider_type: remote::tavily-search\n - \ config:\n api_key: ${env.TAVILY_SEARCH_API_KEY:+}\n max_results: - 3\n - provider_id: rag-runtime\n provider_type: inline::rag-runtime\n config: - {}\n - provider_id: model-context-protocol\n provider_type: remote::model-context-protocol\n - \ config: {}\nmetadata_store:\n type: postgres\n host: ${env.POSTGRES_HOST:=localhost}\n - \ port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n user: - ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\n - \ table_name: llamastack_kvstore\ninference_store:\n type: postgres\n host: - ${env.POSTGRES_HOST:=localhost}\n port: ${env.POSTGRES_PORT:=5432}\n db: ${env.POSTGRES_DB:=llamastack}\n - \ user: ${env.POSTGRES_USER:=llamastack}\n password: ${env.POSTGRES_PASSWORD:=llamastack}\nmodels:\n- - metadata:\n embedding_dimension: 384\n model_id: all-MiniLM-L6-v2\n provider_id: - sentence-transformers\n model_type: embedding\n- metadata: {}\n model_id: ${env.INFERENCE_MODEL}\n - \ provider_id: vllm-inference\n model_type: llm\n- metadata: {}\n model_id: - ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\n provider_id: vllm-safety\n - \ 
model_type: llm\nshields:\n- shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B}\nvector_dbs: - []\ndatasets: []\nscoring_fns: []\nbenchmarks: []\ntool_groups:\n- toolgroup_id: - builtin::websearch\n provider_id: tavily-search\n- toolgroup_id: builtin::rag\n - \ provider_id: rag-runtime\nserver:\n port: 8321\n auth:\n provider_config:\n - \ type: github_token\n" + stack_run_config.yaml: | + version: '2' + image_name: kubernetes-demo + apis: + - agents + - inference + - files + - safety + - telemetry + - tool_runtime + - vector_io + providers: + inference: + - provider_id: vllm-inference + provider_type: remote::vllm + config: + url: ${env.VLLM_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: vllm-safety + provider_type: remote::vllm + config: + url: ${env.VLLM_SAFETY_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:=} + kvstore: + type: postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files} + metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + responses_store: + type: postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:+} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:+} + max_results: 3 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} + storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: 
${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + stores: + metadata: + backend: kv_default + namespace: registry + inference: + backend: sql_default + table_name: inference_store + models: + - metadata: + embedding_dimension: 768 + model_id: nomic-embed-text-v1.5 + provider_id: sentence-transformers + model_type: embedding + - metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: vllm-inference + model_type: llm + - metadata: {} + model_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} + provider_id: vllm-safety + model_type: llm + shields: + - shield_id: ${env.SAFETY_MODEL:=meta-llama/Llama-Guard-3-1B} + vector_dbs: [] + datasets: [] + scoring_fns: [] + benchmarks: [] + tool_groups: + - toolgroup_id: builtin::websearch + provider_id: tavily-search + - toolgroup_id: builtin::rag + provider_id: rag-runtime + server: + port: 8321 + auth: + provider_config: + type: github_token kind: ConfigMap metadata: - creationTimestamp: null name: llama-stack-config diff --git a/docs/docs/distributions/k8s/stack_run_config.yaml b/docs/docs/distributions/k8s/stack_run_config.yaml index ee28a1ea8..863565fdf 100644 --- a/docs/docs/distributions/k8s/stack_run_config.yaml +++ b/docs/docs/distributions/k8s/stack_run_config.yaml @@ -93,21 +93,30 @@ providers: - provider_id: model-context-protocol provider_type: remote::model-context-protocol config: {} -metadata_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} - table_name: llamastack_kvstore -inference_store: - type: postgres - host: ${env.POSTGRES_HOST:=localhost} - port: ${env.POSTGRES_PORT:=5432} - db: ${env.POSTGRES_DB:=llamastack} - user: ${env.POSTGRES_USER:=llamastack} - password: ${env.POSTGRES_PASSWORD:=llamastack} +storage: + backends: + kv_default: + type: kv_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore} + sql_default: + type: sql_postgres + host: ${env.POSTGRES_HOST:=localhost} + port: ${env.POSTGRES_PORT:=5432} + db: ${env.POSTGRES_DB:=llamastack} + user: ${env.POSTGRES_USER:=llamastack} + password: ${env.POSTGRES_PASSWORD:=llamastack} + stores: + metadata: + backend: kv_default + namespace: registry + inference: + backend: sql_default + table_name: inference_store models: - metadata: embedding_dimension: 768 diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py index cf5ed55ae..9806e4c48 100644 --- a/llama_stack/cli/stack/_build.py +++ b/llama_stack/cli/stack/_build.py @@ -43,6 +43,7 @@ from llama_stack.core.stack import replace_env_vars from llama_stack.core.storage.datatypes import ( InferenceStoreReference, KVStoreReference, + ServerStoresConfig, SqliteKVStoreConfig, SqliteSqlStoreConfig, SqlStoreReference, @@ -302,7 +303,21 @@ def _generate_run_config( "sql_default": SqliteSqlStoreConfig( db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/sql_store.db", ), - } + }, + stores=ServerStoresConfig( + metadata=KVStoreReference( + backend="kv_default", + namespace="registry", + ), + inference=InferenceStoreReference( + backend="sql_default", + 
table_name="inference_store", + ), + conversations=SqlStoreReference( + backend="sql_default", + table_name="openai_conversations", + ), + ), ) run_config = StackRunConfig( @@ -311,18 +326,6 @@ def _generate_run_config( apis=apis, providers={}, storage=storage, - metadata_store=KVStoreReference( - backend="kv_default", - namespace="registry", - ), - inference_store=InferenceStoreReference( - backend="sql_default", - table_name="inference_store", - ), - conversations_store=SqlStoreReference( - backend="sql_default", - table_name="openai_conversations", - ), external_providers_dir=build_config.external_providers_dir if build_config.external_providers_dir else EXTERNAL_PROVIDERS_DIR, diff --git a/llama_stack/core/configure.py b/llama_stack/core/configure.py index bfa2c6d71..734839ea9 100644 --- a/llama_stack/core/configure.py +++ b/llama_stack/core/configure.py @@ -159,6 +159,37 @@ def upgrade_from_routing_table( config_dict["apis"] = config_dict["apis_to_serve"] config_dict.pop("apis_to_serve", None) + # Add default storage config if not present + if "storage" not in config_dict: + config_dict["storage"] = { + "backends": { + "kv_default": { + "type": "kv_sqlite", + "db_path": "~/.llama/kvstore.db", + }, + "sql_default": { + "type": "sql_sqlite", + "db_path": "~/.llama/sql_store.db", + }, + }, + "stores": { + "metadata": { + "namespace": "registry", + "backend": "kv_default", + }, + "inference": { + "table_name": "inference_store", + "backend": "sql_default", + "max_write_queue_size": 10000, + "num_writers": 4, + }, + "conversations": { + "table_name": "openai_conversations", + "backend": "sql_default", + }, + }, + } + return config_dict diff --git a/llama_stack/core/conversations/conversations.py b/llama_stack/core/conversations/conversations.py index ef2bca7e3..66880ca36 100644 --- a/llama_stack/core/conversations/conversations.py +++ b/llama_stack/core/conversations/conversations.py @@ -56,9 +56,9 @@ class ConversationServiceImpl(Conversations): self.policy = config.policy # Use conversations store reference from run config - conversations_ref = config.run_config.conversations_store + conversations_ref = config.run_config.storage.stores.conversations if not conversations_ref: - raise ValueError("conversations_store must be configured in run config") + raise ValueError("storage.stores.conversations must be configured in run config") base_sql_store = sqlstore_impl(conversations_ref) self.sql_store = AuthorizedSqlStore(base_sql_store, self.policy) diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index a813b6084..d692da3b3 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -27,9 +27,7 @@ from llama_stack.apis.vector_dbs import VectorDB, VectorDBInput from llama_stack.apis.vector_io import VectorIO from llama_stack.core.access_control.datatypes import AccessRule from llama_stack.core.storage.datatypes import ( - InferenceStoreReference, KVStoreReference, - SqlStoreReference, StorageBackendType, StorageConfig, ) @@ -470,19 +468,7 @@ can be instantiated multiple times (with different configs) if necessary. 
""", ) storage: StorageConfig = Field( - description="Catalog of named storage backends available to the stack", - ) - metadata_store: KVStoreReference | None = Field( - default=None, - description="Reference to the KV store backend used by the distribution registry (kv_* backend).", - ) - inference_store: InferenceStoreReference | None = Field( - default=None, - description="Reference to the SQL store backend used by the inference API (sql_* backend).", - ) - conversations_store: SqlStoreReference | None = Field( - default=None, - description="Reference to the SQL store backend used by the conversations API (sql_* backend).", + description="Catalog of named storage backends and references available to the stack", ) # registry of "resources" in the distribution @@ -523,8 +509,9 @@ can be instantiated multiple times (with different configs) if necessary. return v @model_validator(mode="after") - def validate_storage_references(self) -> "StackRunConfig": - backend_map = self.storage.backends if self.storage else {} + def validate_server_stores(self) -> "StackRunConfig": + backend_map = self.storage.backends + stores = self.storage.stores kv_backends = { name for name, cfg in backend_map.items() @@ -558,9 +545,10 @@ can be instantiated multiple times (with different configs) if necessary. f"{'kv_*' if expected_set is kv_backends else 'sql_*'} is required." ) - _ensure_backend(self.metadata_store, kv_backends, "metadata_store") - _ensure_backend(self.inference_store, sql_backends, "inference_store") - _ensure_backend(self.conversations_store, sql_backends, "conversations_store") + _ensure_backend(stores.metadata, kv_backends, "storage.stores.metadata") + _ensure_backend(stores.inference, sql_backends, "storage.stores.inference") + _ensure_backend(stores.conversations, sql_backends, "storage.stores.conversations") + _ensure_backend(stores.responses, sql_backends, "storage.stores.responses") return self diff --git a/llama_stack/core/prompts/prompts.py b/llama_stack/core/prompts/prompts.py index ee8c42596..856397ca5 100644 --- a/llama_stack/core/prompts/prompts.py +++ b/llama_stack/core/prompts/prompts.py @@ -41,9 +41,9 @@ class PromptServiceImpl(Prompts): async def initialize(self) -> None: # Use metadata store backend with prompts-specific namespace - metadata_ref = self.config.run_config.metadata_store + metadata_ref = self.config.run_config.storage.stores.metadata if not metadata_ref: - raise ValueError("metadata_store must be configured in run config") + raise ValueError("storage.stores.metadata must be configured in run config") prompts_ref = KVStoreReference(namespace="prompts", backend=metadata_ref.backend) self.kvstore = await kvstore_impl(prompts_ref) diff --git a/llama_stack/core/routers/__init__.py b/llama_stack/core/routers/__init__.py index 62da054de..0573fc2c7 100644 --- a/llama_stack/core/routers/__init__.py +++ b/llama_stack/core/routers/__init__.py @@ -80,9 +80,9 @@ async def get_auto_router_impl( # TODO: move pass configs to routers instead if api == Api.inference: - inference_ref = run_config.inference_store + inference_ref = run_config.storage.stores.inference if not inference_ref: - raise ValueError("inference_store must be configured in run config") + raise ValueError("storage.stores.inference must be configured in run config") inference_store = InferenceStore( reference=inference_ref, diff --git a/llama_stack/core/stack.py b/llama_stack/core/stack.py index 95e9a28b0..1222eff77 100644 --- a/llama_stack/core/stack.py +++ b/llama_stack/core/stack.py @@ -368,9 +368,10 @@ class 
Stack: logger.info(f"API recording enabled: mode={os.environ.get('LLAMA_STACK_TEST_INFERENCE_MODE')}") _initialize_storage(self.run_config) - if not self.run_config.metadata_store: - raise ValueError("metadata_store must be configured with a kv_* backend") - dist_registry, _ = await create_dist_registry(self.run_config.metadata_store, self.run_config.image_name) + stores = self.run_config.storage.stores + if not stores.metadata: + raise ValueError("storage.stores.metadata must be configured with a kv_* backend") + dist_registry, _ = await create_dist_registry(stores.metadata, self.run_config.image_name) policy = self.run_config.server.auth.access_policy if self.run_config.server.auth else [] internal_impls = {} diff --git a/llama_stack/core/storage/datatypes.py b/llama_stack/core/storage/datatypes.py index 4c3f1b99e..9df170e10 100644 --- a/llama_stack/core/storage/datatypes.py +++ b/llama_stack/core/storage/datatypes.py @@ -72,7 +72,7 @@ class SqliteKVStoreConfig(CommonConfig): class PostgresKVStoreConfig(CommonConfig): type: Literal[StorageBackendType.KV_POSTGRES] = StorageBackendType.KV_POSTGRES host: str = "localhost" - port: int = 5432 + port: int | str = 5432 db: str = "llamastack" user: str password: str | None = None @@ -175,7 +175,7 @@ class SqliteSqlStoreConfig(SqlAlchemySqlStoreConfig): class PostgresSqlStoreConfig(SqlAlchemySqlStoreConfig): type: Literal[StorageBackendType.SQL_POSTGRES] = StorageBackendType.SQL_POSTGRES host: str = "localhost" - port: int = 5432 + port: int | str = 5432 db: str = "llamastack" user: str password: str | None = None @@ -254,7 +254,30 @@ class ResponsesStoreReference(InferenceStoreReference): """Responses store configuration with queue tuning.""" +class ServerStoresConfig(BaseModel): + metadata: KVStoreReference | None = Field( + default=None, + description="Metadata store configuration (uses KV backend)", + ) + inference: InferenceStoreReference | None = Field( + default=None, + description="Inference store configuration (uses SQL backend)", + ) + conversations: SqlStoreReference | None = Field( + default=None, + description="Conversations store configuration (uses SQL backend)", + ) + responses: ResponsesStoreReference | None = Field( + default=None, + description="Responses store configuration (uses SQL backend)", + ) + + class StorageConfig(BaseModel): backends: dict[str, StorageBackendConfig] = Field( description="Named backend configurations (e.g., 'default', 'cache')", ) + stores: ServerStoresConfig = Field( + default_factory=lambda: ServerStoresConfig(), + description="Named references to storage backends used by the stack core", + ) diff --git a/llama_stack/distributions/ci-tests/run.yaml b/llama_stack/distributions/ci-tests/run.yaml index de950d1b3..3e6f20900 100644 --- a/llama_stack/distributions/ci-tests/run.yaml +++ b/llama_stack/distributions/ci-tests/run.yaml @@ -220,17 +220,18 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sql_store.db -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: [] shields: - shield_id: 
llama-guard diff --git a/llama_stack/distributions/dell/run-with-safety.yaml b/llama_stack/distributions/dell/run-with-safety.yaml index a705afe94..a246755cc 100644 --- a/llama_stack/distributions/dell/run-with-safety.yaml +++ b/llama_stack/distributions/dell/run-with-safety.yaml @@ -97,17 +97,18 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/dell/run.yaml b/llama_stack/distributions/dell/run.yaml index e1feb0c75..aa911c92f 100644 --- a/llama_stack/distributions/dell/run.yaml +++ b/llama_stack/distributions/dell/run.yaml @@ -93,17 +93,18 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml b/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml index 517886dba..c8426fe1b 100644 --- a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml @@ -110,17 +110,18 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/meta-reference-gpu/run.yaml b/llama_stack/distributions/meta-reference-gpu/run.yaml index 0133d5cd3..827d6d07f 100644 --- a/llama_stack/distributions/meta-reference-gpu/run.yaml +++ b/llama_stack/distributions/meta-reference-gpu/run.yaml @@ -100,17 +100,18 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - 
num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/nvidia/run-with-safety.yaml b/llama_stack/distributions/nvidia/run-with-safety.yaml index ae705977b..984b86578 100644 --- a/llama_stack/distributions/nvidia/run-with-safety.yaml +++ b/llama_stack/distributions/nvidia/run-with-safety.yaml @@ -99,17 +99,18 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/nvidia/run.yaml b/llama_stack/distributions/nvidia/run.yaml index 65bc0ce9a..f3f4b73e4 100644 --- a/llama_stack/distributions/nvidia/run.yaml +++ b/llama_stack/distributions/nvidia/run.yaml @@ -88,17 +88,18 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: [] shields: [] vector_dbs: [] diff --git a/llama_stack/distributions/open-benchmark/run.yaml b/llama_stack/distributions/open-benchmark/run.yaml index e15cbedd8..ddaf3688d 100644 --- a/llama_stack/distributions/open-benchmark/run.yaml +++ b/llama_stack/distributions/open-benchmark/run.yaml @@ -130,17 +130,18 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sql_store.db -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: gpt-4o diff --git a/llama_stack/distributions/postgres-demo/run.yaml b/llama_stack/distributions/postgres-demo/run.yaml index df979a8fe..7831b403d 100644 --- a/llama_stack/distributions/postgres-demo/run.yaml +++ b/llama_stack/distributions/postgres-demo/run.yaml @@ -75,17 +75,18 @@ storage: db: 
${env.POSTGRES_DB:=llamastack} user: ${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/llama_stack/distributions/starter-gpu/run.yaml b/llama_stack/distributions/starter-gpu/run.yaml index 357495368..f69ae2733 100644 --- a/llama_stack/distributions/starter-gpu/run.yaml +++ b/llama_stack/distributions/starter-gpu/run.yaml @@ -223,17 +223,18 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sql_store.db -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: [] shields: - shield_id: llama-guard diff --git a/llama_stack/distributions/starter/run.yaml b/llama_stack/distributions/starter/run.yaml index cc5fc92c4..99c425e5f 100644 --- a/llama_stack/distributions/starter/run.yaml +++ b/llama_stack/distributions/starter/run.yaml @@ -220,17 +220,18 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sql_store.db -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: [] shields: - shield_id: llama-guard diff --git a/llama_stack/distributions/template.py b/llama_stack/distributions/template.py index f7671719c..542c7bea9 100644 --- a/llama_stack/distributions/template.py +++ b/llama_stack/distributions/template.py @@ -188,6 +188,7 @@ class RunConfigSettings(BaseModel): default_benchmarks: list[BenchmarkInput] | None = None telemetry: TelemetryConfig = Field(default_factory=lambda: TelemetryConfig(enabled=True)) storage_backends: dict[str, Any] | None = None + storage_stores: dict[str, Any] | None = None def run_config( self, @@ -241,19 +242,25 @@ class RunConfigSettings(BaseModel): ), } - storage_config = dict(backends=storage_backends) - metadata_store = KVStoreReference( - backend="kv_default", - namespace="registry", - ).model_dump(exclude_none=True) - inference_store = InferenceStoreReference( - backend="sql_default", - table_name="inference_store", - ).model_dump(exclude_none=True) - conversations_store = SqlStoreReference( - backend="sql_default", - 
table_name="openai_conversations", - ).model_dump(exclude_none=True) + storage_stores = self.storage_stores or { + "metadata": KVStoreReference( + backend="kv_default", + namespace="registry", + ).model_dump(exclude_none=True), + "inference": InferenceStoreReference( + backend="sql_default", + table_name="inference_store", + ).model_dump(exclude_none=True), + "conversations": SqlStoreReference( + backend="sql_default", + table_name="openai_conversations", + ).model_dump(exclude_none=True), + } + + storage_config = dict( + backends=storage_backends, + stores=storage_stores, + ) # Return a dict that matches StackRunConfig structure return { @@ -263,9 +270,6 @@ class RunConfigSettings(BaseModel): "apis": apis, "providers": provider_configs, "storage": storage_config, - "metadata_store": metadata_store, - "inference_store": inference_store, - "conversations_store": conversations_store, "models": [m.model_dump(exclude_none=True) for m in (self.default_models or [])], "shields": [s.model_dump(exclude_none=True) for s in (self.default_shields or [])], "vector_dbs": [], diff --git a/llama_stack/distributions/watsonx/run.yaml b/llama_stack/distributions/watsonx/run.yaml index 5a5343e88..f05f2d17c 100644 --- a/llama_stack/distributions/watsonx/run.yaml +++ b/llama_stack/distributions/watsonx/run.yaml @@ -103,17 +103,18 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/sql_store.db -metadata_store: - namespace: registry - backend: kv_default -inference_store: - table_name: inference_store - backend: sql_default - max_write_queue_size: 10000 - num_writers: 4 -conversations_store: - table_name: openai_conversations - backend: sql_default + stores: + metadata: + namespace: registry + backend: kv_default + inference: + table_name: inference_store + backend: sql_default + max_write_queue_size: 10000 + num_writers: 4 + conversations: + table_name: openai_conversations + backend: sql_default models: [] shields: [] vector_dbs: [] diff --git a/tests/integration/test_persistence_integration.py b/tests/integration/test_persistence_integration.py index dcc4cb1af..e9b80dc0c 100644 --- a/tests/integration/test_persistence_integration.py +++ b/tests/integration/test_persistence_integration.py @@ -29,19 +29,20 @@ def test_starter_distribution_config_loads_and_resolves(): assert isinstance(config.storage.backends["kv_default"], SqliteKVStoreConfig) assert isinstance(config.storage.backends["sql_default"], SqliteSqlStoreConfig) - assert config.metadata_store is not None - assert config.metadata_store.backend == "kv_default" - assert config.metadata_store.namespace == "registry" + stores = config.storage.stores + assert stores.metadata is not None + assert stores.metadata.backend == "kv_default" + assert stores.metadata.namespace == "registry" - assert config.inference_store is not None - assert config.inference_store.backend == "sql_default" - assert config.inference_store.table_name == "inference_store" - assert config.inference_store.max_write_queue_size > 0 - assert config.inference_store.num_writers > 0 + assert stores.inference is not None + assert stores.inference.backend == "sql_default" + assert stores.inference.table_name == "inference_store" + assert stores.inference.max_write_queue_size > 0 + assert stores.inference.num_writers > 0 - assert config.conversations_store is not None - assert config.conversations_store.backend == "sql_default" - assert config.conversations_store.table_name == "openai_conversations" + assert stores.conversations is not None 
+ assert stores.conversations.backend == "sql_default" + assert stores.conversations.table_name == "openai_conversations" def test_postgres_demo_distribution_config_loads(): @@ -62,6 +63,9 @@ def test_postgres_demo_distribution_config_loads(): kv_backend = config.storage.backends["kv_default"] assert isinstance(kv_backend, PostgresKVStoreConfig) + stores = config.storage.stores # Stores target the Postgres backends explicitly - assert config.metadata_store.backend == "kv_default" - assert config.inference_store.backend == "sql_default" + assert stores.metadata is not None + assert stores.metadata.backend == "kv_default" + assert stores.inference is not None + assert stores.inference.backend == "sql_default" diff --git a/tests/unit/cli/test_stack_config.py b/tests/unit/cli/test_stack_config.py index daaf229e5..7b9f3ca0c 100644 --- a/tests/unit/cli/test_stack_config.py +++ b/tests/unit/cli/test_stack_config.py @@ -23,6 +23,27 @@ def config_with_image_name_int(): image_name: 1234 apis_to_serve: [] built_at: {datetime.now().isoformat()} + storage: + backends: + kv_default: + type: kv_sqlite + db_path: /tmp/test_kv.db + sql_default: + type: sql_sqlite + db_path: /tmp/test_sql.db + stores: + metadata: + backend: kv_default + namespace: metadata + inference: + backend: sql_default + table_name: inference + conversations: + backend: sql_default + table_name: conversations + responses: + backend: sql_default + table_name: responses providers: inference: - provider_id: provider1 @@ -54,6 +75,27 @@ def up_to_date_config(): image_name: foo apis_to_serve: [] built_at: {datetime.now().isoformat()} + storage: + backends: + kv_default: + type: kv_sqlite + db_path: /tmp/test_kv.db + sql_default: + type: sql_sqlite + db_path: /tmp/test_sql.db + stores: + metadata: + backend: kv_default + namespace: metadata + inference: + backend: sql_default + table_name: inference + conversations: + backend: sql_default + table_name: conversations + responses: + backend: sql_default + table_name: responses providers: inference: - provider_id: provider1 diff --git a/tests/unit/conversations/test_conversations.py b/tests/unit/conversations/test_conversations.py index 65c3e2333..ff6dd243d 100644 --- a/tests/unit/conversations/test_conversations.py +++ b/tests/unit/conversations/test_conversations.py @@ -20,7 +20,14 @@ from llama_stack.core.conversations.conversations import ( ConversationServiceConfig, ConversationServiceImpl, ) -from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig +from llama_stack.core.datatypes import StackRunConfig +from llama_stack.core.storage.datatypes import ( + ServerStoresConfig, + SqliteSqlStoreConfig, + SqlStoreReference, + StorageConfig, +) +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends @pytest.fixture @@ -28,7 +35,18 @@ async def service(): with tempfile.TemporaryDirectory() as tmpdir: db_path = Path(tmpdir) / "test_conversations.db" - config = ConversationServiceConfig(conversations_store=SqliteSqlStoreConfig(db_path=str(db_path)), policy=[]) + storage = StorageConfig( + backends={ + "sql_test": SqliteSqlStoreConfig(db_path=str(db_path)), + }, + stores=ServerStoresConfig( + conversations=SqlStoreReference(backend="sql_test", table_name="openai_conversations"), + ), + ) + register_sqlstore_backends({"sql_test": storage.backends["sql_test"]}) + run_config = StackRunConfig(image_name="test", apis=[], providers={}, storage=storage) + + config = ConversationServiceConfig(run_config=run_config, policy=[]) service = 
ConversationServiceImpl(config, {}) await service.initialize() yield service @@ -121,9 +139,18 @@ async def test_policy_configuration(): AccessRule(forbid=Scope(principal="test_user", actions=[Action.CREATE, Action.READ], resource="*")) ] - config = ConversationServiceConfig( - conversations_store=SqliteSqlStoreConfig(db_path=str(db_path)), policy=restrictive_policy + storage = StorageConfig( + backends={ + "sql_test": SqliteSqlStoreConfig(db_path=str(db_path)), + }, + stores=ServerStoresConfig( + conversations=SqlStoreReference(backend="sql_test", table_name="openai_conversations"), + ), ) + register_sqlstore_backends({"sql_test": storage.backends["sql_test"]}) + run_config = StackRunConfig(image_name="test", apis=[], providers={}, storage=storage) + + config = ConversationServiceConfig(run_config=run_config, policy=restrictive_policy) service = ConversationServiceImpl(config, {}) await service.initialize() diff --git a/tests/unit/core/test_storage_references.py b/tests/unit/core/test_storage_references.py index 206b90304..7bceba74d 100644 --- a/tests/unit/core/test_storage_references.py +++ b/tests/unit/core/test_storage_references.py @@ -16,6 +16,7 @@ from llama_stack.core.datatypes import ( from llama_stack.core.storage.datatypes import ( InferenceStoreReference, KVStoreReference, + ServerStoresConfig, SqliteKVStoreConfig, SqliteSqlStoreConfig, SqlStoreReference, @@ -24,13 +25,30 @@ from llama_stack.core.storage.datatypes import ( def _base_run_config(**overrides): + metadata_reference = overrides.pop( + "metadata_reference", + KVStoreReference(backend="kv_default", namespace="registry"), + ) + inference_reference = overrides.pop( + "inference_reference", + InferenceStoreReference(backend="sql_default", table_name="inference"), + ) + conversations_reference = overrides.pop( + "conversations_reference", + SqlStoreReference(backend="sql_default", table_name="conversations"), + ) storage = overrides.pop( "storage", StorageConfig( backends={ "kv_default": SqliteKVStoreConfig(db_path="/tmp/kv.db"), "sql_default": SqliteSqlStoreConfig(db_path="/tmp/sql.db"), - } + }, + stores=ServerStoresConfig( + metadata=metadata_reference, + inference=inference_reference, + conversations=conversations_reference, + ), ), ) return StackRunConfig( @@ -39,39 +57,28 @@ def _base_run_config(**overrides): apis=[], providers={}, storage=storage, - metadata_store=overrides.pop( - "metadata_store", - KVStoreReference(backend="kv_default", namespace="registry"), - ), - inference_store=overrides.pop( - "inference_store", - InferenceStoreReference(backend="sql_default", table_name="inference"), - ), - conversations_store=overrides.pop( - "conversations_store", - SqlStoreReference(backend="sql_default", table_name="conversations"), - ), **overrides, ) def test_references_require_known_backend(): with pytest.raises(ValidationError, match="unknown backend 'missing'"): - _base_run_config(metadata_store=KVStoreReference(backend="missing", namespace="registry")) + _base_run_config(metadata_reference=KVStoreReference(backend="missing", namespace="registry")) def test_references_must_match_backend_family(): with pytest.raises(ValidationError, match="kv_.* is required"): - _base_run_config(metadata_store=KVStoreReference(backend="sql_default", namespace="registry")) + _base_run_config(metadata_reference=KVStoreReference(backend="sql_default", namespace="registry")) with pytest.raises(ValidationError, match="sql_.* is required"): _base_run_config( - inference_store=InferenceStoreReference(backend="kv_default", 
table_name="inference"), + inference_reference=InferenceStoreReference(backend="kv_default", table_name="inference"), ) def test_valid_configuration_passes_validation(): config = _base_run_config() - assert config.metadata_store.backend == "kv_default" - assert config.inference_store.backend == "sql_default" - assert config.conversations_store.backend == "sql_default" + stores = config.storage.stores + assert stores.metadata is not None and stores.metadata.backend == "kv_default" + assert stores.inference is not None and stores.inference.backend == "sql_default" + assert stores.conversations is not None and stores.conversations.backend == "sql_default" diff --git a/tests/unit/distribution/test_distribution.py b/tests/unit/distribution/test_distribution.py index 788585328..3b0643a13 100644 --- a/tests/unit/distribution/test_distribution.py +++ b/tests/unit/distribution/test_distribution.py @@ -16,6 +16,7 @@ from llama_stack.core.distribution import INTERNAL_APIS, get_provider_registry, from llama_stack.core.storage.datatypes import ( InferenceStoreReference, KVStoreReference, + ServerStoresConfig, SqliteKVStoreConfig, SqliteSqlStoreConfig, SqlStoreReference, @@ -42,35 +43,25 @@ def _default_storage() -> StorageConfig: backends={ "kv_default": SqliteKVStoreConfig(db_path=":memory:"), "sql_default": SqliteSqlStoreConfig(db_path=":memory:"), - } + }, + stores=ServerStoresConfig( + metadata=KVStoreReference(backend="kv_default", namespace="registry"), + inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"), + conversations=SqlStoreReference(backend="sql_default", table_name="conversations"), + ), ) def make_stack_config(**overrides) -> StackRunConfig: storage = overrides.pop("storage", _default_storage()) - metadata_store = overrides.pop( - "metadata_store", - KVStoreReference(backend="kv_default", namespace="registry"), - ) - inference_store = overrides.pop( - "inference_store", - InferenceStoreReference(backend="sql_default", table_name="inference_store"), - ) - conversations_store = overrides.pop( - "conversations_store", - SqlStoreReference(backend="sql_default", table_name="conversations"), - ) defaults = dict( image_name="test_image", apis=[], providers={}, storage=storage, - metadata_store=metadata_store, - inference_store=inference_store, - conversations_store=conversations_store, ) defaults.update(overrides) - return make_stack_config(**defaults) + return StackRunConfig(**defaults) @pytest.fixture diff --git a/tests/unit/prompts/prompts/conftest.py b/tests/unit/prompts/prompts/conftest.py index 0fa52193b..fe30e1a77 100644 --- a/tests/unit/prompts/prompts/conftest.py +++ b/tests/unit/prompts/prompts/conftest.py @@ -12,6 +12,7 @@ from llama_stack.core.prompts.prompts import PromptServiceConfig, PromptServiceI from llama_stack.core.storage.datatypes import ( InferenceStoreReference, KVStoreReference, + ServerStoresConfig, SqliteKVStoreConfig, SqliteSqlStoreConfig, SqlStoreReference, @@ -32,16 +33,18 @@ async def temp_prompt_store(tmp_path_factory): backends={ "kv_test": SqliteKVStoreConfig(db_path=db_path), "sql_test": SqliteSqlStoreConfig(db_path=str(temp_dir / f"{unique_id}_sql.db")), - } + }, + stores=ServerStoresConfig( + metadata=KVStoreReference(backend="kv_test", namespace="registry"), + inference=InferenceStoreReference(backend="sql_test", table_name="inference"), + conversations=SqlStoreReference(backend="sql_test", table_name="conversations"), + ), ) mock_run_config = StackRunConfig( image_name="test-distribution", apis=[], providers={}, 
storage=storage, - metadata_store=KVStoreReference(backend="kv_test", namespace="registry"), - inference_store=InferenceStoreReference(backend="sql_test", table_name="inference"), - conversations_store=SqlStoreReference(backend="sql_test", table_name="conversations"), ) config = PromptServiceConfig(run_config=mock_run_config) store = PromptServiceImpl(config, deps={}) diff --git a/tests/unit/providers/agent/test_meta_reference_agent.py b/tests/unit/providers/agent/test_meta_reference_agent.py index cfb3e1327..48fb7083a 100644 --- a/tests/unit/providers/agent/test_meta_reference_agent.py +++ b/tests/unit/providers/agent/test_meta_reference_agent.py @@ -26,6 +26,24 @@ from llama_stack.providers.inline.agents.meta_reference.config import MetaRefere from llama_stack.providers.inline.agents.meta_reference.persistence import AgentInfo +@pytest.fixture(autouse=True) +def setup_backends(tmp_path): + """Register KV and SQL store backends for testing.""" + from llama_stack.core.storage.datatypes import SqliteKVStoreConfig, SqliteSqlStoreConfig + from llama_stack.providers.utils.kvstore.kvstore import register_kvstore_backends + from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends + + kv_path = str(tmp_path / "test_kv.db") + sql_path = str(tmp_path / "test_sql.db") + + register_kvstore_backends({ + "kv_default": SqliteKVStoreConfig(db_path=kv_path) + }) + register_sqlstore_backends({ + "sql_default": SqliteSqlStoreConfig(db_path=sql_path) + }) + + @pytest.fixture def mock_apis(): return { @@ -40,15 +58,20 @@ def mock_apis(): @pytest.fixture def config(tmp_path): + from llama_stack.core.storage.datatypes import KVStoreReference, ResponsesStoreReference + from llama_stack.providers.inline.agents.meta_reference.config import AgentPersistenceConfig + return MetaReferenceAgentsImplConfig( - persistence_store={ - "type": "sqlite", - "db_path": str(tmp_path / "test.db"), - }, - responses_store={ - "type": "sqlite", - "db_path": str(tmp_path / "test.db"), - }, + persistence=AgentPersistenceConfig( + agent_state=KVStoreReference( + backend="kv_default", + namespace="agents", + ), + responses=ResponsesStoreReference( + backend="sql_default", + table_name="responses", + ), + ) ) diff --git a/tests/unit/server/test_resolver.py b/tests/unit/server/test_resolver.py index 5620c7fa5..b44f12f7e 100644 --- a/tests/unit/server/test_resolver.py +++ b/tests/unit/server/test_resolver.py @@ -19,12 +19,15 @@ from llama_stack.core.routing_tables.models import ModelsRoutingTable from llama_stack.core.storage.datatypes import ( InferenceStoreReference, KVStoreReference, + ServerStoresConfig, SqliteKVStoreConfig, SqliteSqlStoreConfig, SqlStoreReference, StorageConfig, ) from llama_stack.providers.datatypes import InlineProviderSpec, ProviderSpec +from llama_stack.providers.utils.kvstore import register_kvstore_backends +from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends def add_protocol_methods(cls: type, protocol: type[Protocol]) -> None: @@ -76,26 +79,23 @@ def make_run_config(**overrides) -> StackRunConfig: backends={ "kv_default": SqliteKVStoreConfig(db_path=":memory:"), "sql_default": SqliteSqlStoreConfig(db_path=":memory:"), - } + }, + stores=ServerStoresConfig( + metadata=KVStoreReference(backend="kv_default", namespace="registry"), + inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"), + conversations=SqlStoreReference(backend="sql_default", table_name="conversations"), + ), ), ) + register_kvstore_backends({name: cfg 
for name, cfg in storage.backends.items() if cfg.type.value.startswith("kv_")})
+    register_sqlstore_backends(
+        {name: cfg for name, cfg in storage.backends.items() if cfg.type.value.startswith("sql_")}
+    )
     defaults = dict(
         image_name="test_image",
         apis=[],
         providers={},
         storage=storage,
-        metadata_store=overrides.pop(
-            "metadata_store",
-            KVStoreReference(backend="kv_default", namespace="registry"),
-        ),
-        inference_store=overrides.pop(
-            "inference_store",
-            InferenceStoreReference(backend="sql_default", table_name="inference_store"),
-        ),
-        conversations_store=overrides.pop(
-            "conversations_store",
-            SqlStoreReference(backend="sql_default", table_name="conversations"),
-        ),
     )
     defaults.update(overrides)
     return StackRunConfig(**defaults)
diff --git a/tests/unit/utils/inference/test_inference_store.py b/tests/unit/utils/inference/test_inference_store.py
index f6d63490a..4f6ba817e 100644
--- a/tests/unit/utils/inference/test_inference_store.py
+++ b/tests/unit/utils/inference/test_inference_store.py
@@ -16,8 +16,18 @@ from llama_stack.apis.inference import (
     OpenAIUserMessageParam,
     Order,
 )
+from llama_stack.core.storage.datatypes import InferenceStoreReference, SqliteSqlStoreConfig
 from llama_stack.providers.utils.inference.inference_store import InferenceStore
-from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
+from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends
+
+
+@pytest.fixture(autouse=True)
+def setup_backends(tmp_path):
+    """Register SQL store backends for testing."""
+    db_path = str(tmp_path / "test.db")
+    register_sqlstore_backends({
+        "sql_default": SqliteSqlStoreConfig(db_path=db_path)
+    })
 
 
 def create_test_chat_completion(
@@ -44,167 +54,162 @@ def create_test_chat_completion(
 
 async def test_inference_store_pagination_basic():
     """Test basic pagination functionality."""
-    with TemporaryDirectory() as tmp_dir:
-        db_path = tmp_dir + "/test.db"
-        store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
-        await store.initialize()
+    reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions")
+    store = InferenceStore(reference, policy=[])
+    await store.initialize()
 
-        # Create test data with different timestamps
-        base_time = int(time.time())
-        test_data = [
-            ("zebra-task", base_time + 1),
-            ("apple-job", base_time + 2),
-            ("moon-work", base_time + 3),
-            ("banana-run", base_time + 4),
-            ("car-exec", base_time + 5),
-        ]
+    # Create test data with different timestamps
+    base_time = int(time.time())
+    test_data = [
+        ("zebra-task", base_time + 1),
+        ("apple-job", base_time + 2),
+        ("moon-work", base_time + 3),
+        ("banana-run", base_time + 4),
+        ("car-exec", base_time + 5),
+    ]
 
-        # Store test chat completions
-        for completion_id, timestamp in test_data:
-            completion = create_test_chat_completion(completion_id, timestamp)
-            input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
-            await store.store_chat_completion(completion, input_messages)
+    # Store test chat completions
+    for completion_id, timestamp in test_data:
+        completion = create_test_chat_completion(completion_id, timestamp)
+        input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
+        await store.store_chat_completion(completion, input_messages)
 
-        # Wait for all queued writes to complete
-        await store.flush()
+    # Wait for all queued writes to complete
+    await store.flush()
 
-        # Test 1: First page with limit=2, descending order (default)
-        result = await store.list_chat_completions(limit=2, order=Order.desc)
-        assert len(result.data) == 2
-        assert result.data[0].id == "car-exec"  # Most recent first
-        assert result.data[1].id == "banana-run"
-        assert result.has_more is True
-        assert result.last_id == "banana-run"
+    # Test 1: First page with limit=2, descending order (default)
+    result = await store.list_chat_completions(limit=2, order=Order.desc)
+    assert len(result.data) == 2
+    assert result.data[0].id == "car-exec"  # Most recent first
+    assert result.data[1].id == "banana-run"
+    assert result.has_more is True
+    assert result.last_id == "banana-run"
 
-        # Test 2: Second page using 'after' parameter
-        result2 = await store.list_chat_completions(after="banana-run", limit=2, order=Order.desc)
-        assert len(result2.data) == 2
-        assert result2.data[0].id == "moon-work"
-        assert result2.data[1].id == "apple-job"
-        assert result2.has_more is True
+    # Test 2: Second page using 'after' parameter
+    result2 = await store.list_chat_completions(after="banana-run", limit=2, order=Order.desc)
+    assert len(result2.data) == 2
+    assert result2.data[0].id == "moon-work"
+    assert result2.data[1].id == "apple-job"
+    assert result2.has_more is True
 
-        # Test 3: Final page
-        result3 = await store.list_chat_completions(after="apple-job", limit=2, order=Order.desc)
-        assert len(result3.data) == 1
-        assert result3.data[0].id == "zebra-task"
-        assert result3.has_more is False
+    # Test 3: Final page
+    result3 = await store.list_chat_completions(after="apple-job", limit=2, order=Order.desc)
+    assert len(result3.data) == 1
+    assert result3.data[0].id == "zebra-task"
+    assert result3.has_more is False
 
 
 async def test_inference_store_pagination_ascending():
     """Test pagination with ascending order."""
-    with TemporaryDirectory() as tmp_dir:
-        db_path = tmp_dir + "/test.db"
-        store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
-        await store.initialize()
+    reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions")
+    store = InferenceStore(reference, policy=[])
+    await store.initialize()
 
-        # Create test data
-        base_time = int(time.time())
-        test_data = [
-            ("delta-item", base_time + 1),
-            ("charlie-task", base_time + 2),
-            ("alpha-work", base_time + 3),
-        ]
+    # Create test data
+    base_time = int(time.time())
+    test_data = [
+        ("delta-item", base_time + 1),
+        ("charlie-task", base_time + 2),
+        ("alpha-work", base_time + 3),
+    ]
 
-        # Store test chat completions
-        for completion_id, timestamp in test_data:
-            completion = create_test_chat_completion(completion_id, timestamp)
-            input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
-            await store.store_chat_completion(completion, input_messages)
+    # Store test chat completions
+    for completion_id, timestamp in test_data:
+        completion = create_test_chat_completion(completion_id, timestamp)
+        input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
+        await store.store_chat_completion(completion, input_messages)
 
-        # Wait for all queued writes to complete
-        await store.flush()
+    # Wait for all queued writes to complete
+    await store.flush()
 
-        # Test ascending order pagination
-        result = await store.list_chat_completions(limit=1, order=Order.asc)
-        assert len(result.data) == 1
-        assert result.data[0].id == "delta-item"  # Oldest first
-        assert result.has_more is True
+    # Test ascending order pagination
+    result = await store.list_chat_completions(limit=1, order=Order.asc)
+    assert len(result.data) == 1
+    assert result.data[0].id == "delta-item"  # Oldest first
+    assert result.has_more is True
 
-        # Second page with ascending order
-        result2 = await store.list_chat_completions(after="delta-item", limit=1, order=Order.asc)
-        assert len(result2.data) == 1
-        assert result2.data[0].id == "charlie-task"
-        assert result2.has_more is True
+    # Second page with ascending order
+    result2 = await store.list_chat_completions(after="delta-item", limit=1, order=Order.asc)
+    assert len(result2.data) == 1
+    assert result2.data[0].id == "charlie-task"
+    assert result2.has_more is True
 
 
 async def test_inference_store_pagination_with_model_filter():
     """Test pagination combined with model filtering."""
-    with TemporaryDirectory() as tmp_dir:
-        db_path = tmp_dir + "/test.db"
-        store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
-        await store.initialize()
+    reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions")
+    store = InferenceStore(reference, policy=[])
+    await store.initialize()
 
-        # Create test data with different models
-        base_time = int(time.time())
-        test_data = [
-            ("xyz-task", base_time + 1, "model-a"),
-            ("def-work", base_time + 2, "model-b"),
-            ("pqr-job", base_time + 3, "model-a"),
-            ("abc-run", base_time + 4, "model-b"),
-        ]
+    # Create test data with different models
+    base_time = int(time.time())
+    test_data = [
+        ("xyz-task", base_time + 1, "model-a"),
+        ("def-work", base_time + 2, "model-b"),
+        ("pqr-job", base_time + 3, "model-a"),
+        ("abc-run", base_time + 4, "model-b"),
+    ]
 
-        # Store test chat completions
-        for completion_id, timestamp, model in test_data:
-            completion = create_test_chat_completion(completion_id, timestamp, model)
-            input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
-            await store.store_chat_completion(completion, input_messages)
+    # Store test chat completions
+    for completion_id, timestamp, model in test_data:
+        completion = create_test_chat_completion(completion_id, timestamp, model)
+        input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
+        await store.store_chat_completion(completion, input_messages)
 
-        # Wait for all queued writes to complete
-        await store.flush()
+    # Wait for all queued writes to complete
+    await store.flush()
 
-        # Test pagination with model filter
-        result = await store.list_chat_completions(limit=1, model="model-a", order=Order.desc)
-        assert len(result.data) == 1
-        assert result.data[0].id == "pqr-job"  # Most recent model-a
-        assert result.data[0].model == "model-a"
-        assert result.has_more is True
+    # Test pagination with model filter
+    result = await store.list_chat_completions(limit=1, model="model-a", order=Order.desc)
+    assert len(result.data) == 1
+    assert result.data[0].id == "pqr-job"  # Most recent model-a
+    assert result.data[0].model == "model-a"
+    assert result.has_more is True
 
-        # Second page with model filter
-        result2 = await store.list_chat_completions(after="pqr-job", limit=1, model="model-a", order=Order.desc)
-        assert len(result2.data) == 1
-        assert result2.data[0].id == "xyz-task"
-        assert result2.data[0].model == "model-a"
-        assert result2.has_more is False
+    # Second page with model filter
+    result2 = await store.list_chat_completions(after="pqr-job", limit=1, model="model-a", order=Order.desc)
+    assert len(result2.data) == 1
+    assert result2.data[0].id == "xyz-task"
+    assert result2.data[0].model == "model-a"
+    assert result2.has_more is False
 
 
 async def test_inference_store_pagination_invalid_after():
     """Test error handling for invalid 'after' parameter."""
-    with TemporaryDirectory() as tmp_dir:
-        db_path = tmp_dir + "/test.db"
-        store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
-        await store.initialize()
+    reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions")
+    store = InferenceStore(reference, policy=[])
+    await store.initialize()
 
-        # Try to paginate with non-existent ID
-        with pytest.raises(ValueError, match="Record with id='non-existent' not found in table 'chat_completions'"):
-            await store.list_chat_completions(after="non-existent", limit=2)
+    # Try to paginate with non-existent ID
+    with pytest.raises(ValueError, match="Record with id='non-existent' not found in table 'chat_completions'"):
+        await store.list_chat_completions(after="non-existent", limit=2)
 
 
 async def test_inference_store_pagination_no_limit():
     """Test pagination behavior when no limit is specified."""
-    with TemporaryDirectory() as tmp_dir:
-        db_path = tmp_dir + "/test.db"
-        store = InferenceStore(SqliteSqlStoreConfig(db_path=db_path), policy=[])
-        await store.initialize()
+    reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions")
+    store = InferenceStore(reference, policy=[])
+    await store.initialize()
 
-        # Create test data
-        base_time = int(time.time())
-        test_data = [
-            ("omega-first", base_time + 1),
-            ("beta-second", base_time + 2),
-        ]
+    # Create test data
+    base_time = int(time.time())
+    test_data = [
+        ("omega-first", base_time + 1),
+        ("beta-second", base_time + 2),
+    ]
 
-        # Store test chat completions
-        for completion_id, timestamp in test_data:
-            completion = create_test_chat_completion(completion_id, timestamp)
-            input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
-            await store.store_chat_completion(completion, input_messages)
+    # Store test chat completions
+    for completion_id, timestamp in test_data:
+        completion = create_test_chat_completion(completion_id, timestamp)
+        input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
+        await store.store_chat_completion(completion, input_messages)
 
-        # Wait for all queued writes to complete
-        await store.flush()
+    # Wait for all queued writes to complete
+    await store.flush()
 
-        # Test without limit
-        result = await store.list_chat_completions(order=Order.desc)
-        assert len(result.data) == 2
-        assert result.data[0].id == "beta-second"  # Most recent first
-        assert result.data[1].id == "omega-first"
-        assert result.has_more is False
+    # Test without limit
+    result = await store.list_chat_completions(order=Order.desc)
+    assert len(result.data) == 2
+    assert result.data[0].id == "beta-second"  # Most recent first
+    assert result.data[1].id == "omega-first"
+    assert result.has_more is False
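
Note for reviewers (illustrative, not part of the patch): the test changes above all
follow the same two-step wiring -- named backends are registered once per process,
and stores are then built from logical references instead of concrete store configs.
A minimal standalone sketch, assuming only the import paths and call signatures
visible in the hunks above; the `demo` coroutine and the database path are
hypothetical names chosen for the example:

    from llama_stack.core.storage.datatypes import InferenceStoreReference, SqliteSqlStoreConfig
    from llama_stack.providers.utils.inference.inference_store import InferenceStore
    from llama_stack.providers.utils.sqlstore.sqlstore import register_sqlstore_backends

    async def demo() -> None:
        # Step 1: register a named SQL backend (in the tests, an autouse
        # fixture does this once for every test).
        register_sqlstore_backends(
            {"sql_default": SqliteSqlStoreConfig(db_path="/tmp/demo.db")}
        )

        # Step 2: construct the store from a reference; only the backend
        # name and table name appear here, not the storage details.
        reference = InferenceStoreReference(backend="sql_default", table_name="chat_completions")
        store = InferenceStore(reference, policy=[])
        await store.initialize()

This mirrors the `setup_backends` fixture plus the per-test construction pattern.
The payoff of the indirection is that swapping SQLite for Postgres (as the k8s
run configs do) only changes the backend registration under the new `storage`
section; code holding an `InferenceStoreReference` is untouched.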