From 98a5047f9d82dd95b04cf6bf4e570342db9f7282 Mon Sep 17 00:00:00 2001
From: IAN MILLER <75687988+r3v5@users.noreply.github.com>
Date: Mon, 27 Oct 2025 18:12:12 +0000
Subject: [PATCH] feat(prompts): attach prompts to storage stores in run configs (#3893)

# What does this PR do?

This PR attaches prompts to the storage stores in run configs, so that each distribution can specify a dedicated store for prompts (an illustrative override sketch is appended at the end of this message). The need for this functionality was raised in #3514.

> Note: #3514 is split into three separate PRs; this PR is the first of the three.

## Test Plan

Manual testing and updated CI unit tests.

Prerequisites:
1. `uv run --with llama-stack llama stack list-deps starter | xargs -L1 uv pip install`
2. `llama stack run starter`

```
INFO 2025-10-23 15:36:17,387 llama_stack.cli.stack.run:100 cli: Using run configuration: /Users/ianmiller/llama-stack/llama_stack/distributions/starter/run.yaml
INFO 2025-10-23 15:36:17,423 llama_stack.cli.stack.run:157 cli: HTTPS enabled with certificates: Key: None Cert: None
INFO 2025-10-23 15:36:17,424 llama_stack.cli.stack.run:159 cli: Listening on ['::', '0.0.0.0']:8321
INFO 2025-10-23 15:36:17,749 llama_stack.core.server.server:521 core::server: Run configuration:
INFO 2025-10-23 15:36:17,756 llama_stack.core.server.server:524 core::server: apis:
- agents
- batches
- datasetio
- eval
- files
- inference
- post_training
- safety
- scoring
- tool_runtime
- vector_io
image_name: starter
providers:
  agents:
  - config:
      persistence:
        agent_state:
          backend: kv_default
          namespace: agents
        responses:
          backend: sql_default
          max_write_queue_size: 10000
          num_writers: 4
          table_name: responses
    provider_id: meta-reference
    provider_type: inline::meta-reference
  batches:
  - config:
      kvstore:
        backend: kv_default
        namespace: batches
    provider_id: reference
    provider_type: inline::reference
  datasetio:
  - config:
      kvstore:
        backend: kv_default
        namespace: datasetio::huggingface
    provider_id: huggingface
    provider_type: remote::huggingface
  - config:
      kvstore:
        backend: kv_default
        namespace: datasetio::localfs
    provider_id: localfs
    provider_type: inline::localfs
  eval:
  - config:
      kvstore:
        backend: kv_default
        namespace: eval
    provider_id: meta-reference
    provider_type: inline::meta-reference
  files:
  - config:
      metadata_store:
        backend: sql_default
        table_name: files_metadata
      storage_dir: /Users/ianmiller/.llama/distributions/starter/files
    provider_id: meta-reference-files
    provider_type: inline::localfs
  inference:
  - config:
      api_key: '********'
      url: https://api.fireworks.ai/inference/v1
    provider_id: fireworks
    provider_type: remote::fireworks
  - config:
      api_key: '********'
      url: https://api.together.xyz/v1
    provider_id: together
    provider_type: remote::together
  - config: {}
    provider_id: bedrock
    provider_type: remote::bedrock
  - config:
      api_key: '********'
      base_url: https://api.openai.com/v1
    provider_id: openai
    provider_type: remote::openai
  - config:
      api_key: '********'
    provider_id: anthropic
    provider_type: remote::anthropic
  - config:
      api_key: '********'
    provider_id: gemini
    provider_type: remote::gemini
  - config:
      api_key: '********'
      url: https://api.groq.com
    provider_id: groq
    provider_type: remote::groq
  - config:
      api_key: '********'
      url: https://api.sambanova.ai/v1
    provider_id: sambanova
    provider_type: remote::sambanova
  - config: {}
    provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
  post_training:
  - config:
      checkpoint_format: meta
    provider_id: torchtune-cpu
    provider_type: inline::torchtune-cpu
  safety:
  - config:
      excluded_categories: []
    provider_id: llama-guard
    provider_type: inline::llama-guard
  - config: {}
    provider_id: code-scanner
    provider_type: inline::code-scanner
  scoring:
  - config: {}
    provider_id: basic
    provider_type: inline::basic
  - config: {}
    provider_id: llm-as-judge
    provider_type: inline::llm-as-judge
  - config:
      openai_api_key: '********'
    provider_id: braintrust
    provider_type: inline::braintrust
  tool_runtime:
  - config:
      api_key: '********'
      max_results: 3
    provider_id: brave-search
    provider_type: remote::brave-search
  - config:
      api_key: '********'
      max_results: 3
    provider_id: tavily-search
    provider_type: remote::tavily-search
  - config: {}
    provider_id: rag-runtime
    provider_type: inline::rag-runtime
  - config: {}
    provider_id: model-context-protocol
    provider_type: remote::model-context-protocol
  vector_io:
  - config:
      persistence:
        backend: kv_default
        namespace: vector_io::faiss
    provider_id: faiss
    provider_type: inline::faiss
  - config:
      db_path: /Users/ianmiller/.llama/distributions/starter/sqlite_vec.db
      persistence:
        backend: kv_default
        namespace: vector_io::sqlite_vec
    provider_id: sqlite-vec
    provider_type: inline::sqlite-vec
registered_resources:
  benchmarks: []
  datasets: []
  models: []
  scoring_fns: []
  shields: []
  tool_groups:
  - provider_id: tavily-search
    toolgroup_id: builtin::websearch
  - provider_id: rag-runtime
    toolgroup_id: builtin::rag
  vector_stores: []
server:
  port: 8321
storage:
  backends:
    kv_default:
      db_path: /Users/ianmiller/.llama/distributions/starter/kvstore.db
      type: kv_sqlite
    sql_default:
      db_path: /Users/ianmiller/.llama/distributions/starter/sql_store.db
      type: sql_sqlite
  stores:
    conversations:
      backend: sql_default
      table_name: openai_conversations
    inference:
      backend: sql_default
      max_write_queue_size: 10000
      num_writers: 4
      table_name: inference_store
    metadata:
      backend: kv_default
      namespace: registry
    prompts:
      backend: kv_default
      namespace: prompts
telemetry:
  enabled: true
vector_stores:
  default_embedding_model:
    model_id: nomic-ai/nomic-embed-text-v1.5
    provider_id: sentence-transformers
  default_provider_id: faiss
version: 2
INFO 2025-10-23 15:36:20,032 llama_stack.providers.utils.inference.inference_store:74 inference: Write queue disabled for SQLite to avoid concurrency issues
WARNING 2025-10-23 15:36:20,422 llama_stack.providers.inline.telemetry.meta_reference.telemetry:84 telemetry: OTEL_EXPORTER_OTLP_ENDPOINT is not set, skipping telemetry
INFO 2025-10-23 15:36:22,379 llama_stack.providers.utils.inference.openai_mixin:436 providers::utils: OpenAIInferenceAdapter.list_provider_model_ids() returned 105 models
INFO 2025-10-23 15:36:22,703 uvicorn.error:84 uncategorized: Started server process [17328]
INFO 2025-10-23 15:36:22,704 uvicorn.error:48 uncategorized: Waiting for application startup.
INFO 2025-10-23 15:36:22,706 llama_stack.core.server.server:179 core::server: Starting up Llama Stack server (version: 0.3.0)
INFO 2025-10-23 15:36:22,707 llama_stack.core.stack:470 core: starting registry refresh task
INFO 2025-10-23 15:36:22,708 uvicorn.error:62 uncategorized: Application startup complete.
INFO 2025-10-23 15:36:22,708 uvicorn.error:216 uncategorized: Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit)
```

As you can see, the prompts store is attached to `storage.stores` in the config.

Testing:

1. Create prompt:

```
curl -X POST http://localhost:8321/v1/prompts \
  -H "Content-Type: application/json" \
  -d '{
    "prompt": "Hello {{name}}! You are working at {{company}}. Your role is {{role}} at {{company}}. Remember, {{name}}, to be {{tone}}.",
    "variables": ["name", "company", "role", "tone"]
  }'
```

`{"prompt":"Hello {{name}}! You are working at {{company}}.
Your role is {{role}} at {{company}}. Remember, {{name}}, to be {{tone}}.","version":1,"prompt_id":"pmpt_a90e09e67acfe23776f2778c603eb6c17e139dab5f6e163f","variables":["name","company","role","tone"],"is_default":false}% ` 2. Get prompt: `curl -X GET http://localhost:8321/v1/prompts/pmpt_a90e09e67acfe23776f2778c603eb6c17e139dab5f6e163f` `{"prompt":"Hello {{name}}! You are working at {{company}}. Your role is {{role}} at {{company}}. Remember, {{name}}, to be {{tone}}.","version":1,"prompt_id":"pmpt_a90e09e67acfe23776f2778c603eb6c17e139dab5f6e163f","variables":["name","company","role","tone"],"is_default":false}% ` 3. Query sqlite KV storage to check created prompt: ``` sqlite> .mode column sqlite> .headers on sqlite> SELECT * FROM kvstore WHERE key LIKE 'prompts:v1:%'; key value expiration ------------------------------------------------------------ ------------------------------------------------------------ ---------- prompts:v1:pmpt_a90e09e67acfe23776f2778c603eb6c17e139dab5f6e {"prompt_id": "pmpt_a90e09e67acfe23776f2778c603eb6c17e139dab 163f:1 5f6e163f", "prompt": "Hello {{name}}! You are working at {{c ompany}}. Your role is {{role}} at {{company}}. Remember, {{ name}}, to be {{tone}}.", "version": 1, "variables": ["name" , "company", "role", "tone"], "is_default": false} prompts:v1:pmpt_a90e09e67acfe23776f2778c603eb6c17e139dab5f6e 1 163f:default sqlite> ``` --- .github/workflows/integration-auth-tests.yml | 3 +++ benchmarking/k8s-benchmark/stack-configmap.yaml | 10 +++++++++- benchmarking/k8s-benchmark/stack_run_config.yaml | 3 +++ docs/docs/distributions/configuration.mdx | 10 +++++++++- docs/docs/distributions/k8s/stack-configmap.yaml | 10 +++++++++- docs/docs/distributions/k8s/stack_run_config.yaml | 3 +++ llama_stack/core/datatypes.py | 1 + llama_stack/core/prompts/prompts.py | 10 ++++------ llama_stack/core/stack.py | 1 + llama_stack/core/storage/datatypes.py | 4 ++++ llama_stack/distributions/ci-tests/run.yaml | 3 +++ llama_stack/distributions/dell/run-with-safety.yaml | 3 +++ llama_stack/distributions/dell/run.yaml | 3 +++ .../meta-reference-gpu/run-with-safety.yaml | 3 +++ llama_stack/distributions/meta-reference-gpu/run.yaml | 3 +++ llama_stack/distributions/nvidia/run-with-safety.yaml | 3 +++ llama_stack/distributions/nvidia/run.yaml | 3 +++ llama_stack/distributions/open-benchmark/run.yaml | 3 +++ llama_stack/distributions/postgres-demo/run.yaml | 3 +++ llama_stack/distributions/starter-gpu/run.yaml | 3 +++ llama_stack/distributions/starter/run.yaml | 3 +++ llama_stack/distributions/template.py | 4 ++++ llama_stack/distributions/watsonx/run.yaml | 3 +++ tests/external/run-byoa.yaml | 3 +++ tests/unit/cli/test_stack_config.py | 3 +++ tests/unit/distribution/test_distribution.py | 1 + tests/unit/prompts/prompts/conftest.py | 5 +++-- 27 files changed, 96 insertions(+), 11 deletions(-) diff --git a/.github/workflows/integration-auth-tests.yml b/.github/workflows/integration-auth-tests.yml index 30a8063ea..c13ed6cbe 100644 --- a/.github/workflows/integration-auth-tests.yml +++ b/.github/workflows/integration-auth-tests.yml @@ -91,6 +91,9 @@ jobs: conversations: table_name: openai_conversations backend: sql_default + prompts: + namespace: prompts + backend: kv_default server: port: 8321 EOF diff --git a/benchmarking/k8s-benchmark/stack-configmap.yaml b/benchmarking/k8s-benchmark/stack-configmap.yaml index 8fbf09fce..58518ec18 100644 --- a/benchmarking/k8s-benchmark/stack-configmap.yaml +++ b/benchmarking/k8s-benchmark/stack-configmap.yaml @@ -107,13 +107,21 @@ data: db: 
${env.POSTGRES_DB:=llamastack} user: ${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} - references: + stores: metadata: backend: kv_default namespace: registry inference: backend: sql_default table_name: inference_store + max_write_queue_size: 10000 + num_writers: 4 + conversations: + backend: sql_default + table_name: openai_conversations + prompts: + backend: kv_default + namespace: prompts models: - metadata: embedding_dimension: 768 diff --git a/benchmarking/k8s-benchmark/stack_run_config.yaml b/benchmarking/k8s-benchmark/stack_run_config.yaml index 88f4b0fef..7992eb3c7 100644 --- a/benchmarking/k8s-benchmark/stack_run_config.yaml +++ b/benchmarking/k8s-benchmark/stack_run_config.yaml @@ -100,6 +100,9 @@ storage: conversations: table_name: openai_conversations backend: sql_default + prompts: + namespace: prompts + backend: kv_default registered_resources: models: - metadata: diff --git a/docs/docs/distributions/configuration.mdx b/docs/docs/distributions/configuration.mdx index 910a0ed05..ff50c406a 100644 --- a/docs/docs/distributions/configuration.mdx +++ b/docs/docs/distributions/configuration.mdx @@ -58,13 +58,21 @@ storage: sql_default: type: sql_sqlite db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/sqlstore.db - references: + stores: metadata: backend: kv_default namespace: registry inference: backend: sql_default table_name: inference_store + max_write_queue_size: 10000 + num_writers: 4 + conversations: + backend: sql_default + table_name: openai_conversations + prompts: + backend: kv_default + namespace: prompts models: - metadata: {} model_id: ${env.INFERENCE_MODEL} diff --git a/docs/docs/distributions/k8s/stack-configmap.yaml b/docs/docs/distributions/k8s/stack-configmap.yaml index c71ab05d8..255e39ac2 100644 --- a/docs/docs/distributions/k8s/stack-configmap.yaml +++ b/docs/docs/distributions/k8s/stack-configmap.yaml @@ -113,13 +113,21 @@ data: db: ${env.POSTGRES_DB:=llamastack} user: ${env.POSTGRES_USER:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack} - references: + stores: metadata: backend: kv_default namespace: registry inference: backend: sql_default table_name: inference_store + max_write_queue_size: 10000 + num_writers: 4 + conversations: + backend: sql_default + table_name: openai_conversations + prompts: + backend: kv_default + namespace: prompts models: - metadata: embedding_dimension: 768 diff --git a/docs/docs/distributions/k8s/stack_run_config.yaml b/docs/docs/distributions/k8s/stack_run_config.yaml index 1bfa5ac25..3dde74bbf 100644 --- a/docs/docs/distributions/k8s/stack_run_config.yaml +++ b/docs/docs/distributions/k8s/stack_run_config.yaml @@ -106,6 +106,9 @@ storage: conversations: table_name: openai_conversations backend: sql_default + prompts: + namespace: prompts + backend: kv_default registered_resources: models: - metadata: diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index d7175100e..95907adcf 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -589,6 +589,7 @@ can be instantiated multiple times (with different configs) if necessary. 
_ensure_backend(stores.inference, sql_backends, "storage.stores.inference") _ensure_backend(stores.conversations, sql_backends, "storage.stores.conversations") _ensure_backend(stores.responses, sql_backends, "storage.stores.responses") + _ensure_backend(stores.prompts, kv_backends, "storage.stores.prompts") return self diff --git a/llama_stack/core/prompts/prompts.py b/llama_stack/core/prompts/prompts.py index 856397ca5..1e48bcc8c 100644 --- a/llama_stack/core/prompts/prompts.py +++ b/llama_stack/core/prompts/prompts.py @@ -11,7 +11,6 @@ from pydantic import BaseModel from llama_stack.apis.prompts import ListPromptsResponse, Prompt, Prompts from llama_stack.core.datatypes import StackRunConfig -from llama_stack.core.storage.datatypes import KVStoreReference from llama_stack.providers.utils.kvstore import KVStore, kvstore_impl @@ -40,11 +39,10 @@ class PromptServiceImpl(Prompts): self.kvstore: KVStore async def initialize(self) -> None: - # Use metadata store backend with prompts-specific namespace - metadata_ref = self.config.run_config.storage.stores.metadata - if not metadata_ref: - raise ValueError("storage.stores.metadata must be configured in run config") - prompts_ref = KVStoreReference(namespace="prompts", backend=metadata_ref.backend) + # Use prompts store reference from run config + prompts_ref = self.config.run_config.storage.stores.prompts + if not prompts_ref: + raise ValueError("storage.stores.prompts must be configured in run config") self.kvstore = await kvstore_impl(prompts_ref) def _get_default_key(self, prompt_id: str) -> str: diff --git a/llama_stack/core/stack.py b/llama_stack/core/stack.py index ebfd59a05..1b5c288a1 100644 --- a/llama_stack/core/stack.py +++ b/llama_stack/core/stack.py @@ -565,6 +565,7 @@ def run_config_from_adhoc_config_spec( metadata=KVStoreReference(backend="kv_default", namespace="registry"), inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"), conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"), + prompts=KVStoreReference(backend="kv_default", namespace="prompts"), ), ), ) diff --git a/llama_stack/core/storage/datatypes.py b/llama_stack/core/storage/datatypes.py index 9df170e10..4b17b9ea9 100644 --- a/llama_stack/core/storage/datatypes.py +++ b/llama_stack/core/storage/datatypes.py @@ -271,6 +271,10 @@ class ServerStoresConfig(BaseModel): default=None, description="Responses store configuration (uses SQL backend)", ) + prompts: KVStoreReference | None = Field( + default=None, + description="Prompts store configuration (uses KV backend)", + ) class StorageConfig(BaseModel): diff --git a/llama_stack/distributions/ci-tests/run.yaml b/llama_stack/distributions/ci-tests/run.yaml index ed880d4a0..702acff8e 100644 --- a/llama_stack/distributions/ci-tests/run.yaml +++ b/llama_stack/distributions/ci-tests/run.yaml @@ -247,6 +247,9 @@ storage: conversations: table_name: openai_conversations backend: sql_default + prompts: + namespace: prompts + backend: kv_default registered_resources: models: [] shields: diff --git a/llama_stack/distributions/dell/run-with-safety.yaml b/llama_stack/distributions/dell/run-with-safety.yaml index 2563f2f4b..e0da8060d 100644 --- a/llama_stack/distributions/dell/run-with-safety.yaml +++ b/llama_stack/distributions/dell/run-with-safety.yaml @@ -109,6 +109,9 @@ storage: conversations: table_name: openai_conversations backend: sql_default + prompts: + namespace: prompts + backend: kv_default registered_resources: models: - metadata: {} diff --git 
a/llama_stack/distributions/dell/run.yaml b/llama_stack/distributions/dell/run.yaml index 7bada394f..bc3117d88 100644 --- a/llama_stack/distributions/dell/run.yaml +++ b/llama_stack/distributions/dell/run.yaml @@ -105,6 +105,9 @@ storage: conversations: table_name: openai_conversations backend: sql_default + prompts: + namespace: prompts + backend: kv_default registered_resources: models: - metadata: {} diff --git a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml b/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml index 01b5db4f9..2fa9d198b 100644 --- a/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml @@ -122,6 +122,9 @@ storage: conversations: table_name: openai_conversations backend: sql_default + prompts: + namespace: prompts + backend: kv_default registered_resources: models: - metadata: {} diff --git a/llama_stack/distributions/meta-reference-gpu/run.yaml b/llama_stack/distributions/meta-reference-gpu/run.yaml index 87c33dde0..5c7f75ca8 100644 --- a/llama_stack/distributions/meta-reference-gpu/run.yaml +++ b/llama_stack/distributions/meta-reference-gpu/run.yaml @@ -112,6 +112,9 @@ storage: conversations: table_name: openai_conversations backend: sql_default + prompts: + namespace: prompts + backend: kv_default registered_resources: models: - metadata: {} diff --git a/llama_stack/distributions/nvidia/run-with-safety.yaml b/llama_stack/distributions/nvidia/run-with-safety.yaml index c23d0f9cb..1d57ad17a 100644 --- a/llama_stack/distributions/nvidia/run-with-safety.yaml +++ b/llama_stack/distributions/nvidia/run-with-safety.yaml @@ -111,6 +111,9 @@ storage: conversations: table_name: openai_conversations backend: sql_default + prompts: + namespace: prompts + backend: kv_default registered_resources: models: - metadata: {} diff --git a/llama_stack/distributions/nvidia/run.yaml b/llama_stack/distributions/nvidia/run.yaml index 81e744d53..8c50b8bfb 100644 --- a/llama_stack/distributions/nvidia/run.yaml +++ b/llama_stack/distributions/nvidia/run.yaml @@ -100,6 +100,9 @@ storage: conversations: table_name: openai_conversations backend: sql_default + prompts: + namespace: prompts + backend: kv_default registered_resources: models: [] shields: [] diff --git a/llama_stack/distributions/open-benchmark/run.yaml b/llama_stack/distributions/open-benchmark/run.yaml index 4fd0e199b..912e48dd3 100644 --- a/llama_stack/distributions/open-benchmark/run.yaml +++ b/llama_stack/distributions/open-benchmark/run.yaml @@ -142,6 +142,9 @@ storage: conversations: table_name: openai_conversations backend: sql_default + prompts: + namespace: prompts + backend: kv_default registered_resources: models: - metadata: {} diff --git a/llama_stack/distributions/postgres-demo/run.yaml b/llama_stack/distributions/postgres-demo/run.yaml index 0d7ecff48..dd1c2bc7f 100644 --- a/llama_stack/distributions/postgres-demo/run.yaml +++ b/llama_stack/distributions/postgres-demo/run.yaml @@ -87,6 +87,9 @@ storage: conversations: table_name: openai_conversations backend: sql_default + prompts: + namespace: prompts + backend: kv_default registered_resources: models: - metadata: {} diff --git a/llama_stack/distributions/starter-gpu/run.yaml b/llama_stack/distributions/starter-gpu/run.yaml index 33e8c9b59..807f0d678 100644 --- a/llama_stack/distributions/starter-gpu/run.yaml +++ b/llama_stack/distributions/starter-gpu/run.yaml @@ -250,6 +250,9 @@ storage: conversations: table_name: openai_conversations backend: 
sql_default + prompts: + namespace: prompts + backend: kv_default registered_resources: models: [] shields: diff --git a/llama_stack/distributions/starter/run.yaml b/llama_stack/distributions/starter/run.yaml index 4ca0914af..eb4652af0 100644 --- a/llama_stack/distributions/starter/run.yaml +++ b/llama_stack/distributions/starter/run.yaml @@ -247,6 +247,9 @@ storage: conversations: table_name: openai_conversations backend: sql_default + prompts: + namespace: prompts + backend: kv_default registered_resources: models: [] shields: diff --git a/llama_stack/distributions/template.py b/llama_stack/distributions/template.py index f0c4c6b9e..1dad60064 100644 --- a/llama_stack/distributions/template.py +++ b/llama_stack/distributions/template.py @@ -259,6 +259,10 @@ class RunConfigSettings(BaseModel): backend="sql_default", table_name="openai_conversations", ).model_dump(exclude_none=True), + "prompts": KVStoreReference( + backend="kv_default", + namespace="prompts", + ).model_dump(exclude_none=True), } storage_config = dict( diff --git a/llama_stack/distributions/watsonx/run.yaml b/llama_stack/distributions/watsonx/run.yaml index ca3c8402d..8456115d2 100644 --- a/llama_stack/distributions/watsonx/run.yaml +++ b/llama_stack/distributions/watsonx/run.yaml @@ -115,6 +115,9 @@ storage: conversations: table_name: openai_conversations backend: sql_default + prompts: + namespace: prompts + backend: kv_default registered_resources: models: [] shields: [] diff --git a/tests/external/run-byoa.yaml b/tests/external/run-byoa.yaml index 4d63046c6..62d6b1825 100644 --- a/tests/external/run-byoa.yaml +++ b/tests/external/run-byoa.yaml @@ -25,6 +25,9 @@ storage: conversations: table_name: openai_conversations backend: sql_default + prompts: + namespace: prompts + backend: kv_default external_apis_dir: ~/.llama/apis.d external_providers_dir: ~/.llama/providers.d server: diff --git a/tests/unit/cli/test_stack_config.py b/tests/unit/cli/test_stack_config.py index 7b9f3ca0c..0977a1e43 100644 --- a/tests/unit/cli/test_stack_config.py +++ b/tests/unit/cli/test_stack_config.py @@ -44,6 +44,9 @@ def config_with_image_name_int(): responses: backend: sql_default table_name: responses + prompts: + backend: kv_default + namespace: prompts providers: inference: - provider_id: provider1 diff --git a/tests/unit/distribution/test_distribution.py b/tests/unit/distribution/test_distribution.py index 4161d7b84..11f55cfdb 100644 --- a/tests/unit/distribution/test_distribution.py +++ b/tests/unit/distribution/test_distribution.py @@ -48,6 +48,7 @@ def _default_storage() -> StorageConfig: metadata=KVStoreReference(backend="kv_default", namespace="registry"), inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"), conversations=SqlStoreReference(backend="sql_default", table_name="conversations"), + prompts=KVStoreReference(backend="kv_default", namespace="prompts"), ), ) diff --git a/tests/unit/prompts/prompts/conftest.py b/tests/unit/prompts/prompts/conftest.py index fe30e1a77..c876f2041 100644 --- a/tests/unit/prompts/prompts/conftest.py +++ b/tests/unit/prompts/prompts/conftest.py @@ -18,7 +18,7 @@ from llama_stack.core.storage.datatypes import ( SqlStoreReference, StorageConfig, ) -from llama_stack.providers.utils.kvstore import kvstore_impl, register_kvstore_backends +from llama_stack.providers.utils.kvstore import register_kvstore_backends @pytest.fixture @@ -38,6 +38,7 @@ async def temp_prompt_store(tmp_path_factory): metadata=KVStoreReference(backend="kv_test", namespace="registry"), 
inference=InferenceStoreReference(backend="sql_test", table_name="inference"), conversations=SqlStoreReference(backend="sql_test", table_name="conversations"), + prompts=KVStoreReference(backend="kv_test", namespace="prompts"), ), ) mock_run_config = StackRunConfig( @@ -50,6 +51,6 @@ async def temp_prompt_store(tmp_path_factory): store = PromptServiceImpl(config, deps={}) register_kvstore_backends({"kv_test": storage.backends["kv_test"]}) - store.kvstore = await kvstore_impl(KVStoreReference(backend="kv_test", namespace="prompts")) + await store.initialize() yield store