From feabcdd67ba4e169d9dec46d14c29fc3f09ad790 Mon Sep 17 00:00:00 2001
From: raghotham
Date: Tue, 28 Oct 2025 16:05:44 -0700
Subject: [PATCH] docs: add documentation on how to use custom run yaml in docker (#3949)

as title

test plan:
```yaml
# custom-ollama-run.yaml
version: 2
image_name: starter
external_providers_dir: /.llama/providers.d
apis:
- inference
- vector_io
- files
- safety
- tool_runtime
- agents
providers:
  inference:
  # Single Ollama provider for all models
  - provider_id: ollama
    provider_type: remote::ollama
    config:
      url: ${env.OLLAMA_URL:=http://localhost:11434}
  vector_io:
  - provider_id: faiss
    provider_type: inline::faiss
    config:
      persistence:
        namespace: vector_io::faiss
        backend: kv_default
  files:
  - provider_id: meta-reference-files
    provider_type: inline::localfs
    config:
      storage_dir: /.llama/files
      metadata_store:
        table_name: files_metadata
        backend: sql_default
  safety:
  - provider_id: llama-guard
    provider_type: inline::llama-guard
    config:
      excluded_categories: []
  tool_runtime:
  - provider_id: rag-runtime
    provider_type: inline::rag-runtime
  agents:
  - provider_id: meta-reference
    provider_type: inline::meta-reference
    config:
      persistence:
        agent_state:
          namespace: agents
          backend: kv_default
        responses:
          table_name: responses
          backend: sql_default
          max_write_queue_size: 10000
          num_writers: 4
storage:
  backends:
    kv_default:
      type: kv_sqlite
      db_path: /.llama/kvstore.db
    sql_default:
      type: sql_sqlite
      db_path: /.llama/sql_store.db
  stores:
    metadata:
      namespace: registry
      backend: kv_default
    inference:
      table_name: inference_store
      backend: sql_default
      max_write_queue_size: 10000
      num_writers: 4
    conversations:
      table_name: openai_conversations
      backend: sql_default
registered_resources:
  models:
  # All models use the same 'ollama' provider
  - model_id: llama3.2-vision:latest
    provider_id: ollama
    provider_model_id: llama3.2-vision:latest
    model_type: llm
  - model_id: llama3.2:3b
    provider_id: ollama
    provider_model_id: llama3.2:3b
    model_type: llm
  # Embedding models
  - model_id: nomic-embed-text-v2-moe
    provider_id: ollama
    provider_model_id: toshk0/nomic-embed-text-v2-moe:Q6_K
    model_type: embedding
    metadata:
      embedding_dimension: 768
  shields: []
  vector_dbs: []
  datasets: []
  scoring_fns: []
  benchmarks: []
  tool_groups: []
server:
  port: 8321
telemetry:
  enabled: true
vector_stores:
  default_provider_id: faiss
  default_embedding_model:
    provider_id: ollama
    model_id: toshk0/nomic-embed-text-v2-moe:Q6_K
```

```bash
docker run \
  -it \
  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ~/.llama:/root/.llama \
  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
  -e OLLAMA_URL=http://host.docker.internal:11434/ \
  llamastack/distribution-starter:0.3.0 \
  --port $LLAMA_STACK_PORT
```
---
 .../self_hosted_distro/meta-reference-gpu.md | 27 +++++++++++++++
 .../self_hosted_distro/nvidia.md             | 30 +++++++++++++++--
 .../distributions/dell/doc_template.md       | 31 +++++++++++++++++
 .../meta-reference-gpu/doc_template.md       | 30 +++++++++++++++++
 .../distributions/nvidia/doc_template.md     | 33 +++++++++++++++++--
 src/llama_stack/distributions/template.py    |  1 +
 6 files changed, 148 insertions(+), 4 deletions(-)

diff --git a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md
index b7134b3e1..9c4095e88 100644
--- a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md
+++ b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md
@@ -79,6 +79,33 @@ docker run \
   --port $LLAMA_STACK_PORT
 ```

+### Via Docker with Custom Run Configuration
+
+You can also run the Docker container with a custom run configuration file by mounting it into the container:
+
+```bash
+# Set the path to your custom run.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+LLAMA_STACK_PORT=8321
+
+docker run \
+  -it \
+  --pull always \
+  --gpus all \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  llamastack/distribution-meta-reference-gpu \
+  --port $LLAMA_STACK_PORT
+```
+
+**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
+
+Available run configurations for this distribution:
+- `run.yaml`
+- `run-with-safety.yaml`
+
 ### Via venv

 Make sure you have the Llama Stack CLI available.
diff --git a/docs/docs/distributions/self_hosted_distro/nvidia.md b/docs/docs/distributions/self_hosted_distro/nvidia.md
index 4a7d99ff5..c48a7d391 100644
--- a/docs/docs/distributions/self_hosted_distro/nvidia.md
+++ b/docs/docs/distributions/self_hosted_distro/nvidia.md
@@ -127,13 +127,39 @@ docker run \
   -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
   -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
   llamastack/distribution-nvidia \
-  --config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT
 ```

+### Via Docker with Custom Run Configuration
+
+You can also run the Docker container with a custom run configuration file by mounting it into the container:
+
+```bash
+# Set the path to your custom run.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+LLAMA_STACK_PORT=8321
+
+docker run \
+  -it \
+  --pull always \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
+  llamastack/distribution-nvidia \
+  --port $LLAMA_STACK_PORT
+```
+
+**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
+
+Available run configurations for this distribution:
+- `run.yaml`
+- `run-with-safety.yaml`
+
 ### Via venv

 If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.
diff --git a/src/llama_stack/distributions/dell/doc_template.md b/src/llama_stack/distributions/dell/doc_template.md
index 4e28673e8..1530f665a 100644
--- a/src/llama_stack/distributions/dell/doc_template.md
+++ b/src/llama_stack/distributions/dell/doc_template.md
@@ -152,6 +152,37 @@ docker run \
   --port $LLAMA_STACK_PORT
 ```

+### Via Docker with Custom Run Configuration
+
+You can also run the Docker container with a custom run configuration file by mounting it into the container:
+
+```bash
+# Set the path to your custom run.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+
+docker run -it \
+  --pull always \
+  --network host \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v $HOME/.llama:/root/.llama \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  -e INFERENCE_MODEL=$INFERENCE_MODEL \
+  -e DEH_URL=$DEH_URL \
+  -e CHROMA_URL=$CHROMA_URL \
+  llamastack/distribution-{{ name }} \
+  --port $LLAMA_STACK_PORT
+```
+
+**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
+
+{% if run_configs %}
+Available run configurations for this distribution:
+{% for config in run_configs %}
+- `{{ config }}`
+{% endfor %}
+{% endif %}
+
 ### Via Conda

 Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.
diff --git a/src/llama_stack/distributions/meta-reference-gpu/doc_template.md b/src/llama_stack/distributions/meta-reference-gpu/doc_template.md
index ec4452d81..af71d8388 100644
--- a/src/llama_stack/distributions/meta-reference-gpu/doc_template.md
+++ b/src/llama_stack/distributions/meta-reference-gpu/doc_template.md
@@ -68,6 +68,36 @@ docker run \
   --port $LLAMA_STACK_PORT
 ```

+### Via Docker with Custom Run Configuration
+
+You can also run the Docker container with a custom run configuration file by mounting it into the container:
+
+```bash
+# Set the path to your custom run.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+LLAMA_STACK_PORT=8321
+
+docker run \
+  -it \
+  --pull always \
+  --gpus all \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  llamastack/distribution-{{ name }} \
+  --port $LLAMA_STACK_PORT
+```
+
+**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
+
+{% if run_configs %}
+Available run configurations for this distribution:
+{% for config in run_configs %}
+- `{{ config }}`
+{% endfor %}
+{% endif %}
+
 ### Via venv

 Make sure you have the Llama Stack CLI available.
diff --git a/src/llama_stack/distributions/nvidia/doc_template.md b/src/llama_stack/distributions/nvidia/doc_template.md
index 40f39e4f3..054a1e3ec 100644
--- a/src/llama_stack/distributions/nvidia/doc_template.md
+++ b/src/llama_stack/distributions/nvidia/doc_template.md
@@ -117,13 +117,42 @@ docker run \
   -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
   -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
   llamastack/distribution-{{ name }} \
-  --config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT
 ```

+### Via Docker with Custom Run Configuration
+
+You can also run the Docker container with a custom run configuration file by mounting it into the container:
+
+```bash
+# Set the path to your custom run.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+LLAMA_STACK_PORT=8321
+
+docker run \
+  -it \
+  --pull always \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
+  llamastack/distribution-{{ name }} \
+  --port $LLAMA_STACK_PORT
+```
+
+**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
+
+{% if run_configs %}
+Available run configurations for this distribution:
+{% for config in run_configs %}
+- `{{ config }}`
+{% endfor %}
+{% endif %}
+
 ### Via venv

 If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.
diff --git a/src/llama_stack/distributions/template.py b/src/llama_stack/distributions/template.py
index 1dad60064..e6813806a 100644
--- a/src/llama_stack/distributions/template.py
+++ b/src/llama_stack/distributions/template.py
@@ -424,6 +424,7 @@ class DistributionTemplate(BaseModel):
                 providers_table=providers_table,
                 run_config_env_vars=self.run_config_env_vars,
                 default_models=default_models,
+                run_configs=list(self.run_configs.keys()),
             )

         return ""
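
As an optional sanity check for the test plan above (not part of the patch itself): once the starter container is running with the mounted `custom-run.yaml`, the models the server reports should be the ones declared under `registered_resources` in the custom config rather than the distribution defaults. The snippet below is a minimal sketch; it assumes the server exposes a model listing at `/v1/models` on `$LLAMA_STACK_PORT` and that `curl` is available on the host.

```bash
# Hypothetical verification step: poll until the server answers, then check
# that a model from custom-ollama-run.yaml (llama3.2:3b) is registered.
LLAMA_STACK_PORT=${LLAMA_STACK_PORT:-8321}

# Wait up to ~60 seconds for the stack to finish starting.
for _ in $(seq 1 30); do
  curl -sf "http://localhost:${LLAMA_STACK_PORT}/v1/models" > /dev/null && break
  sleep 2
done

# Expect the custom model IDs in the response; adjust the grep target if you
# changed the model list in your run config.
curl -s "http://localhost:${LLAMA_STACK_PORT}/v1/models" | grep -q "llama3.2:3b" \
  && echo "custom run config loaded" \
  || echo "model not found - check RUN_CONFIG_PATH and the mounted file"
```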