diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
index 73d6befd4..d46039318 100644
--- a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
+++ b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
@@ -60,6 +60,7 @@ LLAMA_STACK_PORT=5001
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-gpu \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
@@ -71,6 +72,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-gpu \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
index fab9c6cd8..837be744a 100644
--- a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
+++ b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
@@ -60,6 +60,7 @@ LLAMA_STACK_PORT=5001
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-quantized-gpu \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
@@ -71,6 +72,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-quantized-gpu \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
diff --git a/llama_stack/templates/meta-reference-gpu/doc_template.md b/llama_stack/templates/meta-reference-gpu/doc_template.md
index f9870adbd..421812dbc 100644
--- a/llama_stack/templates/meta-reference-gpu/doc_template.md
+++ b/llama_stack/templates/meta-reference-gpu/doc_template.md
@@ -50,6 +50,7 @@ LLAMA_STACK_PORT=5001
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
@@ -61,6 +62,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md b/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md
index 9e3c56d92..daa380d20 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md
+++ b/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md
@@ -52,6 +52,7 @@ LLAMA_STACK_PORT=5001
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
@@ -63,6 +64,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
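
The added `-v ~/.llama:/root/.llama` flag bind-mounts the host directory where locally downloaded Llama model checkpoints live into the container, so the meta-reference server can load weights from the host instead of starting with an empty `/root/.llama`. A quick way to sanity-check the mount before starting the server (a sketch, not part of the patch; it relies only on standard Docker behavior and one of the image names from the diff, and assumes checkpoints were already downloaded to `~/.llama` on the host):

```bash
# Sketch: confirm the host checkpoint directory is visible inside the container.
# --entrypoint ls overrides the image entrypoint; the trailing args go to ls.
docker run --rm \
  -v ~/.llama:/root/.llama \
  --entrypoint ls \
  llamastack/distribution-meta-reference-gpu \
  -la /root/.llama
```

If the listing shows your downloaded model directories, the same `-v` flag in the full `docker run` commands above will make them available to the server.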