From b9b24fbf94ba486259617c36f207fd4b45ee677c Mon Sep 17 00:00:00 2001
From: r3v5
Date: Mon, 21 Jul 2025 10:26:09 +0100
Subject: [PATCH 1/2] feat: make Distribution container images run rootless in Llama Stack

---
 llama_stack/distribution/build_container.sh | 25 ++++++++++++++++-----
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh
index 6985c1cd0..20eb372b7 100755
--- a/llama_stack/distribution/build_container.sh
+++ b/llama_stack/distribution/build_container.sh
@@ -259,6 +259,25 @@ fi
 RUN pip uninstall -y uv
 EOF
 
+# Add non-root user setup before entrypoint
+add_to_container << EOF
+
+# Create group with GID 1001 and user with UID 1001
+RUN groupadd -g 1001 appgroup && useradd -u 1001 -g appgroup -M appuser
+
+# Create necessary directories with appropriate permissions for UID 1001
+RUN mkdir -p /.llama /.cache && chown -R 1001:1001 /.llama /.cache && chmod -R 775 /.llama /.cache && chmod -R g+w /app
+
+# Set the Llama Stack config directory environment variable to use /.llama
+ENV LLAMA_STACK_CONFIG_DIR=/.llama
+
+# Set HOME=/ so ~/.llama and ~/.cache resolve to /.llama and /.cache, preventing dual storage while keeping /app as the working directory for CI compatibility
+ENV HOME=/
+
+# Switch to non-root user (UID 1001 directly)
+USER 1001
+EOF
+
 # If a run config is provided, we use the --config flag
 if [[ -n "$run_config" ]]; then
   add_to_container << EOF
@@ -271,12 +290,6 @@ ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--templat
 EOF
 fi
 
-# Add other require item commands genearic to all containers
-add_to_container << EOF
-
-RUN mkdir -p /.llama /.cache && chmod -R g+rw /app /.llama /.cache
-EOF
-
 printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR"
 cat "$TEMP_DIR"/Containerfile
 printf "\n"

From 79805c607f41e0b90e6dd9006685215b4e53bc32 Mon Sep 17 00:00:00 2001
From: r3v5
Date: Mon, 21 Jul 2025 10:33:12 +0100
Subject: [PATCH 2/2] docs: update container deployment guides for distributions

---
 docs/source/distributions/building_distro.md | 4 ++--
 docs/source/distributions/remote_hosted_distro/watsonx.md | 4 ++--
 docs/source/distributions/self_hosted_distro/dell-tgi.md | 2 +-
 docs/source/distributions/self_hosted_distro/dell.md | 8 ++++----
 .../self_hosted_distro/meta-reference-gpu.md | 4 ++--
 docs/source/distributions/self_hosted_distro/nvidia.md | 4 ++--
 docs/source/getting_started/detailed_tutorial.md | 4 ++--
 llama_stack/templates/meta-reference-gpu/doc_template.md | 4 ++--
 llama_stack/templates/nvidia/doc_template.md | 4 ++--
 9 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/docs/source/distributions/building_distro.md b/docs/source/distributions/building_distro.md
index cd2c6b6a8..72a9e295b 100644
--- a/docs/source/distributions/building_distro.md
+++ b/docs/source/distributions/building_distro.md
@@ -278,7 +278,7 @@ After this step is successful, you should be able to find the built container im
 ```
 docker run -d \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ~/.llama:/root/.llama \
+  -v ~/.llama:/.llama \
   localhost/distribution-ollama:dev \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=$INFERENCE_MODEL \
@@ -291,7 +291,7 @@ Here are the docker flags and their uses:
 
 * `-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT`: Maps the container port to the host port for accessing the server
 
-* `-v ~/.llama:/root/.llama`: Mounts the local .llama directory to persist configurations and data
+* `-v ~/.llama:/.llama`: Mounts the local .llama directory to persist configurations and data
 
 * `localhost/distribution-ollama:dev`: The name and tag of the container image to run
 
diff --git a/docs/source/distributions/remote_hosted_distro/watsonx.md b/docs/source/distributions/remote_hosted_distro/watsonx.md
index ec1b98059..45890702d 100644
--- a/docs/source/distributions/remote_hosted_distro/watsonx.md
+++ b/docs/source/distributions/remote_hosted_distro/watsonx.md
@@ -68,9 +68,9 @@ LLAMA_STACK_PORT=5001
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
+  -v ./run.yaml:/.llama/my-run.yaml \
   llamastack/distribution-watsonx \
-  --config /root/my-run.yaml \
+  --config /.llama/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env WATSONX_API_KEY=$WATSONX_API_KEY \
   --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \
diff --git a/docs/source/distributions/self_hosted_distro/dell-tgi.md b/docs/source/distributions/self_hosted_distro/dell-tgi.md
index 5fca297b0..9be79037b 100644
--- a/docs/source/distributions/self_hosted_distro/dell-tgi.md
+++ b/docs/source/distributions/self_hosted_distro/dell-tgi.md
@@ -65,7 +65,7 @@ registry.dell.huggingface.co/enterprise-dell-inference-meta-llama-meta-llama-3.1
 #### Start Llama Stack server pointing to TGI server
 
 ```
-docker run --pull always --network host -it -p 8321:8321 -v ./run.yaml:/root/my-run.yaml --gpus=all llamastack/distribution-tgi --yaml_config /root/my-run.yaml
+docker run --pull always --network host -it -p 8321:8321 -v ./run.yaml:/.llama/my-run.yaml --gpus=all llamastack/distribution-tgi --yaml_config /.llama/my-run.yaml
 ```
 
 Make sure in you `run.yaml` file, you inference provider is pointing to the correct TGI server endpoint. E.g.
diff --git a/docs/source/distributions/self_hosted_distro/dell.md b/docs/source/distributions/self_hosted_distro/dell.md
index eded3bdc4..917de5c68 100644
--- a/docs/source/distributions/self_hosted_distro/dell.md
+++ b/docs/source/distributions/self_hosted_distro/dell.md
@@ -125,7 +125,7 @@ docker run -it \
   --pull always \
   --network host \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v $HOME/.llama:/root/.llama \
+  -v $HOME/.llama:/.llama \
   # NOTE: mount the llama-stack / llama-model directories if testing local changes else not needed
   -v /home/hjshah/git/llama-stack:/app/llama-stack-source -v /home/hjshah/git/llama-models:/app/llama-models-source \
   # localhost/distribution-dell:dev if building / testing locally
@@ -152,10 +152,10 @@ docker run \
   -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v $HOME/.llama:/root/.llama \
-  -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
+  -v $HOME/.llama:/.llama \
+  -v ./llama_stack/templates/tgi/run-with-safety.yaml:/.llama/my-run.yaml \
   llamastack/distribution-dell \
-  --config /root/my-run.yaml \
+  --config /.llama/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=$INFERENCE_MODEL \
   --env DEH_URL=$DEH_URL \
diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
index 8b9dcec55..03dc17a08 100644
--- a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
+++ b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
@@ -83,7 +83,7 @@ docker run \
   --pull always \
   --gpu all \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ~/.llama:/root/.llama \
+  -v ~/.llama:/.llama \
   llamastack/distribution-meta-reference-gpu \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
@@ -97,7 +97,7 @@ docker run \
   --pull always \
   --gpu all \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ~/.llama:/root/.llama \
+  -v ~/.llama:/.llama \
   llamastack/distribution-meta-reference-gpu \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
diff --git a/docs/source/distributions/self_hosted_distro/nvidia.md b/docs/source/distributions/self_hosted_distro/nvidia.md
index 47e38f73d..8d74056a7 100644
--- a/docs/source/distributions/self_hosted_distro/nvidia.md
+++ b/docs/source/distributions/self_hosted_distro/nvidia.md
@@ -142,9 +142,9 @@ docker run \
   -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
+  -v ./run.yaml:/.llama/my-run.yaml \
   llamastack/distribution-nvidia \
-  --config /root/my-run.yaml \
+  --config /.llama/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env NVIDIA_API_KEY=$NVIDIA_API_KEY
 ```
diff --git a/docs/source/getting_started/detailed_tutorial.md b/docs/source/getting_started/detailed_tutorial.md
index 7ceae9072..39793f5b9 100644
--- a/docs/source/getting_started/detailed_tutorial.md
+++ b/docs/source/getting_started/detailed_tutorial.md
@@ -91,7 +91,7 @@ following command:
 docker run -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ~/.llama:/root/.llama \
+  -v ~/.llama:/.llama \
   llamastack/distribution-starter \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=$INFERENCE_MODEL \
@@ -112,7 +112,7 @@ Linux users having issues running the above command should instead try the follo
 docker run -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ~/.llama:/root/.llama \
+  -v ~/.llama:/.llama \
   --network=host \
   llamastack/distribution-starter \
   --port $LLAMA_STACK_PORT \
diff --git a/llama_stack/templates/meta-reference-gpu/doc_template.md b/llama_stack/templates/meta-reference-gpu/doc_template.md
index 2ca6793d7..3fb1b8e60 100644
--- a/llama_stack/templates/meta-reference-gpu/doc_template.md
+++ b/llama_stack/templates/meta-reference-gpu/doc_template.md
@@ -71,7 +71,7 @@ docker run \
   --pull always \
   --gpu all \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ~/.llama:/root/.llama \
+  -v ~/.llama:/.llama \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
@@ -85,7 +85,7 @@ docker run \
   --pull always \
   --gpu all \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ~/.llama:/root/.llama \
+  -v ~/.llama:/.llama \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
diff --git a/llama_stack/templates/nvidia/doc_template.md b/llama_stack/templates/nvidia/doc_template.md
index 3cb8245df..442b41665 100644
--- a/llama_stack/templates/nvidia/doc_template.md
+++ b/llama_stack/templates/nvidia/doc_template.md
@@ -114,9 +114,9 @@ docker run \
   -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
+  -v ./run.yaml:/.llama/my-run.yaml \
   llamastack/distribution-{{ name }} \
-  --config /root/my-run.yaml \
+  --config /.llama/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env NVIDIA_API_KEY=$NVIDIA_API_KEY
 ```
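
A quick, optional sanity check after building or pulling one of these images — a minimal sketch only; the image tag, port, and host paths below are borrowed from the examples above for illustration and are not mandated by the patch:

```bash
# Confirm the image really starts as the non-root user created above (expect "1001").
# --entrypoint overrides the server entrypoint so a one-off command runs instead.
docker run --rm --entrypoint id llamastack/distribution-starter -u

# State now lives under /.llama inside the container, so mount the host directory
# there and make sure UID 1001 can write to it. Plain Docker does not remap the
# container UID, so handing the directory to 1001 is one option; rootless Podman
# maps it into your user namespace and usually needs no chown.
mkdir -p ~/.llama
sudo chown -R 1001:1001 ~/.llama   # one option; adjust to your environment
docker run -it \
  -p 8321:8321 \
  -v ~/.llama:/.llama \
  llamastack/distribution-starter \
  --port 8321
```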