diff --git a/docs/source/distributions/building_distro.md b/docs/source/distributions/building_distro.md
index cd2c6b6a8..72a9e295b 100644
--- a/docs/source/distributions/building_distro.md
+++ b/docs/source/distributions/building_distro.md
@@ -278,7 +278,7 @@ After this step is successful, you should be able to find the built container im
 ```
 docker run -d \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ~/.llama:/root/.llama \
+  -v ~/.llama:/.llama \
   localhost/distribution-ollama:dev \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=$INFERENCE_MODEL \
@@ -291,7 +291,7 @@ Here are the docker flags and their uses:
 
 * `-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT`: Maps the container port to the host port for accessing the server
 
-* `-v ~/.llama:/root/.llama`: Mounts the local .llama directory to persist configurations and data
+* `-v ~/.llama:/.llama`: Mounts the local .llama directory to persist configurations and data
 
 * `localhost/distribution-ollama:dev`: The name and tag of the container image to run
 
diff --git a/docs/source/distributions/remote_hosted_distro/watsonx.md b/docs/source/distributions/remote_hosted_distro/watsonx.md
index ec1b98059..45890702d 100644
--- a/docs/source/distributions/remote_hosted_distro/watsonx.md
+++ b/docs/source/distributions/remote_hosted_distro/watsonx.md
@@ -68,9 +68,9 @@ LLAMA_STACK_PORT=5001
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
+  -v ./run.yaml:/.llama/my-run.yaml \
   llamastack/distribution-watsonx \
-  --config /root/my-run.yaml \
+  --config /.llama/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env WATSONX_API_KEY=$WATSONX_API_KEY \
   --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \
diff --git a/docs/source/distributions/self_hosted_distro/dell-tgi.md b/docs/source/distributions/self_hosted_distro/dell-tgi.md
index 5fca297b0..9be79037b 100644
--- a/docs/source/distributions/self_hosted_distro/dell-tgi.md
+++ b/docs/source/distributions/self_hosted_distro/dell-tgi.md
@@ -65,7 +65,7 @@ registry.dell.huggingface.co/enterprise-dell-inference-meta-llama-meta-llama-3.1
 #### Start Llama Stack server pointing to TGI server
 
 ```
-docker run --pull always --network host -it -p 8321:8321 -v ./run.yaml:/root/my-run.yaml --gpus=all llamastack/distribution-tgi --yaml_config /root/my-run.yaml
+docker run --pull always --network host -it -p 8321:8321 -v ./run.yaml:/.llama/my-run.yaml --gpus=all llamastack/distribution-tgi --yaml_config /.llama/my-run.yaml
 ```
 
 Make sure in you `run.yaml` file, you inference provider is pointing to the correct TGI server endpoint. E.g.
diff --git a/docs/source/distributions/self_hosted_distro/dell.md b/docs/source/distributions/self_hosted_distro/dell.md
index eded3bdc4..917de5c68 100644
--- a/docs/source/distributions/self_hosted_distro/dell.md
+++ b/docs/source/distributions/self_hosted_distro/dell.md
@@ -125,7 +125,7 @@ docker run -it \
   --pull always \
   --network host \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v $HOME/.llama:/root/.llama \
+  -v $HOME/.llama:/.llama \
   # NOTE: mount the llama-stack / llama-model directories if testing local changes else not needed
   -v /home/hjshah/git/llama-stack:/app/llama-stack-source -v /home/hjshah/git/llama-models:/app/llama-models-source \
   # localhost/distribution-dell:dev if building / testing locally
@@ -152,10 +152,10 @@ docker run \
   -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v $HOME/.llama:/root/.llama \
-  -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
+  -v $HOME/.llama:/.llama \
+  -v ./llama_stack/templates/tgi/run-with-safety.yaml:/.llama/my-run.yaml \
   llamastack/distribution-dell \
-  --config /root/my-run.yaml \
+  --config /.llama/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=$INFERENCE_MODEL \
   --env DEH_URL=$DEH_URL \
diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
index 8b9dcec55..03dc17a08 100644
--- a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
+++ b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
@@ -83,7 +83,7 @@ docker run \
   --pull always \
   --gpu all \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ~/.llama:/root/.llama \
+  -v ~/.llama:/.llama \
   llamastack/distribution-meta-reference-gpu \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
@@ -97,7 +97,7 @@ docker run \
   --pull always \
   --gpu all \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ~/.llama:/root/.llama \
+  -v ~/.llama:/.llama \
   llamastack/distribution-meta-reference-gpu \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
diff --git a/docs/source/distributions/self_hosted_distro/nvidia.md b/docs/source/distributions/self_hosted_distro/nvidia.md
index aeb14e6a6..547fac75b 100644
--- a/docs/source/distributions/self_hosted_distro/nvidia.md
+++ b/docs/source/distributions/self_hosted_distro/nvidia.md
@@ -145,9 +145,9 @@ docker run \
   -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
+  -v ./run.yaml:/.llama/my-run.yaml \
   llamastack/distribution-nvidia \
-  --config /root/my-run.yaml \
+  --config /.llama/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env NVIDIA_API_KEY=$NVIDIA_API_KEY
 ```
diff --git a/docs/source/getting_started/detailed_tutorial.md b/docs/source/getting_started/detailed_tutorial.md
index c6589e758..09870a1ed 100644
--- a/docs/source/getting_started/detailed_tutorial.md
+++ b/docs/source/getting_started/detailed_tutorial.md
@@ -89,7 +89,7 @@ following command:
 docker run -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ~/.llama:/root/.llama \
+  -v ~/.llama:/.llama \
   llamastack/distribution-starter \
   --port $LLAMA_STACK_PORT \
   --env OLLAMA_URL=http://host.docker.internal:11434
@@ -109,7 +109,7 @@ Linux users having issues running the above command should instead try the follo
 docker run -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ~/.llama:/root/.llama \
+  -v ~/.llama:/.llama \
   --network=host \
   llamastack/distribution-starter \
   --port $LLAMA_STACK_PORT \
diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh
index 7c406d3e7..efc3dd704 100755
--- a/llama_stack/distribution/build_container.sh
+++ b/llama_stack/distribution/build_container.sh
@@ -333,6 +333,25 @@ fi
 RUN pip uninstall -y uv
 EOF
 
+# Add non-root user setup before entrypoint
+add_to_container << EOF
+
+# Create group with GID 1001 and user with UID 1001
+RUN groupadd -g 1001 appgroup && useradd -u 1001 -g appgroup -M appuser
+
+# Create necessary directories with appropriate permissions for UID 1001
+RUN mkdir -p /.llama /.cache && chown -R 1001:1001 /.llama /.cache && chmod -R 775 /.llama /.cache && chmod -R g+w /app
+
+# Set the Llama Stack config directory environment variable to use /.llama
+ENV LLAMA_STACK_CONFIG_DIR=/.llama
+
+# This prevents dual storage while keeping /app as working directory for CI compatibility
+ENV HOME=/
+
+# Switch to non-root user (UID 1001 directly)
+USER 1001
+EOF
+
 # If a run config is provided, we use the --config flag
 if [[ -n "$run_config" ]]; then
   add_to_container << EOF
@@ -345,12 +364,6 @@ ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--templat
 EOF
 fi
 
-# Add other require item commands genearic to all containers
-add_to_container << EOF
-
-RUN mkdir -p /.llama /.cache && chmod -R g+rw /app /.llama /.cache
-EOF
-
 printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR"
 cat "$TEMP_DIR"/Containerfile
 printf "\n"
diff --git a/llama_stack/templates/meta-reference-gpu/doc_template.md b/llama_stack/templates/meta-reference-gpu/doc_template.md
index 2ca6793d7..3fb1b8e60 100644
--- a/llama_stack/templates/meta-reference-gpu/doc_template.md
+++ b/llama_stack/templates/meta-reference-gpu/doc_template.md
@@ -71,7 +71,7 @@ docker run \
   --pull always \
   --gpu all \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ~/.llama:/root/.llama \
+  -v ~/.llama:/.llama \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
@@ -85,7 +85,7 @@ docker run \
   --pull always \
   --gpu all \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ~/.llama:/root/.llama \
+  -v ~/.llama:/.llama \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
diff --git a/llama_stack/templates/nvidia/doc_template.md b/llama_stack/templates/nvidia/doc_template.md
index 5a180d49f..d916b2dd1 100644
--- a/llama_stack/templates/nvidia/doc_template.md
+++ b/llama_stack/templates/nvidia/doc_template.md
@@ -117,9 +117,9 @@ docker run \
   -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
+  -v ./run.yaml:/.llama/my-run.yaml \
   llamastack/distribution-{{ name }} \
-  --config /root/my-run.yaml \
+  --config /.llama/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env NVIDIA_API_KEY=$NVIDIA_API_KEY
 ```
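
Below is a minimal, optional sanity check for this patch — a sketch, not part of the change — assuming the `llamastack/distribution-starter` image referenced above has been rebuilt with the updated `build_container.sh`. It shows that the container now starts as the non-root UID 1001 user, that `HOME` and `LLAMA_STACK_CONFIG_DIR` point at the relocated `/.llama` directory, and that the directory is writable by that user; the `write-test` filename is just an illustrative probe.

```
# Inspect the rebuilt image without starting the server (overrides the entrypoint with sh).
docker run --rm --entrypoint sh llamastack/distribution-starter -c '
  id
  echo "HOME=$HOME LLAMA_STACK_CONFIG_DIR=$LLAMA_STACK_CONFIG_DIR"
  ls -ld /.llama /.cache
  touch /.llama/write-test && echo "/.llama is writable"
'

# Assumption: a host directory mounted at /.llama (e.g. ~/.llama) may not be
# writable by UID/GID 1001. If the server reports permission errors, relax the
# host-side permissions before re-running, for example:
#   chmod -R g+rwX ~/.llama
```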