fix: docker run with --pull always to fetch the latest image (#1733)

As titled
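For context, `--pull always` makes `docker run` re-check the registry for the tag on every invocation instead of silently reusing a cached local image (the default policy is `--pull missing`). A minimal sketch of the difference, using a generic `alpine:latest` image purely for illustration:

```bash
# Default policy (--pull missing): a locally cached image for the tag is reused,
# even if the registry has published a newer build under the same tag.
docker run --rm alpine:latest echo "may run a stale cached image"

# With --pull always: docker re-resolves the tag against the registry on every
# run and pulls the image if its digest has changed.
docker run --pull always --rm alpine:latest echo "runs the latest published image"
```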
This commit is contained in:
Hardik Shah 2025-03-20 15:35:48 -07:00 committed by GitHub
parent f95bc29ca9
commit 581e8ae562
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
31 changed files with 57 additions and 3 deletions

View file

@ -57,7 +57,7 @@ The `otel` sink works with any service compatible with the OpenTelemetry collect
Start a Jaeger instance with the OTLP HTTP endpoint at 4318 and the Jaeger UI at 16686 using the following command:
```bash
$ docker run --rm --name jaeger \
$ docker run --pull always --rm --name jaeger \
-p 16686:16686 -p 4318:4318 \
jaegertracing/jaeger:2.1.0
```
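Once the container is running, a quick sanity check (assuming the default localhost port mapping above; this check is not part of the original doc):

```bash
# The Jaeger UI should answer on 16686; OTLP/HTTP traces are ingested on 4318
# (standard path /v1/traces).
curl -sf http://localhost:16686 > /dev/null && echo "Jaeger UI is reachable"
```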

View file

@ -61,6 +61,7 @@ This method allows you to get started quickly without having to build the distri
LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ./run.yaml:/root/my-run.yaml \
llamastack/distribution-nvidia \

View file

@ -56,6 +56,7 @@ This method allows you to get started quickly without having to build the distri
LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
llamastack/distribution-bedrock \
--port $LLAMA_STACK_PORT \

View file

@ -48,6 +48,7 @@ This method allows you to get started quickly without having to build the distri
LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ./run.yaml:/root/my-run.yaml \
llamastack/distribution-cerebras \

View file

@ -53,7 +53,7 @@ docker compose down
#### Start Dell-TGI server locally
```
docker run -it --shm-size 1g -p 80:80 --gpus 4 \
docker run -it --pull always --shm-size 1g -p 80:80 --gpus 4 \
-e NUM_SHARD=4 \
-e MAX_BATCH_PREFILL_TOKENS=32768 \
-e MAX_INPUT_TOKENS=8000 \
@ -65,7 +65,7 @@ registry.dell.huggingface.co/enterprise-dell-inference-meta-llama-meta-llama-3.1
#### Start Llama Stack server pointing to TGI server
```
docker run --network host -it -p 8321:8321 -v ./run.yaml:/root/my-run.yaml --gpus=all llamastack/distribution-tgi --yaml_config /root/my-run.yaml
docker run --pull always --network host -it -p 8321:8321 -v ./run.yaml:/root/my-run.yaml --gpus=all llamastack/distribution-tgi --yaml_config /root/my-run.yaml
```
Make sure that in your `run.yaml` file, your inference provider is pointing to the correct TGI server endpoint. E.g.
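For illustration, a hypothetical sketch of what that provider entry might look like, printed here via a bash heredoc; the provider id, type, and URL are assumptions and may differ between Llama Stack versions, so compare against the template `run.yaml` shipped with the distribution:

```bash
# Hypothetical run.yaml fragment -- field names and values are assumptions;
# verify them against the distribution's template run.yaml.
cat <<'EOF'
providers:
  inference:
    - provider_id: tgi
      provider_type: remote::tgi
      config:
        url: http://127.0.0.1:80   # must match the TGI server endpoint started above
EOF
```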

View file

@ -55,6 +55,7 @@ export CUDA_VISIBLE_DEVICES=0
export LLAMA_STACK_PORT=8321
docker run --rm -it \
--pull always \
--network host \
-v $HOME/.cache/huggingface:/data \
-e HF_TOKEN=$HF_TOKEN \
@ -78,6 +79,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
export CUDA_VISIBLE_DEVICES=1
docker run --rm -it \
--pull always \
--network host \
-v $HOME/.cache/huggingface:/data \
-e HF_TOKEN=$HF_TOKEN \
@ -120,6 +122,7 @@ This method allows you to get started quickly without having to build the distri
```bash
docker run -it \
--pull always \
--network host \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v $HOME/.llama:/root/.llama \
@ -147,6 +150,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v $HOME/.llama:/root/.llama \
-v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \

View file

@ -66,6 +66,7 @@ This method allows you to get started quickly without having to build the distri
LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
llamastack/distribution-fireworks \
--port $LLAMA_STACK_PORT \

View file

@ -61,6 +61,7 @@ This method allows you to get started quickly without having to build the distri
LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
llamastack/distribution-groq \
--port $LLAMA_STACK_PORT \

View file

@ -80,6 +80,7 @@ This method allows you to get started quickly without having to build the distri
LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
llamastack/distribution-meta-reference-gpu \
@ -92,6 +93,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
```bash
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
llamastack/distribution-meta-reference-gpu \

View file

@ -80,6 +80,7 @@ This method allows you to get started quickly without having to build the distri
LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
llamastack/distribution-meta-reference-quantized-gpu \
@ -92,6 +93,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
```bash
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
llamastack/distribution-meta-reference-quantized-gpu \

View file

@ -42,6 +42,7 @@ This method allows you to get started quickly without having to build the distri
LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ./run.yaml:/root/my-run.yaml \
llamastack/distribution-nvidia \

View file

@ -74,6 +74,7 @@ This method allows you to get started quickly without having to build the distri
export LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
llamastack/distribution-ollama \
@ -91,6 +92,7 @@ cd /path/to/llama-stack
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
-v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \

View file

@ -49,6 +49,7 @@ export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
export CUDA_VISIBLE_DEVICES=0
docker run \
--pull always \
--runtime nvidia \
--gpus $CUDA_VISIBLE_DEVICES \
-v ~/.cache/huggingface:/root/.cache/huggingface \
@ -71,6 +72,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
export CUDA_VISIBLE_DEVICES=1
docker run \
--pull always \
--runtime nvidia \
--gpus $CUDA_VISIBLE_DEVICES \
-v ~/.cache/huggingface:/root/.cache/huggingface \
@ -98,6 +100,7 @@ export LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ./run.yaml:/root/my-run.yaml \
llamastack/distribution-remote-vllm \
@ -119,6 +122,7 @@ cd /path/to/llama-stack
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
-v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \

View file

@ -62,6 +62,7 @@ This method allows you to get started quickly without having to build the distri
LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
llamastack/distribution-sambanova \
--port $LLAMA_STACK_PORT \

View file

@ -50,6 +50,7 @@ export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
export CUDA_VISIBLE_DEVICES=0
docker run --rm -it \
--pull always \
-v $HOME/.cache/huggingface:/data \
-p $INFERENCE_PORT:$INFERENCE_PORT \
--gpus $CUDA_VISIBLE_DEVICES \
@ -70,6 +71,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
export CUDA_VISIBLE_DEVICES=1
docker run --rm -it \
--pull always \
-v $HOME/.cache/huggingface:/data \
-p $SAFETY_PORT:$SAFETY_PORT \
--gpus $CUDA_VISIBLE_DEVICES \
@ -93,6 +95,7 @@ This method allows you to get started quickly without having to build the distri
LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
llamastack/distribution-tgi \
--port $LLAMA_STACK_PORT \
@ -109,6 +112,7 @@ cd /path/to/llama-stack
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
-v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \

View file

@ -67,6 +67,7 @@ This method allows you to get started quickly without having to build the distri
LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
llamastack/distribution-together \
--port $LLAMA_STACK_PORT \

View file

@ -54,6 +54,7 @@ mkdir -p ~/.llama
Then you can start the server using the container tool of your choice. For example, if you are running Docker you can use the following command:
```bash
docker run -it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
llamastack/distribution-ollama \
@ -74,6 +75,7 @@ Docker containers run in their own isolated network namespaces on Linux. To allo
Linux users having issues running the above command should instead try the following:
```bash
docker run -it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
--network=host \

View file

@ -118,6 +118,7 @@ Playground can also be started in a docker image:
export LLAMA_STACK_URL=http://localhost:11434
docker run \
--pull always \
-p 8501:8501 \
-e LLAMA_STACK_ENDPOINT=$LLAMA_STACK_URL \
quay.io/jland/llama-stack-playground

View file

@ -50,6 +50,7 @@ This method allows you to get started quickly without having to build the distri
LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
llamastack/distribution-{{ name }} \
--port $LLAMA_STACK_PORT \

View file

@ -42,6 +42,7 @@ This method allows you to get started quickly without having to build the distri
LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ./run.yaml:/root/my-run.yaml \
llamastack/distribution-{{ name }} \

View file

@ -43,6 +43,7 @@ export CUDA_VISIBLE_DEVICES=0
export LLAMA_STACK_PORT=8321
docker run --rm -it \
--pull always \
--network host \
-v $HOME/.cache/huggingface:/data \
-e HF_TOKEN=$HF_TOKEN \
@ -66,6 +67,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
export CUDA_VISIBLE_DEVICES=1
docker run --rm -it \
--pull always \
--network host \
-v $HOME/.cache/huggingface:/data \
-e HF_TOKEN=$HF_TOKEN \
@ -108,6 +110,7 @@ This method allows you to get started quickly without having to build the distri
```bash
docker run -it \
--pull always \
--network host \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v $HOME/.llama:/root/.llama \
@ -135,6 +138,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v $HOME/.llama:/root/.llama \
-v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \

View file

@ -52,6 +52,7 @@ This method allows you to get started quickly without having to build the distri
LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
llamastack/distribution-{{ name }} \
--port $LLAMA_STACK_PORT \

View file

@ -52,6 +52,7 @@ This method allows you to get started quickly without having to build the distri
LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
llamastack/distribution-{{ name }} \
--port $LLAMA_STACK_PORT \

View file

@ -68,6 +68,7 @@ This method allows you to get started quickly without having to build the distri
LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
llamastack/distribution-{{ name }} \
@ -80,6 +81,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
```bash
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
llamastack/distribution-{{ name }} \

View file

@ -70,6 +70,7 @@ This method allows you to get started quickly without having to build the distri
LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
llamastack/distribution-{{ name }} \
@ -82,6 +83,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
```bash
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
llamastack/distribution-{{ name }} \

View file

@ -42,6 +42,7 @@ This method allows you to get started quickly without having to build the distri
LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ./run.yaml:/root/my-run.yaml \
llamastack/distribution-{{ name }} \

View file

@ -63,6 +63,7 @@ This method allows you to get started quickly without having to build the distri
export LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
llamastack/distribution-{{ name }} \
@ -80,6 +81,7 @@ cd /path/to/llama-stack
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
-v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \

View file

@ -36,6 +36,7 @@ export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
export CUDA_VISIBLE_DEVICES=0
docker run \
--pull always \
--runtime nvidia \
--gpus $CUDA_VISIBLE_DEVICES \
-v ~/.cache/huggingface:/root/.cache/huggingface \
@ -58,6 +59,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
export CUDA_VISIBLE_DEVICES=1
docker run \
--pull always \
--runtime nvidia \
--gpus $CUDA_VISIBLE_DEVICES \
-v ~/.cache/huggingface:/root/.cache/huggingface \
@ -85,6 +87,7 @@ export LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ./run.yaml:/root/my-run.yaml \
llamastack/distribution-{{ name }} \
@ -106,6 +109,7 @@ cd /path/to/llama-stack
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
-v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \

View file

@ -52,6 +52,7 @@ This method allows you to get started quickly without having to build the distri
LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
llamastack/distribution-{{ name }} \
--port $LLAMA_STACK_PORT \

View file

@ -38,6 +38,7 @@ export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
export CUDA_VISIBLE_DEVICES=0
docker run --rm -it \
--pull always \
-v $HOME/.cache/huggingface:/data \
-p $INFERENCE_PORT:$INFERENCE_PORT \
--gpus $CUDA_VISIBLE_DEVICES \
@ -58,6 +59,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
export CUDA_VISIBLE_DEVICES=1
docker run --rm -it \
--pull always \
-v $HOME/.cache/huggingface:/data \
-p $SAFETY_PORT:$SAFETY_PORT \
--gpus $CUDA_VISIBLE_DEVICES \
@ -81,6 +83,7 @@ This method allows you to get started quickly without having to build the distri
LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
llamastack/distribution-{{ name }} \
--port $LLAMA_STACK_PORT \
@ -97,6 +100,7 @@ cd /path/to/llama-stack
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
-v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \

View file

@ -52,6 +52,7 @@ This method allows you to get started quickly without having to build the distri
LLAMA_STACK_PORT=5001
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
llamastack/distribution-{{ name }} \
--port $LLAMA_STACK_PORT \