diff --git a/docs/source/building_applications/telemetry.md b/docs/source/building_applications/telemetry.md
index b607a3d66..833117740 100644
--- a/docs/source/building_applications/telemetry.md
+++ b/docs/source/building_applications/telemetry.md
@@ -57,7 +57,7 @@ The `otel` sink works with any service compatible with the OpenTelemetry collect
 Start a Jaeger instance with the OTLP HTTP endpoint at 4318 and the Jaeger UI at 16686 using the following command:
 
 ```bash
-$ docker run --rm --name jaeger \
+$ docker run --pull always --rm --name jaeger \
   -p 16686:16686 -p 4318:4318 \
   jaegertracing/jaeger:2.1.0
 ```
diff --git a/docs/source/distributions/remote_hosted_distro/nvidia.md b/docs/source/distributions/remote_hosted_distro/nvidia.md
index 774d5ec1b..8eafdfc99 100644
--- a/docs/source/distributions/remote_hosted_distro/nvidia.md
+++ b/docs/source/distributions/remote_hosted_distro/nvidia.md
@@ -61,6 +61,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./run.yaml:/root/my-run.yaml \
   llamastack/distribution-nvidia \
diff --git a/docs/source/distributions/self_hosted_distro/bedrock.md b/docs/source/distributions/self_hosted_distro/bedrock.md
index 623ab6848..74a544e59 100644
--- a/docs/source/distributions/self_hosted_distro/bedrock.md
+++ b/docs/source/distributions/self_hosted_distro/bedrock.md
@@ -56,6 +56,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-bedrock \
   --port $LLAMA_STACK_PORT \
diff --git a/docs/source/distributions/self_hosted_distro/cerebras.md b/docs/source/distributions/self_hosted_distro/cerebras.md
index 8f14ae7cc..d590e10eb 100644
--- a/docs/source/distributions/self_hosted_distro/cerebras.md
+++ b/docs/source/distributions/self_hosted_distro/cerebras.md
@@ -48,6 +48,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./run.yaml:/root/my-run.yaml \
   llamastack/distribution-cerebras \
diff --git a/docs/source/distributions/self_hosted_distro/dell-tgi.md b/docs/source/distributions/self_hosted_distro/dell-tgi.md
index cf0c02983..5fca297b0 100644
--- a/docs/source/distributions/self_hosted_distro/dell-tgi.md
+++ b/docs/source/distributions/self_hosted_distro/dell-tgi.md
@@ -53,7 +53,7 @@ docker compose down
 
 #### Start Dell-TGI server locally
 ```
-docker run -it --shm-size 1g -p 80:80 --gpus 4 \
+docker run -it --pull always --shm-size 1g -p 80:80 --gpus 4 \
 -e NUM_SHARD=4
 -e MAX_BATCH_PREFILL_TOKENS=32768 \
 -e MAX_INPUT_TOKENS=8000 \
@@ -65,7 +65,7 @@ registry.dell.huggingface.co/enterprise-dell-inference-meta-llama-meta-llama-3.1
 
 #### Start Llama Stack server pointing to TGI server
 ```
-docker run --network host -it -p 8321:8321 -v ./run.yaml:/root/my-run.yaml --gpus=all llamastack/distribution-tgi --yaml_config /root/my-run.yaml
+docker run --pull always --network host -it -p 8321:8321 -v ./run.yaml:/root/my-run.yaml --gpus=all llamastack/distribution-tgi --yaml_config /root/my-run.yaml
 ```
 
 Make sure in you `run.yaml` file, you inference provider is pointing to the correct TGI server endpoint. E.g.
diff --git a/docs/source/distributions/self_hosted_distro/dell.md b/docs/source/distributions/self_hosted_distro/dell.md
index f49b332a9..96b0ef478 100644
--- a/docs/source/distributions/self_hosted_distro/dell.md
+++ b/docs/source/distributions/self_hosted_distro/dell.md
@@ -55,6 +55,7 @@ export CUDA_VISIBLE_DEVICES=0
 export LLAMA_STACK_PORT=8321
 
 docker run --rm -it \
+  --pull always \
   --network host \
   -v $HOME/.cache/huggingface:/data \
   -e HF_TOKEN=$HF_TOKEN \
@@ -78,6 +79,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 export CUDA_VISIBLE_DEVICES=1
 
 docker run --rm -it \
+  --pull always \
   --network host \
   -v $HOME/.cache/huggingface:/data \
   -e HF_TOKEN=$HF_TOKEN \
@@ -120,6 +122,7 @@ This method allows you to get started quickly without having to build the distri
 
 ```bash
 docker run -it \
+  --pull always \
   --network host \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v $HOME/.llama:/root/.llama \
@@ -147,6 +150,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v $HOME/.llama:/root/.llama \
   -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
diff --git a/docs/source/distributions/self_hosted_distro/fireworks.md b/docs/source/distributions/self_hosted_distro/fireworks.md
index 3c8f5eec9..5a270f0e3 100644
--- a/docs/source/distributions/self_hosted_distro/fireworks.md
+++ b/docs/source/distributions/self_hosted_distro/fireworks.md
@@ -66,6 +66,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-fireworks \
   --port $LLAMA_STACK_PORT \
diff --git a/docs/source/distributions/self_hosted_distro/groq.md b/docs/source/distributions/self_hosted_distro/groq.md
index ce3f8aecc..561a2f246 100644
--- a/docs/source/distributions/self_hosted_distro/groq.md
+++ b/docs/source/distributions/self_hosted_distro/groq.md
@@ -61,6 +61,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-groq \
   --port $LLAMA_STACK_PORT \
diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
index b8d1b1714..c61d21634 100644
--- a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
+++ b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
@@ -80,6 +80,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-gpu \
@@ -92,6 +93,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
 ```bash
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-gpu \
diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
index a49175e22..aec4f4e92 100644
--- a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
+++ b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
@@ -80,6 +80,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-quantized-gpu \
@@ -92,6 +93,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
 ```bash
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-quantized-gpu \
diff --git a/docs/source/distributions/self_hosted_distro/nvidia.md b/docs/source/distributions/self_hosted_distro/nvidia.md
index b86d950dd..28d873a9e 100644
--- a/docs/source/distributions/self_hosted_distro/nvidia.md
+++ b/docs/source/distributions/self_hosted_distro/nvidia.md
@@ -42,6 +42,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./run.yaml:/root/my-run.yaml \
   llamastack/distribution-nvidia \
diff --git a/docs/source/distributions/self_hosted_distro/ollama.md b/docs/source/distributions/self_hosted_distro/ollama.md
index 9bfa4211c..b02870797 100644
--- a/docs/source/distributions/self_hosted_distro/ollama.md
+++ b/docs/source/distributions/self_hosted_distro/ollama.md
@@ -74,6 +74,7 @@ This method allows you to get started quickly without having to build the distri
 export LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-ollama \
@@ -91,6 +92,7 @@ cd /path/to/llama-stack
 
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   -v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \
diff --git a/docs/source/distributions/self_hosted_distro/remote-vllm.md b/docs/source/distributions/self_hosted_distro/remote-vllm.md
index 643627fad..169c9a087 100644
--- a/docs/source/distributions/self_hosted_distro/remote-vllm.md
+++ b/docs/source/distributions/self_hosted_distro/remote-vllm.md
@@ -49,6 +49,7 @@ export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
 export CUDA_VISIBLE_DEVICES=0
 
 docker run \
+  --pull always \
   --runtime nvidia \
   --gpus $CUDA_VISIBLE_DEVICES \
   -v ~/.cache/huggingface:/root/.cache/huggingface \
@@ -71,6 +72,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 export CUDA_VISIBLE_DEVICES=1
 
 docker run \
+  --pull always \
   --runtime nvidia \
   --gpus $CUDA_VISIBLE_DEVICES \
   -v ~/.cache/huggingface:/root/.cache/huggingface \
@@ -98,6 +100,7 @@ export LLAMA_STACK_PORT=5001
 
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./run.yaml:/root/my-run.yaml \
   llamastack/distribution-remote-vllm \
@@ -119,6 +122,7 @@ cd /path/to/llama-stack
 
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   -v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \
diff --git a/docs/source/distributions/self_hosted_distro/sambanova.md b/docs/source/distributions/self_hosted_distro/sambanova.md
index a7f738261..5ef8be4cd 100644
--- a/docs/source/distributions/self_hosted_distro/sambanova.md
+++ b/docs/source/distributions/self_hosted_distro/sambanova.md
@@ -62,6 +62,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-sambanova \
   --port $LLAMA_STACK_PORT \
diff --git a/docs/source/distributions/self_hosted_distro/tgi.md b/docs/source/distributions/self_hosted_distro/tgi.md
index e126f9a08..30ca6e22b 100644
--- a/docs/source/distributions/self_hosted_distro/tgi.md
+++ b/docs/source/distributions/self_hosted_distro/tgi.md
@@ -50,6 +50,7 @@ export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
 export CUDA_VISIBLE_DEVICES=0
 
 docker run --rm -it \
+  --pull always \
   -v $HOME/.cache/huggingface:/data \
   -p $INFERENCE_PORT:$INFERENCE_PORT \
   --gpus $CUDA_VISIBLE_DEVICES \
@@ -70,6 +71,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 export CUDA_VISIBLE_DEVICES=1
 
 docker run --rm -it \
+  --pull always \
   -v $HOME/.cache/huggingface:/data \
   -p $SAFETY_PORT:$SAFETY_PORT \
   --gpus $CUDA_VISIBLE_DEVICES \
@@ -93,6 +95,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-tgi \
   --port $LLAMA_STACK_PORT \
@@ -109,6 +112,7 @@ cd /path/to/llama-stack
 
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md
index fa02199b0..11c37fd57 100644
--- a/docs/source/distributions/self_hosted_distro/together.md
+++ b/docs/source/distributions/self_hosted_distro/together.md
@@ -67,6 +67,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-together \
   --port $LLAMA_STACK_PORT \
diff --git a/docs/source/getting_started/index.md b/docs/source/getting_started/index.md
index f846c9ff0..e8ca05d76 100644
--- a/docs/source/getting_started/index.md
+++ b/docs/source/getting_started/index.md
@@ -54,6 +54,7 @@ mkdir -p ~/.llama
 Then you can start the server using the container tool of your choice. For example, if you are running Docker you can use the following command:
 ```bash
 docker run -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-ollama \
@@ -74,6 +75,7 @@ Docker containers run in their own isolated network namespaces on Linux. To allo
 Linux users having issues running the above command should instead try the following:
 ```bash
 docker run -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   --network=host \
diff --git a/docs/source/playground/index.md b/docs/source/playground/index.md
index 1d52de73f..2940ff988 100644
--- a/docs/source/playground/index.md
+++ b/docs/source/playground/index.md
@@ -118,6 +118,7 @@ Playground can also be started in a docker image:
 export LLAMA_STACK_URL=http://localhost:11434
 
 docker run \
+  --pull always \
   -p 8501:8501 \
   -e LLAMA_STACK_ENDPOINT=$LLAMA_STACK_URL \
   quay.io/jland/llama-stack-playground
diff --git a/llama_stack/templates/bedrock/doc_template.md b/llama_stack/templates/bedrock/doc_template.md
index 24106525a..c18dedf68 100644
--- a/llama_stack/templates/bedrock/doc_template.md
+++ b/llama_stack/templates/bedrock/doc_template.md
@@ -50,6 +50,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
diff --git a/llama_stack/templates/cerebras/doc_template.md b/llama_stack/templates/cerebras/doc_template.md
index 3f5645958..eac690fc8 100644
--- a/llama_stack/templates/cerebras/doc_template.md
+++ b/llama_stack/templates/cerebras/doc_template.md
@@ -42,6 +42,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./run.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \
diff --git a/llama_stack/templates/dell/doc_template.md b/llama_stack/templates/dell/doc_template.md
index 34377de43..26f07130b 100644
--- a/llama_stack/templates/dell/doc_template.md
+++ b/llama_stack/templates/dell/doc_template.md
@@ -43,6 +43,7 @@ export CUDA_VISIBLE_DEVICES=0
 export LLAMA_STACK_PORT=8321
 
 docker run --rm -it \
+  --pull always \
   --network host \
   -v $HOME/.cache/huggingface:/data \
   -e HF_TOKEN=$HF_TOKEN \
@@ -66,6 +67,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 export CUDA_VISIBLE_DEVICES=1
 
 docker run --rm -it \
+  --pull always \
   --network host \
   -v $HOME/.cache/huggingface:/data \
   -e HF_TOKEN=$HF_TOKEN \
@@ -108,6 +110,7 @@ This method allows you to get started quickly without having to build the distri
 
 ```bash
 docker run -it \
+  --pull always \
   --network host \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v $HOME/.llama:/root/.llama \
@@ -135,6 +138,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v $HOME/.llama:/root/.llama \
   -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
diff --git a/llama_stack/templates/fireworks/doc_template.md b/llama_stack/templates/fireworks/doc_template.md
index 6c7743cb8..6bc6c32e5 100644
--- a/llama_stack/templates/fireworks/doc_template.md
+++ b/llama_stack/templates/fireworks/doc_template.md
@@ -52,6 +52,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
diff --git a/llama_stack/templates/groq/doc_template.md b/llama_stack/templates/groq/doc_template.md
index 85b916ccd..c09742a38 100644
--- a/llama_stack/templates/groq/doc_template.md
+++ b/llama_stack/templates/groq/doc_template.md
@@ -52,6 +52,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
diff --git a/llama_stack/templates/meta-reference-gpu/doc_template.md b/llama_stack/templates/meta-reference-gpu/doc_template.md
index 87438fb6d..015df3817 100644
--- a/llama_stack/templates/meta-reference-gpu/doc_template.md
+++ b/llama_stack/templates/meta-reference-gpu/doc_template.md
@@ -68,6 +68,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \
@@ -80,6 +81,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
 ```bash
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md b/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md
index e8dfaaf3c..7d979ecef 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md
+++ b/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md
@@ -70,6 +70,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \
@@ -82,6 +83,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
 ```bash
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \
diff --git a/llama_stack/templates/nvidia/doc_template.md b/llama_stack/templates/nvidia/doc_template.md
index 71b8ac32f..efbedda5b 100644
--- a/llama_stack/templates/nvidia/doc_template.md
+++ b/llama_stack/templates/nvidia/doc_template.md
@@ -42,6 +42,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./run.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \
diff --git a/llama_stack/templates/ollama/doc_template.md b/llama_stack/templates/ollama/doc_template.md
index 8964260a6..925c3bb0a 100644
--- a/llama_stack/templates/ollama/doc_template.md
+++ b/llama_stack/templates/ollama/doc_template.md
@@ -63,6 +63,7 @@ This method allows you to get started quickly without having to build the distri
 export LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \
@@ -80,6 +81,7 @@ cd /path/to/llama-stack
 
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   -v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \
diff --git a/llama_stack/templates/remote-vllm/doc_template.md b/llama_stack/templates/remote-vllm/doc_template.md
index 8abef18fb..33d50c687 100644
--- a/llama_stack/templates/remote-vllm/doc_template.md
+++ b/llama_stack/templates/remote-vllm/doc_template.md
@@ -36,6 +36,7 @@ export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
 export CUDA_VISIBLE_DEVICES=0
 
 docker run \
+  --pull always \
   --runtime nvidia \
   --gpus $CUDA_VISIBLE_DEVICES \
   -v ~/.cache/huggingface:/root/.cache/huggingface \
@@ -58,6 +59,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 export CUDA_VISIBLE_DEVICES=1
 
 docker run \
+  --pull always \
   --runtime nvidia \
   --gpus $CUDA_VISIBLE_DEVICES \
   -v ~/.cache/huggingface:/root/.cache/huggingface \
@@ -85,6 +87,7 @@ export LLAMA_STACK_PORT=5001
 
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./run.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \
@@ -106,6 +109,7 @@ cd /path/to/llama-stack
 
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   -v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \
diff --git a/llama_stack/templates/sambanova/doc_template.md b/llama_stack/templates/sambanova/doc_template.md
index b2a295716..f20d14988 100644
--- a/llama_stack/templates/sambanova/doc_template.md
+++ b/llama_stack/templates/sambanova/doc_template.md
@@ -52,6 +52,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
diff --git a/llama_stack/templates/tgi/doc_template.md b/llama_stack/templates/tgi/doc_template.md
index 32988cf57..ad20727cd 100644
--- a/llama_stack/templates/tgi/doc_template.md
+++ b/llama_stack/templates/tgi/doc_template.md
@@ -38,6 +38,7 @@ export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
 export CUDA_VISIBLE_DEVICES=0
 
 docker run --rm -it \
+  --pull always \
   -v $HOME/.cache/huggingface:/data \
   -p $INFERENCE_PORT:$INFERENCE_PORT \
   --gpus $CUDA_VISIBLE_DEVICES \
@@ -58,6 +59,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 export CUDA_VISIBLE_DEVICES=1
 
 docker run --rm -it \
+  --pull always \
   -v $HOME/.cache/huggingface:/data \
   -p $SAFETY_PORT:$SAFETY_PORT \
   --gpus $CUDA_VISIBLE_DEVICES \
@@ -81,6 +83,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
@@ -97,6 +100,7 @@ cd /path/to/llama-stack
 
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \
diff --git a/llama_stack/templates/together/doc_template.md b/llama_stack/templates/together/doc_template.md
index be055a43e..b306e5cac 100644
--- a/llama_stack/templates/together/doc_template.md
+++ b/llama_stack/templates/together/doc_template.md
@@ -52,6 +52,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
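The flag added throughout this patch is Docker's standard `--pull` option: `--pull always` re-pulls the image from the registry before the container starts, so users do not keep running a stale local copy of a moving tag. A minimal sketch of the resulting command, reusing the port and Ollama distribution image shown in the docs above (the values are illustrative):

```bash
# Illustrative values; substitute your own port and distribution image.
export LLAMA_STACK_PORT=5001

# --pull always forces a registry pull of llamastack/distribution-ollama
# before the container starts, refreshing any stale local copy of the tag.
docker run -it \
  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ~/.llama:/root/.llama \
  llamastack/distribution-ollama \
  --port $LLAMA_STACK_PORT
```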