forked from phoenix-oss/llama-stack-mirror
fix: docker run with --pull always to fetch the latest image (#1733)

As titled
commit 581e8ae562
parent f95bc29ca9
31 changed files with 57 additions and 3 deletions

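For context on the flag this commit adds throughout: `docker run --pull` accepts `always`, `missing` (the default), and `never`. With the default policy, a locally cached image is reused even after a newer image has been pushed under the same tag, so documented commands can silently run stale builds. A minimal sketch of the difference, using a generic public image as a stand-in for the `llamastack/*` images touched below:

```bash
# Default (--pull missing): reuse a locally cached image as-is, even if
# the registry has newer content under the same tag.
docker run --rm alpine:latest echo "may be stale"

# --pull always: contact the registry on every run and pull when the
# remote digest differs, so mutable tags like :latest stay current.
docker run --pull always --rm alpine:latest echo "up to date"
```
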
@@ -57,7 +57,7 @@ The `otel` sink works with any service compatible with the OpenTelemetry collect
 Start a Jaeger instance with the OTLP HTTP endpoint at 4318 and the Jaeger UI at 16686 using the following command:

 ```bash
-$ docker run --rm --name jaeger \
+$ docker run --pull always --rm --name jaeger \
   -p 16686:16686 -p 4318:4318 \
   jaegertracing/jaeger:2.1.0
 ```

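To verify that instance came up, both published ports can be probed. The paths below are standard rather than taken from this diff; note the OTLP/HTTP ingest route is POST-only, so a 405 on a bare GET still confirms the listener is up:

```bash
# Jaeger UI should return 200; the OTLP HTTP port answers 405 to a GET
# on /v1/traces because that endpoint only accepts POST.
curl -s -o /dev/null -w "ui: %{http_code}\n"   http://localhost:16686/
curl -s -o /dev/null -w "otlp: %{http_code}\n" http://localhost:4318/v1/traces
```
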
@@ -61,6 +61,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./run.yaml:/root/my-run.yaml \
   llamastack/distribution-nvidia \

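The hunks in this commit are truncated at the context boundary, so the tails of these commands are not shown. Judging from the Dell/TGI hunk later in this diff, a config mounted at `/root/my-run.yaml` is passed back to the server via `--yaml_config`; a sketch of the presumed full invocation (the tail here is an assumption, not part of this diff):

```bash
# Presumed full form of the truncated command above: the mounted file is
# handed to the entrypoint so the server loads it.
docker run -it \
  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ./run.yaml:/root/my-run.yaml \
  llamastack/distribution-nvidia \
  --yaml_config /root/my-run.yaml \
  --port $LLAMA_STACK_PORT
```
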
@@ -56,6 +56,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-bedrock \
   --port $LLAMA_STACK_PORT \

@@ -48,6 +48,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./run.yaml:/root/my-run.yaml \
   llamastack/distribution-cerebras \

@@ -53,7 +53,7 @@ docker compose down

 #### Start Dell-TGI server locally
 ```
-docker run -it --shm-size 1g -p 80:80 --gpus 4 \
+docker run -it --pull always --shm-size 1g -p 80:80 --gpus 4 \
   -e NUM_SHARD=4
   -e MAX_BATCH_PREFILL_TOKENS=32768 \
   -e MAX_INPUT_TOKENS=8000 \

@@ -65,7 +65,7 @@ registry.dell.huggingface.co/enterprise-dell-inference-meta-llama-meta-llama-3.1
 #### Start Llama Stack server pointing to TGI server

 ```
-docker run --network host -it -p 8321:8321 -v ./run.yaml:/root/my-run.yaml --gpus=all llamastack/distribution-tgi --yaml_config /root/my-run.yaml
+docker run --pull always --network host -it -p 8321:8321 -v ./run.yaml:/root/my-run.yaml --gpus=all llamastack/distribution-tgi --yaml_config /root/my-run.yaml
 ```

 Make sure in you `run.yaml` file, you inference provider is pointing to the correct TGI server endpoint. E.g.

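The example following that last context line is cut off by the hunk boundary. As a hypothetical illustration only (the provider id and exact schema are assumptions, not taken from this diff), the relevant `run.yaml` stanza would point a remote TGI inference provider at the server published on port 80 above:

```bash
# Hypothetical illustration of the elided run.yaml example; schema and
# provider id are assumed, not confirmed by this commit.
cat <<'EOF'
providers:
  inference:
  - provider_id: tgi0
    provider_type: remote::tgi
    config:
      url: http://127.0.0.1:80  # the Dell-TGI server started above
EOF
```
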
@@ -55,6 +55,7 @@ export CUDA_VISIBLE_DEVICES=0
 export LLAMA_STACK_PORT=8321

 docker run --rm -it \
+  --pull always \
   --network host \
   -v $HOME/.cache/huggingface:/data \
   -e HF_TOKEN=$HF_TOKEN \

@@ -78,6 +79,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 export CUDA_VISIBLE_DEVICES=1

 docker run --rm -it \
+  --pull always \
   --network host \
   -v $HOME/.cache/huggingface:/data \
   -e HF_TOKEN=$HF_TOKEN \

@@ -120,6 +122,7 @@ This method allows you to get started quickly without having to build the distri

 ```bash
 docker run -it \
+  --pull always \
   --network host \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v $HOME/.llama:/root/.llama \

@@ -147,6 +150,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B

 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v $HOME/.llama:/root/.llama \
   -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \

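Since the later hunks in this file start a Llama Stack container that talks to these TGI containers over the host network, it helps to confirm TGI is serving first. `/health` and `/info` are standard TGI routes; the port variable is an assumption, since the hunks are cut off before it is exported:

```bash
# Probe the TGI inference container before starting Llama Stack.
# INFERENCE_PORT is assumed to be exported earlier in the document.
curl -sf "http://127.0.0.1:${INFERENCE_PORT:-8080}/health" && echo "tgi healthy"
curl -s  "http://127.0.0.1:${INFERENCE_PORT:-8080}/info"
```
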
@@ -66,6 +66,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-fireworks \
   --port $LLAMA_STACK_PORT \

@@ -61,6 +61,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-groq \
   --port $LLAMA_STACK_PORT \

@@ -80,6 +80,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-gpu \

@@ -92,6 +93,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
 ```bash
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-gpu \

@@ -80,6 +80,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-quantized-gpu \

@@ -92,6 +93,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
 ```bash
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-quantized-gpu \

@@ -42,6 +42,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./run.yaml:/root/my-run.yaml \
   llamastack/distribution-nvidia \

@@ -74,6 +74,7 @@ This method allows you to get started quickly without having to build the distri
 export LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-ollama \

@@ -91,6 +92,7 @@ cd /path/to/llama-stack

 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   -v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \

@@ -49,6 +49,7 @@ export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
 export CUDA_VISIBLE_DEVICES=0

 docker run \
+  --pull always \
   --runtime nvidia \
   --gpus $CUDA_VISIBLE_DEVICES \
   -v ~/.cache/huggingface:/root/.cache/huggingface \

@@ -71,6 +72,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 export CUDA_VISIBLE_DEVICES=1

 docker run \
+  --pull always \
   --runtime nvidia \
   --gpus $CUDA_VISIBLE_DEVICES \
   -v ~/.cache/huggingface:/root/.cache/huggingface \

@@ -98,6 +100,7 @@ export LLAMA_STACK_PORT=5001

 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./run.yaml:/root/my-run.yaml \
   llamastack/distribution-remote-vllm \

@@ -119,6 +122,7 @@ cd /path/to/llama-stack

 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   -v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \

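The first two hunks in this file start vLLM serving containers. vLLM's OpenAI-compatible server exposes `/v1/models`, which makes a quick readiness probe; port 8000 is vLLM's default and an assumption here, since the hunks are cut off before any port mapping:

```bash
# List the models the vLLM server has loaded (OpenAI-compatible route).
# Port 8000 is assumed; adjust to match the actual -p mapping.
curl -s http://127.0.0.1:8000/v1/models
```
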
@@ -62,6 +62,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-sambanova \
   --port $LLAMA_STACK_PORT \

@@ -50,6 +50,7 @@ export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
 export CUDA_VISIBLE_DEVICES=0

 docker run --rm -it \
+  --pull always \
   -v $HOME/.cache/huggingface:/data \
   -p $INFERENCE_PORT:$INFERENCE_PORT \
   --gpus $CUDA_VISIBLE_DEVICES \

@@ -70,6 +71,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 export CUDA_VISIBLE_DEVICES=1

 docker run --rm -it \
+  --pull always \
   -v $HOME/.cache/huggingface:/data \
   -p $SAFETY_PORT:$SAFETY_PORT \
   --gpus $CUDA_VISIBLE_DEVICES \

@@ -93,6 +95,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-tgi \
   --port $LLAMA_STACK_PORT \

@@ -109,6 +112,7 @@ cd /path/to/llama-stack

 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \

@@ -67,6 +67,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-together \
   --port $LLAMA_STACK_PORT \

@@ -54,6 +54,7 @@ mkdir -p ~/.llama
 Then you can start the server using the container tool of your choice. For example, if you are running Docker you can use the following command:
 ```bash
 docker run -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-ollama \

@@ -74,6 +75,7 @@ Docker containers run in their own isolated network namespaces on Linux. To allo
 Linux users having issues running the above command should instead try the following:
 ```bash
 docker run -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   --network=host \

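The two variants in this file differ only in how the container reaches a server running on the host: the non-Linux form typically goes through `host.docker.internal` (not visible in these hunks), while the Linux form switches to `--network=host`. A Linux alternative that keeps network isolation is to map that name explicitly; a sketch, with the image tail abbreviated relative to the truncated command above:

```bash
# Map host.docker.internal to the host-gateway address (Docker 20.10+)
# so the container can reach host services without --network=host.
docker run -it \
  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ~/.llama:/root/.llama \
  --add-host=host.docker.internal:host-gateway \
  llamastack/distribution-ollama
```
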
@@ -118,6 +118,7 @@ Playground can also be started in a docker image:
 export LLAMA_STACK_URL=http://localhost:11434

 docker run \
+  --pull always \
   -p 8501:8501 \
   -e LLAMA_STACK_ENDPOINT=$LLAMA_STACK_URL \
   quay.io/jland/llama-stack-playground

@@ -50,6 +50,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \

@@ -42,6 +42,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./run.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \

@@ -43,6 +43,7 @@ export CUDA_VISIBLE_DEVICES=0
 export LLAMA_STACK_PORT=8321

 docker run --rm -it \
+  --pull always \
   --network host \
   -v $HOME/.cache/huggingface:/data \
   -e HF_TOKEN=$HF_TOKEN \

@@ -66,6 +67,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 export CUDA_VISIBLE_DEVICES=1

 docker run --rm -it \
+  --pull always \
   --network host \
   -v $HOME/.cache/huggingface:/data \
   -e HF_TOKEN=$HF_TOKEN \

@@ -108,6 +110,7 @@ This method allows you to get started quickly without having to build the distri

 ```bash
 docker run -it \
+  --pull always \
   --network host \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v $HOME/.llama:/root/.llama \

@@ -135,6 +138,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B

 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v $HOME/.llama:/root/.llama \
   -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \

@@ -52,6 +52,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \

@@ -52,6 +52,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \

@@ -68,6 +68,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \

@@ -80,6 +81,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
 ```bash
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \

@@ -70,6 +70,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \

@@ -82,6 +83,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
 ```bash
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \

@@ -42,6 +42,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./run.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \

@@ -63,6 +63,7 @@ This method allows you to get started quickly without having to build the distri
 export LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \

@@ -80,6 +81,7 @@ cd /path/to/llama-stack

 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   -v ./llama_stack/templates/ollama/run-with-safety.yaml:/root/my-run.yaml \

@@ -36,6 +36,7 @@ export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
 export CUDA_VISIBLE_DEVICES=0

 docker run \
+  --pull always \
   --runtime nvidia \
   --gpus $CUDA_VISIBLE_DEVICES \
   -v ~/.cache/huggingface:/root/.cache/huggingface \

@@ -58,6 +59,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 export CUDA_VISIBLE_DEVICES=1

 docker run \
+  --pull always \
   --runtime nvidia \
   --gpus $CUDA_VISIBLE_DEVICES \
   -v ~/.cache/huggingface:/root/.cache/huggingface \

@@ -85,6 +87,7 @@ export LLAMA_STACK_PORT=5001

 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./run.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \

@@ -106,6 +109,7 @@ cd /path/to/llama-stack

 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   -v ./llama_stack/templates/remote-vllm/run-with-safety.yaml:/root/my-run.yaml \

@@ -52,6 +52,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \

@@ -38,6 +38,7 @@ export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
 export CUDA_VISIBLE_DEVICES=0

 docker run --rm -it \
+  --pull always \
   -v $HOME/.cache/huggingface:/data \
   -p $INFERENCE_PORT:$INFERENCE_PORT \
   --gpus $CUDA_VISIBLE_DEVICES \

@@ -58,6 +59,7 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 export CUDA_VISIBLE_DEVICES=1

 docker run --rm -it \
+  --pull always \
   -v $HOME/.cache/huggingface:/data \
   -p $SAFETY_PORT:$SAFETY_PORT \
   --gpus $CUDA_VISIBLE_DEVICES \

@@ -81,6 +83,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \

@@ -97,6 +100,7 @@ cd /path/to/llama-stack

 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   -v ./llama_stack/templates/tgi/run-with-safety.yaml:/root/my-run.yaml \

@@ -52,6 +52,7 @@ This method allows you to get started quickly without having to build the distri
 LLAMA_STACK_PORT=5001
 docker run \
   -it \
+  --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \