diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
index 73d6befd4..d46039318 100644
--- a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
+++ b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
@@ -60,6 +60,7 @@ LLAMA_STACK_PORT=5001
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-gpu \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
@@ -71,6 +72,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-gpu \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
index fab9c6cd8..837be744a 100644
--- a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
+++ b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
@@ -60,6 +60,7 @@ LLAMA_STACK_PORT=5001
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-quantized-gpu \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
@@ -71,6 +72,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
   llamastack/distribution-meta-reference-quantized-gpu \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
diff --git a/llama_stack/templates/meta-reference-gpu/doc_template.md b/llama_stack/templates/meta-reference-gpu/doc_template.md
index f9870adbd..421812dbc 100644
--- a/llama_stack/templates/meta-reference-gpu/doc_template.md
+++ b/llama_stack/templates/meta-reference-gpu/doc_template.md
@@ -50,6 +50,7 @@ LLAMA_STACK_PORT=5001
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
@@ -61,6 +62,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md b/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md
index 9e3c56d92..daa380d20 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md
+++ b/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md
@@ -52,6 +52,7 @@ LLAMA_STACK_PORT=5001
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
@@ -63,6 +64,7 @@ If you are using Llama Stack Safety / Shield APIs, use:
 docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
   llamastack/distribution-{{ name }} \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
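
The added `-v ~/.llama:/root/.llama` flag bind-mounts the host directory where locally downloaded Llama model checkpoints live into the container, so the meta-reference server can load weights from the host instead of starting with an empty `/root/.llama`. A quick way to sanity-check the mount before starting the server (a sketch, not part of the patch; it relies only on standard Docker behavior and one of the image names from the diff, and assumes checkpoints were already downloaded to `~/.llama` on the host):

```bash
# Sketch: confirm the host checkpoint directory is visible inside the container.
# --entrypoint ls overrides the image entrypoint; the trailing args go to ls.
docker run --rm \
  -v ~/.llama:/root/.llama \
  --entrypoint ls \
  llamastack/distribution-meta-reference-gpu \
  -la /root/.llama
```

If the listing shows your downloaded model directories, the same `-v` flag in the full `docker run` commands above will make them available to the server.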