From e2054d53e4aa6b1a8949bd7107e2099aeaf07978 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Tue, 10 Dec 2024 10:22:04 -0800 Subject: [PATCH] Fix issue 586 (#594) # What does this PR do? - Addresses issue (#586) ## Test Plan ``` python llama_stack/scripts/distro_codegen.py ``` ## Before submitting - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). - [ ] Ran pre-commit to handle lint / formatting issues. - [ ] Read the [contributor guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md), Pull Request section? - [ ] Updated relevant documentation. - [ ] Wrote necessary unit or integration tests. --- .../distributions/self_hosted_distro/meta-reference-gpu.md | 2 ++ .../self_hosted_distro/meta-reference-quantized-gpu.md | 2 ++ llama_stack/templates/meta-reference-gpu/doc_template.md | 2 ++ .../templates/meta-reference-quantized-gpu/doc_template.md | 2 ++ 4 files changed, 8 insertions(+) diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md index 73d6befd4..d46039318 100644 --- a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md +++ b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md @@ -60,6 +60,7 @@ LLAMA_STACK_PORT=5001 docker run \ -it \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v ~/.llama:/root/.llama \ llamastack/distribution-meta-reference-gpu \ --port $LLAMA_STACK_PORT \ --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct @@ -71,6 +72,7 @@ If you are using Llama Stack Safety / Shield APIs, use: docker run \ -it \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v ~/.llama:/root/.llama \ llamastack/distribution-meta-reference-gpu \ --port $LLAMA_STACK_PORT \ --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md 
b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md index fab9c6cd8..837be744a 100644 --- a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md +++ b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md @@ -60,6 +60,7 @@ LLAMA_STACK_PORT=5001 docker run \ -it \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v ~/.llama:/root/.llama \ llamastack/distribution-meta-reference-quantized-gpu \ --port $LLAMA_STACK_PORT \ --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct @@ -71,6 +72,7 @@ If you are using Llama Stack Safety / Shield APIs, use: docker run \ -it \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v ~/.llama:/root/.llama \ llamastack/distribution-meta-reference-quantized-gpu \ --port $LLAMA_STACK_PORT \ --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ diff --git a/llama_stack/templates/meta-reference-gpu/doc_template.md b/llama_stack/templates/meta-reference-gpu/doc_template.md index f9870adbd..421812dbc 100644 --- a/llama_stack/templates/meta-reference-gpu/doc_template.md +++ b/llama_stack/templates/meta-reference-gpu/doc_template.md @@ -50,6 +50,7 @@ LLAMA_STACK_PORT=5001 docker run \ -it \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v ~/.llama:/root/.llama \ llamastack/distribution-{{ name }} \ --port $LLAMA_STACK_PORT \ --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct @@ -61,6 +62,7 @@ If you are using Llama Stack Safety / Shield APIs, use: docker run \ -it \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v ~/.llama:/root/.llama \ llamastack/distribution-{{ name }} \ --port $LLAMA_STACK_PORT \ --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ diff --git a/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md b/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md index 9e3c56d92..daa380d20 100644 --- a/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md +++ b/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md @@ 
-52,6 +52,7 @@ LLAMA_STACK_PORT=5001 docker run \ -it \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v ~/.llama:/root/.llama \ llamastack/distribution-{{ name }} \ --port $LLAMA_STACK_PORT \ --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct @@ -63,6 +64,7 @@ If you are using Llama Stack Safety / Shield APIs, use: docker run \ -it \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v ~/.llama:/root/.llama \ llamastack/distribution-{{ name }} \ --port $LLAMA_STACK_PORT \ --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \