From 935e706b152ed332361240efc615433494f49d8d Mon Sep 17 00:00:00 2001
From: Dmitry Rogozhkin
Date: Thu, 27 Mar 2025 07:19:51 -0700
Subject: [PATCH] docs: fix remote-vllm instructions (#1805)

# What does this PR do?

* Fix the location of `run.yaml` relative to the cloned llama stack repository
* Drop `-it` from the `docker run` commands, as it's not needed when running services

## Test Plan

* Verified running the llama stack following the updated instructions

CC: @ashwinb

Signed-off-by: Dmitry Rogozhkin
---
 .../distributions/self_hosted_distro/remote-vllm.md | 8 +++++---
 llama_stack/templates/remote-vllm/doc_template.md   | 8 +++++---
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/docs/source/distributions/self_hosted_distro/remote-vllm.md b/docs/source/distributions/self_hosted_distro/remote-vllm.md
index a8cac4971..b6e8a8ad4 100644
--- a/docs/source/distributions/self_hosted_distro/remote-vllm.md
+++ b/docs/source/distributions/self_hosted_distro/remote-vllm.md
@@ -98,11 +98,14 @@ export INFERENCE_PORT=8000
 export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
 export LLAMA_STACK_PORT=8321
 
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
-  -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
+  -v ./llama_stack/templates/remote-vllm/run.yaml:/root/my-run.yaml \
   llamastack/distribution-remote-vllm \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
@@ -121,7 +124,6 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 cd /path/to/llama-stack
 
 docker run \
-  -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
diff --git a/llama_stack/templates/remote-vllm/doc_template.md b/llama_stack/templates/remote-vllm/doc_template.md
index 4d585bc2d..57c9f116c 100644
--- a/llama_stack/templates/remote-vllm/doc_template.md
+++ b/llama_stack/templates/remote-vllm/doc_template.md
@@ -85,11 +85,14 @@ export INFERENCE_PORT=8000
 export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
 export LLAMA_STACK_PORT=8321
 
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
-  -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
+  -v ./llama_stack/templates/remote-vllm/run.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
@@ -108,7 +111,6 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 cd /path/to/llama-stack
 
 docker run \
-  -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
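
For reviewers who prefer the net result to reading the hunks: this is roughly how the first quickstart snippet in `remote-vllm.md` reads once the patch is applied, assembled from the context and `+` lines of the first hunk. The `docker run` command continues with further arguments beyond the hunk boundary, which are elided here.

```bash
export INFERENCE_PORT=8000
export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
export LLAMA_STACK_PORT=8321

# You need a local checkout of llama-stack to run this, get it using
# git clone https://github.com/meta-llama/llama-stack.git
cd /path/to/llama-stack

docker run \
  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ./llama_stack/templates/remote-vllm/run.yaml:/root/my-run.yaml \
  llamastack/distribution-remote-vllm \
  --yaml-config /root/my-run.yaml \
  --port $LLAMA_STACK_PORT
  # ...the docs continue the command with further flags outside this hunk
```

Beyond being unnecessary for a long-running service, dropping `-it` also lets the same command run in non-interactive contexts such as CI, where `docker run -it` would fail because no TTY can be allocated.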