docs: fix remote-vllm instructions (#1805)

# What does this PR do?

* Fix location of `run.yaml` relative to the cloned llama stack
repository
* Drop `-it` from `docker run` commands as its not needed running
services

## Test Plan

* Verified running the llama stack following updated instruction

CC: @ashwinb

Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
This commit is contained in:
Dmitry Rogozhkin 2025-03-27 07:19:51 -07:00 committed by GitHub
parent 9d9ab7e7dd
commit 935e706b15
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 10 additions and 6 deletions

View file

@ -98,11 +98,14 @@ export INFERENCE_PORT=8000
export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
export LLAMA_STACK_PORT=8321 export LLAMA_STACK_PORT=8321
# You need a local checkout of llama-stack to run this, get it using
# git clone https://github.com/meta-llama/llama-stack.git
cd /path/to/llama-stack
docker run \ docker run \
-it \
--pull always \ --pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ./run.yaml:/root/my-run.yaml \ -v ./llama_stack/templates/remote-vllm/run.yaml:/root/my-run.yaml \
llamastack/distribution-remote-vllm \ llamastack/distribution-remote-vllm \
--yaml-config /root/my-run.yaml \ --yaml-config /root/my-run.yaml \
--port $LLAMA_STACK_PORT \ --port $LLAMA_STACK_PORT \
@ -121,7 +124,6 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
cd /path/to/llama-stack cd /path/to/llama-stack
docker run \ docker run \
-it \
--pull always \ --pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \ -v ~/.llama:/root/.llama \

View file

@ -85,11 +85,14 @@ export INFERENCE_PORT=8000
export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
export LLAMA_STACK_PORT=8321 export LLAMA_STACK_PORT=8321
# You need a local checkout of llama-stack to run this, get it using
# git clone https://github.com/meta-llama/llama-stack.git
cd /path/to/llama-stack
docker run \ docker run \
-it \
--pull always \ --pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ./run.yaml:/root/my-run.yaml \ -v ./llama_stack/templates/remote-vllm/run.yaml:/root/my-run.yaml \
llamastack/distribution-{{ name }} \ llamastack/distribution-{{ name }} \
--yaml-config /root/my-run.yaml \ --yaml-config /root/my-run.yaml \
--port $LLAMA_STACK_PORT \ --port $LLAMA_STACK_PORT \
@ -108,7 +111,6 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
cd /path/to/llama-stack cd /path/to/llama-stack
docker run \ docker run \
-it \
--pull always \ --pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \ -v ~/.llama:/root/.llama \