From 935e706b152ed332361240efc615433494f49d8d Mon Sep 17 00:00:00 2001
From: Dmitry Rogozhkin
Date: Thu, 27 Mar 2025 07:19:51 -0700
Subject: [PATCH] docs: fix remote-vllm instructions (#1805)

# What does this PR do?

* Fix the location of `run.yaml` relative to the cloned llama stack repository
* Drop `-it` from the `docker run` commands, as it's not needed when running services

## Test Plan

* Verified running the llama stack following the updated instructions

CC: @ashwinb

Signed-off-by: Dmitry Rogozhkin
---
 .../distributions/self_hosted_distro/remote-vllm.md | 8 +++++---
 llama_stack/templates/remote-vllm/doc_template.md   | 8 +++++---
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/docs/source/distributions/self_hosted_distro/remote-vllm.md b/docs/source/distributions/self_hosted_distro/remote-vllm.md
index a8cac4971..b6e8a8ad4 100644
--- a/docs/source/distributions/self_hosted_distro/remote-vllm.md
+++ b/docs/source/distributions/self_hosted_distro/remote-vllm.md
@@ -98,11 +98,14 @@ export INFERENCE_PORT=8000
 export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
 export LLAMA_STACK_PORT=8321
 
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
-  -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
+  -v ./llama_stack/templates/remote-vllm/run.yaml:/root/my-run.yaml \
   llamastack/distribution-remote-vllm \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
@@ -121,7 +124,6 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 cd /path/to/llama-stack
 
 docker run \
-  -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
diff --git a/llama_stack/templates/remote-vllm/doc_template.md b/llama_stack/templates/remote-vllm/doc_template.md
index 4d585bc2d..57c9f116c 100644
--- a/llama_stack/templates/remote-vllm/doc_template.md
+++ b/llama_stack/templates/remote-vllm/doc_template.md
@@ -85,11 +85,14 @@ export INFERENCE_PORT=8000
 export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
 export LLAMA_STACK_PORT=8321
 
+# You need a local checkout of llama-stack to run this, get it using
+# git clone https://github.com/meta-llama/llama-stack.git
+cd /path/to/llama-stack
+
 docker run \
-  -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
+  -v ./llama_stack/templates/remote-vllm/run.yaml:/root/my-run.yaml \
   llamastack/distribution-{{ name }} \
   --yaml-config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
@@ -108,7 +111,6 @@ export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
 cd /path/to/llama-stack
 
 docker run \
-  -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
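
For reviewers who prefer the net result to reading the hunks: this is roughly how the first quickstart snippet in `remote-vllm.md` reads once the patch is applied, assembled from the context and `+` lines of the first hunk. The `docker run` command continues with further arguments beyond the hunk boundary, which are elided here.

```bash
export INFERENCE_PORT=8000
export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
export LLAMA_STACK_PORT=8321

# You need a local checkout of llama-stack to run this, get it using
# git clone https://github.com/meta-llama/llama-stack.git
cd /path/to/llama-stack

docker run \
  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
  -v ./llama_stack/templates/remote-vllm/run.yaml:/root/my-run.yaml \
  llamastack/distribution-remote-vllm \
  --yaml-config /root/my-run.yaml \
  --port $LLAMA_STACK_PORT
  # ...the docs continue the command with further flags outside this hunk
```

Beyond being unnecessary for a long-running service, dropping `-it` also lets the same command run in non-interactive contexts such as CI, where `docker run -it` would fail because no TTY can be allocated.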