Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-06-28 02:53:30 +00:00
Update remote vllm docs
parent fb15ff4a97
commit afa4f0b19f

2 changed files with 18 additions and 12 deletions
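The substance of the change: every VLLM_URL and VLLM_SAFETY_URL value gains a /v1 suffix, matching the prefix under which vLLM's OpenAI-compatible server exposes its API. As a quick sanity check (a sketch, assuming a vLLM server is already listening on $INFERENCE_PORT on the host), the endpoint can be probed before pointing Llama Stack at it:

```bash
# Assumes a vLLM OpenAI-compatible server is already running locally;
# INFERENCE_PORT mirrors the variable used in the docs below.
export INFERENCE_PORT=8000

# The API is rooted at /v1 -- this should return the list of served models.
curl http://localhost:$INFERENCE_PORT/v1/models
```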
First of the two changed files:

@@ -88,7 +88,7 @@ docker run \
   /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env VLLM_URL=http://host.docker.internal:$INFERENCE_PORT \
+  --env VLLM_URL=http://host.docker.internal:$INFERENCE_PORT/v1
 ```
 
 If you are using Llama Stack Safety / Shield APIs, use:
@@ -105,9 +105,9 @@ docker run \
   /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env VLLM_URL=http://host.docker.internal:$INFERENCE_PORT \
+  --env VLLM_URL=http://host.docker.internal:$INFERENCE_PORT/v1 \
   --env SAFETY_MODEL=$SAFETY_MODEL \
-  --env VLLM_SAFETY_URL=http://host.docker.internal:$SAFETY_PORT
+  --env VLLM_SAFETY_URL=http://host.docker.internal:$SAFETY_PORT/v1
 ```
 
 
@@ -126,16 +126,19 @@ llama stack build --template remote-vllm --image-type conda
 llama stack run ./run.yaml \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env VLLM_URL=http://127.0.0.1:$INFERENCE_PORT
+  --env VLLM_URL=http://127.0.0.1:$INFERENCE_PORT/v1
 ```
 
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+export SAFETY_PORT=8081
+export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+
 llama stack run ./run-with-safety.yaml \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env VLLM_URL=http://127.0.0.1:$INFERENCE_PORT \
+  --env VLLM_URL=http://127.0.0.1:$INFERENCE_PORT/v1 \
   --env SAFETY_MODEL=$SAFETY_MODEL \
-  --env VLLM_SAFETY_URL=http://127.0.0.1:$SAFETY_PORT
+  --env VLLM_SAFETY_URL=http://127.0.0.1:$SAFETY_PORT/v1
 ```
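For context, the VLLM_URL and VLLM_SAFETY_URL values in these examples point at vLLM's OpenAI-compatible HTTP servers. A minimal sketch of starting the two servers locally, assuming vLLM is installed and using the ports and model names the variables above imply (the distribution guide itself may launch vLLM differently, e.g. via Docker):

```bash
# Inference server (a sketch; port and model name are assumptions
# matching the variables used in the run commands above).
export INFERENCE_PORT=8000
export INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
vllm serve $INFERENCE_MODEL --port $INFERENCE_PORT &

# Optional safety/shield server on a second port.
export SAFETY_PORT=8081
export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
vllm serve $SAFETY_MODEL --port $SAFETY_PORT &
```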
Second of the two changed files (the same updates at different line offsets):

@@ -80,7 +80,7 @@ docker run \
   /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env VLLM_URL=http://host.docker.internal:$INFERENCE_PORT \
+  --env VLLM_URL=http://host.docker.internal:$INFERENCE_PORT/v1
 ```
 
 If you are using Llama Stack Safety / Shield APIs, use:
@@ -97,9 +97,9 @@ docker run \
   /root/my-run.yaml \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env VLLM_URL=http://host.docker.internal:$INFERENCE_PORT \
+  --env VLLM_URL=http://host.docker.internal:$INFERENCE_PORT/v1 \
   --env SAFETY_MODEL=$SAFETY_MODEL \
-  --env VLLM_SAFETY_URL=http://host.docker.internal:$SAFETY_PORT
+  --env VLLM_SAFETY_URL=http://host.docker.internal:$SAFETY_PORT/v1
 ```
 
 
@@ -118,16 +118,19 @@ llama stack build --template remote-vllm --image-type conda
 llama stack run ./run.yaml \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env VLLM_URL=http://127.0.0.1:$INFERENCE_PORT
+  --env VLLM_URL=http://127.0.0.1:$INFERENCE_PORT/v1
 ```
 
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+export SAFETY_PORT=8081
+export SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+
 llama stack run ./run-with-safety.yaml \
   --port $LLAMA_STACK_PORT \
   --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env VLLM_URL=http://127.0.0.1:$INFERENCE_PORT \
+  --env VLLM_URL=http://127.0.0.1:$INFERENCE_PORT/v1 \
   --env SAFETY_MODEL=$SAFETY_MODEL \
-  --env VLLM_SAFETY_URL=http://127.0.0.1:$SAFETY_PORT
+  --env VLLM_SAFETY_URL=http://127.0.0.1:$SAFETY_PORT/v1
 ```
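One practical note on the Docker examples: host.docker.internal resolves automatically on Docker Desktop (macOS/Windows), but on plain Linux it usually has to be mapped by hand. The snippet below is an illustration under that assumption, not part of this commit; it checks that the address used in VLLM_URL is reachable from inside a container:

```bash
# Assumption, not part of this commit: on Linux, map host.docker.internal
# to the host gateway so the VLLM_URL values above resolve in-container.
docker run --rm --add-host=host.docker.internal:host-gateway \
  curlimages/curl -s http://host.docker.internal:$INFERENCE_PORT/v1/models
```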