add support for ${env.FOO_BAR} placeholders in run.yaml files (#439)
# What does this PR do?

We'd like our docker steps to require _ZERO EDITS_ to a YAML file in order to get going. This is often not possible because, depending on the provider, we do need some configuration input from the user. Environment variables are the best way to obtain this information.

This PR allows our run.yaml to contain `${env.FOO_BAR}` placeholders, which can be replaced using `docker run -e FOO_BAR=baz` (and the `docker compose` equivalent). A placeholder may also carry an inline default, as in `${env.FOO_BAR:some-default}`, which is used when the variable is not set.

## Test Plan

For remote-vllm, an example `run.yaml` snippet looks like this:

```yaml
providers:
  inference:
    # serves main inference model
    - provider_id: vllm-0
      provider_type: remote::vllm
      config:
        # NOTE: replace with "localhost" if you are running in "host" network mode
        url: ${env.LLAMA_INFERENCE_VLLM_URL:http://host.docker.internal:5100/v1}
        max_tokens: ${env.MAX_TOKENS:4096}
        api_token: fake
    # serves safety llama_guard model
    - provider_id: vllm-1
      provider_type: remote::vllm
      config:
        # NOTE: replace with "localhost" if you are running in "host" network mode
        url: ${env.LLAMA_SAFETY_VLLM_URL:http://host.docker.internal:5101/v1}
        max_tokens: ${env.MAX_TOKENS:4096}
        api_token: fake
```

The `compose.yaml` snippet looks like this:

```yaml
llamastack:
  depends_on:
    - vllm-0
    - vllm-1
  # image: llamastack/distribution-remote-vllm
  image: llamastack/distribution-remote-vllm:test-0.0.52rc3
  volumes:
    - ~/.llama:/root/.llama
    - ~/local/llama-stack/distributions/remote-vllm/run.yaml:/root/llamastack-run-remote-vllm.yaml
  # network_mode: "host"
  environment:
    - LLAMA_INFERENCE_VLLM_URL=${LLAMA_INFERENCE_VLLM_URL:-http://host.docker.internal:5100/v1}
    - LLAMA_INFERENCE_MODEL=${LLAMA_INFERENCE_MODEL:-Llama3.1-8B-Instruct}
    - MAX_TOKENS=${MAX_TOKENS:-4096}
    - SQLITE_STORE_DIR=${SQLITE_STORE_DIR:-$HOME/.llama/distributions/remote-vllm}
    - LLAMA_SAFETY_VLLM_URL=${LLAMA_SAFETY_VLLM_URL:-http://host.docker.internal:5101/v1}
    - LLAMA_SAFETY_MODEL=${LLAMA_SAFETY_MODEL:-Llama-Guard-3-1B}
```
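To make the mechanics concrete, here is a minimal sketch of how `${env.VAR}` / `${env.VAR:default}` substitution could work over the raw run.yaml text before parsing. This is illustrative only, not the implementation in this PR; the function name, the regex, and the fail-loudly behavior for unset variables without a default are all assumptions.

```python
# Hypothetical sketch of ${env.VAR} / ${env.VAR:default} substitution --
# not the actual llama-stack implementation.
import os
import re

# ${env.NAME} with an optional ":default" part; the default may itself
# contain colons (e.g. URLs), so it is matched up to the closing brace.
_ENV_PATTERN = re.compile(r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*)(?::([^}]*))?\}")

def substitute_env_placeholders(raw_yaml: str) -> str:
    """Resolve env placeholders in raw YAML text from os.environ."""
    def _replace(match: re.Match) -> str:
        name, default = match.group(1), match.group(2)
        value = os.environ.get(name)
        if value is not None:
            return value
        if default is not None:
            return default
        # Assumed behavior: fail loudly when no value is available.
        raise ValueError(f"environment variable {name} is not set and has no default")
    return _ENV_PATTERN.sub(_replace, raw_yaml)
```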
parent 838b8d4fb5
commit 96e7ef646f

3 changed files with 73 additions and 11 deletions
`distributions/remote-vllm/compose.yaml`:

```diff
@@ -71,6 +71,13 @@ services:
       - ~/.llama:/root/.llama
       - ~/local/llama-stack/distributions/remote-vllm/run.yaml:/root/llamastack-run-remote-vllm.yaml
     # network_mode: "host"
+    environment:
+      - LLAMA_INFERENCE_VLLM_URL=${LLAMA_INFERENCE_VLLM_URL:-http://host.docker.internal:5100/v1}
+      - LLAMA_INFERENCE_MODEL=${LLAMA_INFERENCE_MODEL:-Llama3.1-8B-Instruct}
+      - MAX_TOKENS=${MAX_TOKENS:-4096}
+      - SQLITE_STORE_DIR=${SQLITE_STORE_DIR:-$HOME/.llama/distributions/remote-vllm}
+      - LLAMA_SAFETY_VLLM_URL=${LLAMA_SAFETY_VLLM_URL:-http://host.docker.internal:5101/v1}
+      - LLAMA_SAFETY_MODEL=${LLAMA_SAFETY_MODEL:-Llama-Guard-3-1B}
     ports:
       - "5001:5001"
     # Hack: wait for vLLM server to start before starting docker
```
`distributions/remote-vllm/run.yaml`:

```diff
@@ -16,16 +16,16 @@ providers:
       provider_type: remote::vllm
       config:
         # NOTE: replace with "localhost" if you are running in "host" network mode
-        url: http://host.docker.internal:5100/v1
-        max_tokens: 4096
+        url: ${env.LLAMA_INFERENCE_VLLM_URL:http://host.docker.internal:5100/v1}
+        max_tokens: ${env.MAX_TOKENS:4096}
         api_token: fake
     # serves safety llama_guard model
     - provider_id: vllm-1
       provider_type: remote::vllm
       config:
         # NOTE: replace with "localhost" if you are running in "host" network mode
-        url: http://host.docker.internal:5101/v1
-        max_tokens: 4096
+        url: ${env.LLAMA_SAFETY_VLLM_URL:http://host.docker.internal:5101/v1}
+        max_tokens: ${env.MAX_TOKENS:4096}
         api_token: fake
   memory:
     - provider_id: faiss-0
@@ -34,7 +34,7 @@ providers:
       kvstore:
         namespace: null
         type: sqlite
-        db_path: /home/ashwin/.llama/distributions/remote-vllm/faiss_store.db
+        db_path: "${env.SQLITE_STORE_DIR:/home/ashwin/.llama/distributions/remote-vllm}/faiss_store.db"
   safety:
     - provider_id: llama-guard
       provider_type: inline::llama-guard
@@ -50,7 +50,7 @@ providers:
       persistence_store:
         namespace: null
         type: sqlite
-        db_path: /home/ashwin/.llama/distributions/remote-vllm/agents_store.db
+        db_path: "${env.SQLITE_STORE_DIR:/home/ashwin/.llama/distributions/remote-vllm}/agents_store.db"
   telemetry:
     - provider_id: meta0
       provider_type: inline::meta-reference
@@ -58,11 +58,11 @@ providers:
 metadata_store:
   namespace: null
   type: sqlite
-  db_path: /home/ashwin/.llama/distributions/remote-vllm/registry.db
+  db_path: "${env.SQLITE_STORE_DIR:/home/ashwin/.llama/distributions/remote-vllm}/registry.db"
 models:
-  - model_id: Llama3.1-8B-Instruct
+  - model_id: ${env.LLAMA_INFERENCE_MODEL:Llama3.1-8B-Instruct}
     provider_id: vllm-0
-  - model_id: Llama-Guard-3-1B
+  - model_id: ${env.LLAMA_SAFETY_MODEL:Llama-Guard-3-1B}
     provider_id: vllm-1
 shields:
-  - shield_id: Llama-Guard-3-1B
+  - shield_id: ${env.LLAMA_SAFETY_MODEL:Llama-Guard-3-1B}
```
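Taken together, resolving a config at startup with the sketch above might look like this (again hypothetical usage, not code from this PR; PyYAML's `yaml.safe_load` is assumed as the parser):

```python
import yaml  # PyYAML, assumed here for parsing the resolved text

with open("run.yaml") as f:
    resolved = substitute_env_placeholders(f.read())

config = yaml.safe_load(resolved)
# With LLAMA_INFERENCE_VLLM_URL unset, the inline default is used:
print(config["providers"]["inference"][0]["config"]["url"])
# -> http://host.docker.internal:5100/v1
```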