add support for ${env.FOO_BAR} placeholders in run.yaml files (#439)

# What does this PR do?

We'd like our docker steps to require _ZERO EDITS_ to a YAML file in
order to get going. This is often not possible because, depending on the
provider, we do need some configuration input from the user. Environment
variables are the best way to obtain this information.

This PR allows our run.yaml to contain `${env.FOO_BAR}` placeholders,
which are filled in from environment variables supplied via
`docker run -e FOO_BAR=baz` (or the equivalent `environment:` entries in
`docker compose`). As the snippets below show, a placeholder can also carry a
default after the colon, e.g. `${env.MAX_TOKENS:4096}`, which is used when the
variable is not set.
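
For illustration only, here is a minimal Python sketch of the kind of substitution this relies on — the regex, function name, and error behavior are assumptions for the sketch, not the code added by this PR:

```python
import os
import re

# Matches ${env.VAR} or ${env.VAR:default}; the default may itself contain ':'
# (e.g. a URL), so everything up to the closing brace is taken as the default.
_ENV_VAR = re.compile(r"\$\{env\.([A-Za-z_][A-Za-z0-9_]*)(?::([^}]*))?\}")

def substitute_env_placeholders(text: str) -> str:
    """Replace ${env.FOO_BAR} / ${env.FOO_BAR:default} placeholders in a string."""
    def _replace(match: re.Match) -> str:
        name, default = match.group(1), match.group(2)
        value = os.environ.get(name)
        if value is not None:
            return value
        if default is not None:
            return default
        raise ValueError(f"Environment variable '{name}' is not set and has no default")

    return _ENV_VAR.sub(_replace, text)

# With MAX_TOKENS unset, the default after ':' is used.
print(substitute_env_placeholders("max_tokens: ${env.MAX_TOKENS:4096}"))  # -> max_tokens: 4096
```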

## Test Plan

For remote-vllm, an example `run.yaml` snippet looks like this:
```yaml
providers:
  inference:
  # serves main inference model
  - provider_id: vllm-0
    provider_type: remote::vllm
    config:
      # NOTE: replace with "localhost" if you are running in "host" network mode
      url: ${env.LLAMA_INFERENCE_VLLM_URL:http://host.docker.internal:5100/v1}
      max_tokens: ${env.MAX_TOKENS:4096}
      api_token: fake
  # serves safety llama_guard model
  - provider_id: vllm-1
    provider_type: remote::vllm
    config:
      # NOTE: replace with "localhost" if you are running in "host" network mode
      url: ${env.LLAMA_SAFETY_VLLM_URL:http://host.docker.internal:5101/v1}
      max_tokens: ${env.MAX_TOKENS:4096}
      api_token: fake
```

The corresponding `compose.yaml` snippet looks like this:
```yaml
llamastack:
    depends_on:
    - vllm-0
    - vllm-1
    # image: llamastack/distribution-remote-vllm
    image: llamastack/distribution-remote-vllm:test-0.0.52rc3
    volumes:
      - ~/.llama:/root/.llama
      - ~/local/llama-stack/distributions/remote-vllm/run.yaml:/root/llamastack-run-remote-vllm.yaml
    # network_mode: "host"
    environment:
      - LLAMA_INFERENCE_VLLM_URL=${LLAMA_INFERENCE_VLLM_URL:-http://host.docker.internal:5100/v1}
      - LLAMA_INFERENCE_MODEL=${LLAMA_INFERENCE_MODEL:-Llama3.1-8B-Instruct}
      - MAX_TOKENS=${MAX_TOKENS:-4096}
      - SQLITE_STORE_DIR=${SQLITE_STORE_DIR:-$HOME/.llama/distributions/remote-vllm}
      - LLAMA_SAFETY_VLLM_URL=${LLAMA_SAFETY_VLLM_URL:-http://host.docker.internal:5101/v1}
      - LLAMA_SAFETY_MODEL=${LLAMA_SAFETY_MODEL:-Llama-Guard-3-1B}
```
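
As a quick end-to-end sanity check of the test plan, one could run the substitution over the mounted run.yaml inside the container and inspect the parsed result. This sketch reuses the hypothetical `substitute_env_placeholders` helper from above and assumes the mount path shown in the compose file; it is illustrative, not the stack's actual config loader:

```python
import os
from pathlib import Path

import yaml  # PyYAML

# Defaults mirroring the compose file, in case the variables are not exported.
os.environ.setdefault("LLAMA_INFERENCE_VLLM_URL", "http://host.docker.internal:5100/v1")
os.environ.setdefault("LLAMA_SAFETY_VLLM_URL", "http://host.docker.internal:5101/v1")
os.environ.setdefault("MAX_TOKENS", "4096")

raw = Path("/root/llamastack-run-remote-vllm.yaml").read_text()
config = yaml.safe_load(substitute_env_placeholders(raw))  # helper from the sketch above

# The inference provider config should now reflect the environment, not hard-coded values.
print(config["providers"]["inference"][0]["config"]["url"])
print(config["providers"]["inference"][0]["config"]["max_tokens"])
```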

The `compose.yaml` change in this commit:
```diff
@@ -71,6 +71,13 @@ services:
       - ~/.llama:/root/.llama
       - ~/local/llama-stack/distributions/remote-vllm/run.yaml:/root/llamastack-run-remote-vllm.yaml
     # network_mode: "host"
+    environment:
+      - LLAMA_INFERENCE_VLLM_URL=${LLAMA_INFERENCE_VLLM_URL:-http://host.docker.internal:5100/v1}
+      - LLAMA_INFERENCE_MODEL=${LLAMA_INFERENCE_MODEL:-Llama3.1-8B-Instruct}
+      - MAX_TOKENS=${MAX_TOKENS:-4096}
+      - SQLITE_STORE_DIR=${SQLITE_STORE_DIR:-$HOME/.llama/distributions/remote-vllm}
+      - LLAMA_SAFETY_VLLM_URL=${LLAMA_SAFETY_VLLM_URL:-http://host.docker.internal:5101/v1}
+      - LLAMA_SAFETY_MODEL=${LLAMA_SAFETY_MODEL:-Llama-Guard-3-1B}
     ports:
       - "5001:5001"
     # Hack: wait for vLLM server to start before starting docker
```

And the `run.yaml` changes:
```diff
@@ -16,16 +16,16 @@ providers:
     provider_type: remote::vllm
     config:
       # NOTE: replace with "localhost" if you are running in "host" network mode
-      url: http://host.docker.internal:5100/v1
-      max_tokens: 4096
+      url: ${env.LLAMA_INFERENCE_VLLM_URL:http://host.docker.internal:5100/v1}
+      max_tokens: ${env.MAX_TOKENS:4096}
       api_token: fake
   # serves safety llama_guard model
   - provider_id: vllm-1
     provider_type: remote::vllm
     config:
       # NOTE: replace with "localhost" if you are running in "host" network mode
-      url: http://host.docker.internal:5101/v1
-      max_tokens: 4096
+      url: ${env.LLAMA_SAFETY_VLLM_URL:http://host.docker.internal:5101/v1}
+      max_tokens: ${env.MAX_TOKENS:4096}
       api_token: fake
   memory:
   - provider_id: faiss-0
@@ -34,7 +34,7 @@ providers:
       kvstore:
         namespace: null
         type: sqlite
-        db_path: /home/ashwin/.llama/distributions/remote-vllm/faiss_store.db
+        db_path: "${env.SQLITE_STORE_DIR:/home/ashwin/.llama/distributions/remote-vllm}/faiss_store.db"
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -50,7 +50,7 @@ providers:
       persistence_store:
         namespace: null
         type: sqlite
-        db_path: /home/ashwin/.llama/distributions/remote-vllm/agents_store.db
+        db_path: "${env.SQLITE_STORE_DIR:/home/ashwin/.llama/distributions/remote-vllm}/agents_store.db"
   telemetry:
   - provider_id: meta0
     provider_type: inline::meta-reference
@@ -58,11 +58,11 @@ providers:
 metadata_store:
   namespace: null
   type: sqlite
-  db_path: /home/ashwin/.llama/distributions/remote-vllm/registry.db
+  db_path: "${env.SQLITE_STORE_DIR:/home/ashwin/.llama/distributions/remote-vllm}/registry.db"
 models:
-- model_id: Llama3.1-8B-Instruct
+- model_id: ${env.LLAMA_INFERENCE_MODEL:Llama3.1-8B-Instruct}
   provider_id: vllm-0
-- model_id: Llama-Guard-3-1B
+- model_id: ${env.LLAMA_SAFETY_MODEL:Llama-Guard-3-1B}
   provider_id: vllm-1
 shields:
-- shield_id: Llama-Guard-3-1B
+- shield_id: ${env.LLAMA_SAFETY_MODEL:Llama-Guard-3-1B}
```