diff --git a/distributions/remote-vllm/compose.yaml b/distributions/remote-vllm/compose.yaml
index 27d7de4e2..90d58a2af 100644
--- a/distributions/remote-vllm/compose.yaml
+++ b/distributions/remote-vllm/compose.yaml
@@ -71,6 +71,13 @@ services:
       - ~/.llama:/root/.llama
       - ~/local/llama-stack/distributions/remote-vllm/run.yaml:/root/llamastack-run-remote-vllm.yaml
     # network_mode: "host"
+    environment:
+      - LLAMA_INFERENCE_VLLM_URL=${LLAMA_INFERENCE_VLLM_URL:-http://host.docker.internal:5100/v1}
+      - LLAMA_INFERENCE_MODEL=${LLAMA_INFERENCE_MODEL:-Llama3.1-8B-Instruct}
+      - MAX_TOKENS=${MAX_TOKENS:-4096}
+      - SQLITE_STORE_DIR=${SQLITE_STORE_DIR:-$HOME/.llama/distributions/remote-vllm}
+      - LLAMA_SAFETY_VLLM_URL=${LLAMA_SAFETY_VLLM_URL:-http://host.docker.internal:5101/v1}
+      - LLAMA_SAFETY_MODEL=${LLAMA_SAFETY_MODEL:-Llama-Guard-3-1B}
     ports:
       - "5001:5001"
     # Hack: wait for vLLM server to start before starting docker
diff --git a/distributions/remote-vllm/run.yaml b/distributions/remote-vllm/run.yaml
index af02b1ba5..eae5b8a6f 100644
--- a/distributions/remote-vllm/run.yaml
+++ b/distributions/remote-vllm/run.yaml
@@ -16,16 +16,16 @@ providers:
     provider_type: remote::vllm
     config:
       # NOTE: replace with "localhost" if you are running in "host" network mode
-      url: http://host.docker.internal:5100/v1
-      max_tokens: 4096
+      url: ${env.LLAMA_INFERENCE_VLLM_URL:http://host.docker.internal:5100/v1}
+      max_tokens: ${env.MAX_TOKENS:4096}
       api_token: fake
   # serves safety llama_guard model
   - provider_id: vllm-1
     provider_type: remote::vllm
     config:
       # NOTE: replace with "localhost" if you are running in "host" network mode
-      url: http://host.docker.internal:5101/v1
-      max_tokens: 4096
+      url: ${env.LLAMA_SAFETY_VLLM_URL:http://host.docker.internal:5101/v1}
+      max_tokens: ${env.MAX_TOKENS:4096}
       api_token: fake
   memory:
   - provider_id: faiss-0
@@ -34,7 +34,7 @@ providers:
       kvstore:
         namespace: null
         type: sqlite
-        db_path: /home/ashwin/.llama/distributions/remote-vllm/faiss_store.db
+        db_path: "${env.SQLITE_STORE_DIR:/home/ashwin/.llama/distributions/remote-vllm}/faiss_store.db"
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -50,7 +50,7 @@ providers:
       persistence_store:
         namespace: null
         type: sqlite
-        db_path: /home/ashwin/.llama/distributions/remote-vllm/agents_store.db
+        db_path: "${env.SQLITE_STORE_DIR:/home/ashwin/.llama/distributions/remote-vllm}/agents_store.db"
   telemetry:
   - provider_id: meta0
     provider_type: inline::meta-reference
@@ -58,11 +58,11 @@ providers:
 metadata_store:
   namespace: null
   type: sqlite
-  db_path: /home/ashwin/.llama/distributions/remote-vllm/registry.db
+  db_path: "${env.SQLITE_STORE_DIR:/home/ashwin/.llama/distributions/remote-vllm}/registry.db"
 models:
-- model_id: Llama3.1-8B-Instruct
+- model_id: ${env.LLAMA_INFERENCE_MODEL:Llama3.1-8B-Instruct}
   provider_id: vllm-0
-- model_id: Llama-Guard-3-1B
+- model_id: ${env.LLAMA_SAFETY_MODEL:Llama-Guard-3-1B}
   provider_id: vllm-1
 shields:
-- shield_id: Llama-Guard-3-1B
+- shield_id: ${env.LLAMA_SAFETY_MODEL:Llama-Guard-3-1B}
diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py
index 741f7cf97..ab0df2ff5 100644
--- a/llama_stack/distribution/server/server.py
+++ b/llama_stack/distribution/server/server.py
@@ -295,11 +295,17 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
             env_var = match.group(1)
             default_val = match.group(2)
 
-            if env_var not in os.environ:
-                if default_val is None:
+            if default_val is None:
+                if env_var not in os.environ:
                     raise EnvVarError(env_var, path)
-                return default_val
-            return os.environ[env_var]
+                value = os.environ[env_var]
+            else:
+                # use the default if env var is "nullish"
+                value = os.environ.get(env_var)
+                if not value:
+                    value = default_val
+
+            return value
 
         try:
             return re.sub(pattern, get_env_var, config)