Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-31 16:01:46 +00:00)
showcase using env vars
Commit 4074fcf83c (parent e5ef94ded8)
3 changed files with 27 additions and 14 deletions
@@ -71,6 +71,13 @@ services:
       - ~/.llama:/root/.llama
       - ~/local/llama-stack/distributions/remote-vllm/run.yaml:/root/llamastack-run-remote-vllm.yaml
     # network_mode: "host"
+    environment:
+      - LLAMA_INFERENCE_VLLM_URL=${LLAMA_INFERENCE_VLLM_URL:-http://host.docker.internal:5100/v1}
+      - LLAMA_INFERENCE_MODEL=${LLAMA_INFERENCE_MODEL:-Llama3.1-8B-Instruct}
+      - MAX_TOKENS=${MAX_TOKENS:-4096}
+      - SQLITE_STORE_DIR=${SQLITE_STORE_DIR:-$HOME/.llama/distributions/remote-vllm}
+      - LLAMA_SAFETY_VLLM_URL=${LLAMA_SAFETY_VLLM_URL:-http://host.docker.internal:5101/v1}
+      - LLAMA_SAFETY_MODEL=${LLAMA_SAFETY_MODEL:-Llama-Guard-3-1B}
     ports:
       - "5001:5001"
     # Hack: wait for vLLM server to start before starting docker
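Note: the values above use Compose's shell-style "${VAR:-default}" interpolation, where the default after ":-" wins when the variable is unset or empty. A minimal sketch of that resolution (not project code; the regex and helper are illustrative only):

    import os
    import re

    # Compose-style "${VAR:-default}": fall back when VAR is unset or empty.
    _PATTERN = re.compile(r"\$\{(\w+):-([^}]*)\}")

    def interpolate(text: str) -> str:
        def repl(match: re.Match) -> str:
            name, default = match.group(1), match.group(2)
            return os.environ.get(name) or default
        return _PATTERN.sub(repl, text)

    # Override only the inference endpoint; everything else keeps its default.
    os.environ["LLAMA_INFERENCE_VLLM_URL"] = "http://localhost:8000/v1"
    print(interpolate("${LLAMA_INFERENCE_VLLM_URL:-http://host.docker.internal:5100/v1}"))
    # -> http://localhost:8000/v1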
@@ -16,16 +16,16 @@ providers:
     provider_type: remote::vllm
     config:
       # NOTE: replace with "localhost" if you are running in "host" network mode
-      url: http://host.docker.internal:5100/v1
-      max_tokens: 4096
+      url: ${env.LLAMA_INFERENCE_VLLM_URL:http://host.docker.internal:5100/v1}
+      max_tokens: ${env.MAX_TOKENS:4096}
       api_token: fake
   # serves safety llama_guard model
   - provider_id: vllm-1
     provider_type: remote::vllm
     config:
       # NOTE: replace with "localhost" if you are running in "host" network mode
-      url: http://host.docker.internal:5101/v1
-      max_tokens: 4096
+      url: ${env.LLAMA_SAFETY_VLLM_URL:http://host.docker.internal:5101/v1}
+      max_tokens: ${env.MAX_TOKENS:4096}
       api_token: fake
   memory:
   - provider_id: faiss-0
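The same variable names and defaults appear both in the compose file above and in these run.yaml placeholders, so on my reading of the change (the chaining itself is an assumption, not something the diff states) a single export on the host flows through both layers, and with nothing exported both layers fall back to the same value. A toy simulation with plain dicts standing in for the host and container environments:

    # Toy simulation only; the dicts are not project code.
    DEFAULT_URL = "http://host.docker.internal:5100/v1"

    def layer(env: dict, name: str, default: str) -> str:
        # both compose's "${VAR:-default}" and run.yaml's "${env.VAR:default}"
        # fall back when the variable is unset or empty
        return env.get(name) or default

    host_env = {"LLAMA_INFERENCE_VLLM_URL": "http://localhost:8000/v1"}

    # layer 1: compose decides what the container sees
    container_env = {
        "LLAMA_INFERENCE_VLLM_URL": layer(host_env, "LLAMA_INFERENCE_VLLM_URL", DEFAULT_URL),
    }

    # layer 2: run.yaml resolves its placeholder from the container environment
    url = layer(container_env, "LLAMA_INFERENCE_VLLM_URL", DEFAULT_URL)
    print(url)  # -> http://localhost:8000/v1 (the host override wins in both layers)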
@@ -34,7 +34,7 @@ providers:
       kvstore:
         namespace: null
         type: sqlite
-        db_path: /home/ashwin/.llama/distributions/remote-vllm/faiss_store.db
+        db_path: "${env.SQLITE_STORE_DIR:/home/ashwin/.llama/distributions/remote-vllm}/faiss_store.db"
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard
@@ -50,7 +50,7 @@ providers:
       persistence_store:
         namespace: null
         type: sqlite
-        db_path: /home/ashwin/.llama/distributions/remote-vllm/agents_store.db
+        db_path: "${env.SQLITE_STORE_DIR:/home/ashwin/.llama/distributions/remote-vllm}/agents_store.db"
   telemetry:
   - provider_id: meta0
     provider_type: inline::meta-reference
@@ -58,11 +58,11 @@ providers:
 metadata_store:
   namespace: null
   type: sqlite
-  db_path: /home/ashwin/.llama/distributions/remote-vllm/registry.db
+  db_path: "${env.SQLITE_STORE_DIR:/home/ashwin/.llama/distributions/remote-vllm}/registry.db"
 models:
-- model_id: Llama3.1-8B-Instruct
+- model_id: ${env.LLAMA_INFERENCE_MODEL:Llama3.1-8B-Instruct}
   provider_id: vllm-0
-- model_id: Llama-Guard-3-1B
+- model_id: ${env.LLAMA_SAFETY_MODEL:Llama-Guard-3-1B}
   provider_id: vllm-1
 shields:
-- shield_id: Llama-Guard-3-1B
+- shield_id: ${env.LLAMA_SAFETY_MODEL:Llama-Guard-3-1B}
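In the db_path hunks the placeholder is embedded inside a longer string, so only the "${env.SQLITE_STORE_DIR:...}" span is substituted and the file-name suffix is kept. A rough illustration, using a plain string replace as a stand-in for the real substitution added later in this commit:

    import os

    DEFAULT_DIR = "/home/ashwin/.llama/distributions/remote-vllm"
    template = "${env.SQLITE_STORE_DIR:" + DEFAULT_DIR + "}/registry.db"

    # unset or empty SQLITE_STORE_DIR -> the default directory is kept
    store_dir = os.environ.get("SQLITE_STORE_DIR") or DEFAULT_DIR
    print(template.replace("${env.SQLITE_STORE_DIR:" + DEFAULT_DIR + "}", store_dir))
    # -> /home/ashwin/.llama/distributions/remote-vllm/registry.db when unset,
    #    or <your SQLITE_STORE_DIR>/registry.db when exported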
@ -295,11 +295,17 @@ def replace_env_vars(config: Any, path: str = "") -> Any:
|
|||
env_var = match.group(1)
|
||||
default_val = match.group(2)
|
||||
|
||||
if env_var not in os.environ:
|
||||
if default_val is None:
|
||||
if default_val is None:
|
||||
if env_var not in os.environ:
|
||||
raise EnvVarError(env_var, path)
|
||||
return default_val
|
||||
return os.environ[env_var]
|
||||
value = os.environ[env_var]
|
||||
else:
|
||||
# use the default if env var is "nullish"
|
||||
value = os.environ.get(env_var)
|
||||
if not value:
|
||||
value = default_val
|
||||
|
||||
return value
|
||||
|
||||
try:
|
||||
return re.sub(pattern, get_env_var, config)
|
||||
|
|
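For reference, the changed lookup logic as a self-contained sketch. Only get_env_var's body mirrors the added lines above; the placeholder regex, the EnvVarError class, and the resolve() wrapper are assumptions filled in so the snippet runs on its own:

    import os
    import re

    class EnvVarError(Exception):
        def __init__(self, var_name: str, path: str = ""):
            super().__init__(f"Environment variable '{var_name}' not set or empty at {path}")

    def resolve(config: str, path: str = "") -> str:
        pattern = r"\$\{env\.([A-Z0-9_]+)(?::([^}]*))?\}"  # assumed placeholder syntax

        def get_env_var(match: re.Match) -> str:
            env_var = match.group(1)
            default_val = match.group(2)

            if default_val is None:
                # no default given: the variable is required
                if env_var not in os.environ:
                    raise EnvVarError(env_var, path)
                value = os.environ[env_var]
            else:
                # use the default if env var is "nullish" (unset or empty)
                value = os.environ.get(env_var)
                if not value:
                    value = default_val

            return value

        return re.sub(pattern, get_env_var, config)

    os.environ.pop("MAX_TOKENS", None)
    print(resolve("${env.MAX_TOKENS:4096}"))              # -> 4096 (default used)
    os.environ["MAX_TOKENS"] = "2048"
    print(resolve("max_tokens: ${env.MAX_TOKENS:4096}"))  # -> max_tokens: 2048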