From b1d119466e02b8cfe7ffc2181814bf20d5a2be4e Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Sun, 17 Nov 2024 19:33:48 -0800
Subject: [PATCH] Allow setting environment variables from llama stack run and
 fix ollama

---
 distributions/ollama/run-with-safety.yaml     | 12 ++++-----
 distributions/ollama/run.yaml                 | 12 ++++-----
 .../remote-vllm/run-with-safety.yaml          |  8 +++---
 distributions/remote-vllm/run.yaml            |  8 +++---
 distributions/tgi/run-with-safety.yaml        |  8 +++---
 distributions/tgi/run.yaml                    |  8 +++---
 .../self_hosted_distro/ollama.md              |  2 +-
 llama_stack/cli/stack/build.py                | 20 +++++++++-----
 llama_stack/cli/stack/run.py                  | 19 +++++++++++++
 llama_stack/distribution/build_container.sh   |  2 ++
 llama_stack/distribution/start_conda_env.sh   | 25 ++++++++++++++++-
 llama_stack/distribution/start_container.sh   | 27 +++++++++++++++++--
 .../remote/inference/ollama/config.py         | 12 ++++-----
 .../remote/inference/ollama/ollama.py         |  2 +-
 .../providers/remote/inference/tgi/config.py  |  2 +-
 .../providers/remote/inference/vllm/config.py |  1 +
 llama_stack/templates/ollama/build.yaml       |  2 +-
 llama_stack/templates/ollama/ollama.py        | 12 ++++-----
 llama_stack/templates/template.py             |  2 +-
 19 files changed, 129 insertions(+), 55 deletions(-)

diff --git a/distributions/ollama/run-with-safety.yaml b/distributions/ollama/run-with-safety.yaml
index 4582ab800..32eb30eea 100644
--- a/distributions/ollama/run-with-safety.yaml
+++ b/distributions/ollama/run-with-safety.yaml
@@ -1,20 +1,20 @@
 version: '2'
-built_at: 2024-11-17 15:19:07.405618
+built_at: 2024-11-17 19:33:00
 image_name: ollama
-docker_image: llamastack/distribution-ollama:test-0.0.52rc3
+docker_image: null
 conda_env: null
 apis:
-- telemetry
-- agents
 - memory
-- inference
+- agents
 - safety
+- inference
+- telemetry
 providers:
   inference:
   - provider_id: ollama
     provider_type: remote::ollama
     config:
-      port: ${env.OLLAMA_PORT}
+      url: ${env.OLLAMA_URL:http://localhost:11434}
   memory:
   - provider_id: faiss
     provider_type: inline::faiss
diff --git a/distributions/ollama/run.yaml b/distributions/ollama/run.yaml
index b6d411614..110b63b0c 100644
--- a/distributions/ollama/run.yaml
+++ b/distributions/ollama/run.yaml
@@ -1,20 +1,20 @@
 version: '2'
-built_at: 2024-11-17 15:19:07.395495
+built_at: 2024-11-17 19:33:00
 image_name: ollama
-docker_image: llamastack/distribution-ollama:test-0.0.52rc3
+docker_image: null
 conda_env: null
 apis:
-- telemetry
-- agents
 - memory
-- inference
+- agents
 - safety
+- inference
+- telemetry
 providers:
   inference:
   - provider_id: ollama
     provider_type: remote::ollama
     config:
-      port: ${env.OLLAMA_PORT}
+      url: ${env.OLLAMA_URL:http://localhost:11434}
   memory:
   - provider_id: faiss
     provider_type: inline::faiss
diff --git a/distributions/remote-vllm/run-with-safety.yaml b/distributions/remote-vllm/run-with-safety.yaml
index 43eb955d7..4f4cce415 100644
--- a/distributions/remote-vllm/run-with-safety.yaml
+++ b/distributions/remote-vllm/run-with-safety.yaml
@@ -1,14 +1,14 @@
 version: '2'
-built_at: 2024-11-17 15:19:07.405727
+built_at: 2024-11-17 19:33:00
 image_name: remote-vllm
 docker_image: llamastack/distribution-remote-vllm:test-0.0.52rc3
 conda_env: null
 apis:
-- telemetry
-- agents
 - memory
-- inference
+- agents
 - safety
+- inference
+- telemetry
 providers:
   inference:
   - provider_id: vllm-inference
diff --git a/distributions/remote-vllm/run.yaml b/distributions/remote-vllm/run.yaml
index 4552e6571..4617a48d2 100644
--- a/distributions/remote-vllm/run.yaml
+++ b/distributions/remote-vllm/run.yaml
@@ -1,14 +1,14 @@
 version: '2'
-built_at: 2024-11-17 15:19:07.395327
+built_at: 2024-11-17 19:33:00
 image_name: remote-vllm
 docker_image: llamastack/distribution-remote-vllm:test-0.0.52rc3
 conda_env: null
 apis:
-- telemetry
-- agents
 - memory
-- inference
+- agents
 - safety
+- inference
+- telemetry
 providers:
   inference:
   - provider_id: vllm-inference
diff --git a/distributions/tgi/run-with-safety.yaml b/distributions/tgi/run-with-safety.yaml
index d8a4619f6..35828a37a 100644
--- a/distributions/tgi/run-with-safety.yaml
+++ b/distributions/tgi/run-with-safety.yaml
@@ -1,14 +1,14 @@
 version: '2'
-built_at: 2024-11-17 15:19:09.184709
+built_at: 2024-11-17 19:33:00
 image_name: tgi
 docker_image: llamastack/distribution-tgi:test-0.0.52rc3
 conda_env: null
 apis:
-- telemetry
-- agents
 - memory
-- inference
+- agents
 - safety
+- inference
+- telemetry
 providers:
   inference:
   - provider_id: tgi-inference
diff --git a/distributions/tgi/run.yaml b/distributions/tgi/run.yaml
index 1d01c8ea8..fea2f5da8 100644
--- a/distributions/tgi/run.yaml
+++ b/distributions/tgi/run.yaml
@@ -1,14 +1,14 @@
 version: '2'
-built_at: 2024-11-17 15:19:09.156305
+built_at: 2024-11-17 19:33:00
 image_name: tgi
 docker_image: llamastack/distribution-tgi:test-0.0.52rc3
 conda_env: null
 apis:
-- telemetry
-- agents
 - memory
-- inference
+- agents
 - safety
+- inference
+- telemetry
 providers:
   inference:
   - provider_id: tgi-inference
diff --git a/docs/source/getting_started/distributions/self_hosted_distro/ollama.md b/docs/source/getting_started/distributions/self_hosted_distro/ollama.md
index 37c5851ab..9f3757301 100644
--- a/docs/source/getting_started/distributions/self_hosted_distro/ollama.md
+++ b/docs/source/getting_started/distributions/self_hosted_distro/ollama.md
@@ -20,7 +20,7 @@ The following environment variables can be configured:
 
 - `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
 - `INFERENCE_MODEL`: Inference model loaded into the TGI server (default: `meta-llama/Llama-3.2-3B-Instruct`)
-- `OLLAMA_PORT`: Port of the Ollama server (default: `14343`)
+- `OLLAMA_URL`: URL of the Ollama server (default: `http://host.docker.internal:11434`)
 - `SAFETY_MODEL`: Name of the safety (Llama-Guard) model to use (default: `meta-llama/Llama-Guard-3-1B`)
 
 ### Models
diff --git a/llama_stack/cli/stack/build.py b/llama_stack/cli/stack/build.py
index 94d41cfab..f7631bca3 100644
--- a/llama_stack/cli/stack/build.py
+++ b/llama_stack/cli/stack/build.py
@@ -217,15 +217,23 @@ class StackBuild(Subcommand):
                 provider_types = [provider_types]
 
             for i, provider_type in enumerate(provider_types):
-                p_spec = Provider(
-                    provider_id=f"{provider_type}-{i}",
-                    provider_type=provider_type,
-                    config={},
-                )
+                pid = provider_type.split("::")[-1]
+
                 config_type = instantiate_class_type(
                     provider_registry[Api(api)][provider_type].config_class
                 )
-                p_spec.config = config_type()
+                if hasattr(config_type, "sample_run_config"):
+                    config = config_type.sample_run_config(
+                        __distro_dir__=f"distributions/{build_config.name}"
+                    )
+                else:
+                    config = {}
+
+                p_spec = Provider(
+                    provider_id=f"{pid}-{i}" if len(provider_types) > 1 else pid,
+                    provider_type=provider_type,
+                    config=config,
+                )
                 run_config.providers[api].append(p_spec)
 
         os.makedirs(build_dir, exist_ok=True)
diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py
index 842703d4c..961d02d5b 100644
--- a/llama_stack/cli/stack/run.py
+++ b/llama_stack/cli/stack/run.py
@@ -39,6 +39,13 @@
             help="Disable IPv6 support",
             default=False,
         )
+        self.parser.add_argument(
+            "--env",
+            action="append",
help="Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times.", + default=[], + metavar="KEY=VALUE", + ) def _run_stack_run_cmd(self, args: argparse.Namespace) -> None: from pathlib import Path @@ -97,4 +104,16 @@ class StackRun(Subcommand): if args.disable_ipv6: run_args.append("--disable-ipv6") + for env_var in args.env: + if "=" not in env_var: + self.parser.error( + f"Environment variable '{env_var}' must be in KEY=VALUE format" + ) + return + key, value = env_var.split("=", 1) # split on first = only + if not key: + self.parser.error(f"Environment variable '{env_var}' has empty key") + return + run_args.extend(["--env", f"{key}={value}"]) + run_with_pty(run_args) diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh index 0764fee62..139883618 100755 --- a/llama_stack/distribution/build_container.sh +++ b/llama_stack/distribution/build_container.sh @@ -146,6 +146,8 @@ fi # Set version tag based on PyPI version if [ -n "$TEST_PYPI_VERSION" ]; then version_tag="test-$TEST_PYPI_VERSION" +elif [[ -n "$LLAMA_STACK_DIR" || -n "$LLAMA_MODELS_DIR" ]]; then + version_tag="dev" else URL="https://pypi.org/pypi/llama-stack/json" version_tag=$(curl -s $URL | jq -r '.info.version') diff --git a/llama_stack/distribution/start_conda_env.sh b/llama_stack/distribution/start_conda_env.sh index 3d91564b8..56e921d13 100755 --- a/llama_stack/distribution/start_conda_env.sh +++ b/llama_stack/distribution/start_conda_env.sh @@ -33,10 +33,33 @@ shift port="$1" shift +# Process environment variables from --env arguments +env_vars="" +while [[ $# -gt 0 ]]; do + case "$1" in + --env) + + if [[ -n "$2" ]]; then + # collect environment variables so we can set them after activating the conda env + env_vars="$env_vars $2" + shift 2 + else + echo -e "${RED}Error: --env requires a KEY=VALUE argument${NC}" >&2 + exit 1 + fi + ;; + *) + shift + ;; + esac +done + eval "$(conda shell.bash hook)" conda deactivate && conda activate "$env_name" -$CONDA_PREFIX/bin/python \ +set -x +$env_vars \ + $CONDA_PREFIX/bin/python \ -m llama_stack.distribution.server.server \ --yaml_config "$yaml_config" \ --port "$port" "$@" diff --git a/llama_stack/distribution/start_container.sh b/llama_stack/distribution/start_container.sh index 1efb76fb9..c56606826 100755 --- a/llama_stack/distribution/start_container.sh +++ b/llama_stack/distribution/start_container.sh @@ -31,7 +31,7 @@ if [ $# -lt 3 ]; then fi build_name="$1" -docker_image="distribution-$build_name" +docker_image="localhost/distribution-$build_name" shift yaml_config="$1" @@ -40,6 +40,26 @@ shift port="$1" shift +# Process environment variables from --env arguments +env_vars="" +while [[ $# -gt 0 ]]; do + case "$1" in + --env) + echo "env = $2" + if [[ -n "$2" ]]; then + env_vars="$env_vars -e $2" + shift 2 + else + echo -e "${RED}Error: --env requires a KEY=VALUE argument${NC}" >&2 + exit 1 + fi + ;; + *) + shift + ;; + esac +done + set -x if command -v selinuxenabled &> /dev/null && selinuxenabled; then @@ -59,15 +79,18 @@ fi version_tag="latest" if [ -n "$PYPI_VERSION" ]; then version_tag="$PYPI_VERSION" +elif [ -n "$LLAMA_STACK_DIR" ]; then + version_tag="dev" elif [ -n "$TEST_PYPI_VERSION" ]; then version_tag="test-$TEST_PYPI_VERSION" fi $DOCKER_BINARY run $DOCKER_OPTS -it \ -p $port:$port \ + $env_vars \ -v "$yaml_config:/app/config.yaml" \ $mounts \ $docker_image:$version_tag \ python -m llama_stack.distribution.server.server \ --yaml_config /app/config.yaml \ - --port $port "$@" + 
--port "$port" diff --git a/llama_stack/providers/remote/inference/ollama/config.py b/llama_stack/providers/remote/inference/ollama/config.py index 09900ecf2..ad16cac62 100644 --- a/llama_stack/providers/remote/inference/ollama/config.py +++ b/llama_stack/providers/remote/inference/ollama/config.py @@ -6,17 +6,17 @@ from typing import Any, Dict -from llama_stack.distribution.datatypes import RemoteProviderConfig +from pydantic import BaseModel -DEFAULT_OLLAMA_PORT = 11434 +DEFAULT_OLLAMA_URL = "http://localhost:11434" -class OllamaImplConfig(RemoteProviderConfig): - port: int +class OllamaImplConfig(BaseModel): + url: str = DEFAULT_OLLAMA_URL @classmethod def sample_run_config( - cls, port_str: str = str(DEFAULT_OLLAMA_PORT) + cls, url: str = "${env.OLLAMA_URL:http://localhost:11434}", **kwargs ) -> Dict[str, Any]: - return {"port": port_str} + return {"url": url} diff --git a/llama_stack/providers/remote/inference/ollama/ollama.py b/llama_stack/providers/remote/inference/ollama/ollama.py index 3b3f3868b..27bf0088e 100644 --- a/llama_stack/providers/remote/inference/ollama/ollama.py +++ b/llama_stack/providers/remote/inference/ollama/ollama.py @@ -82,7 +82,7 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate): return AsyncClient(host=self.url) async def initialize(self) -> None: - print("Initializing Ollama, checking connectivity to server...") + print(f"checking connectivity to Ollama at `{self.url}`...") try: await self.client.ps() except httpx.ConnectError as e: diff --git a/llama_stack/providers/remote/inference/tgi/config.py b/llama_stack/providers/remote/inference/tgi/config.py index 4441b1352..55bda4179 100644 --- a/llama_stack/providers/remote/inference/tgi/config.py +++ b/llama_stack/providers/remote/inference/tgi/config.py @@ -21,7 +21,7 @@ class TGIImplConfig(BaseModel): ) @classmethod - def sample_run_config(cls, url: str = "${env.TGI_URL}"): + def sample_run_config(cls, url: str = "${env.TGI_URL}", **kwargs): return { "url": url, } diff --git a/llama_stack/providers/remote/inference/vllm/config.py b/llama_stack/providers/remote/inference/vllm/config.py index e1d932c87..a3a4c6930 100644 --- a/llama_stack/providers/remote/inference/vllm/config.py +++ b/llama_stack/providers/remote/inference/vllm/config.py @@ -29,6 +29,7 @@ class VLLMInferenceAdapterConfig(BaseModel): def sample_run_config( cls, url: str = "${env.VLLM_URL}", + **kwargs, ): return { "url": url, diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml index 45ab2a6e5..106449309 100644 --- a/llama_stack/templates/ollama/build.yaml +++ b/llama_stack/templates/ollama/build.yaml @@ -2,7 +2,7 @@ version: '2' name: ollama distribution_spec: description: Use (an external) Ollama server for running LLM inference - docker_image: llamastack/distribution-ollama:test-0.0.52rc3 + docker_image: null providers: inference: - remote::ollama diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py index 0c45f8dc1..deb254c80 100644 --- a/llama_stack/templates/ollama/ollama.py +++ b/llama_stack/templates/ollama/ollama.py @@ -23,9 +23,7 @@ def get_distribution_template() -> DistributionTemplate: inference_provider = Provider( provider_id="ollama", provider_type="remote::ollama", - config=OllamaImplConfig.sample_run_config( - port_str="${env.OLLAMA_PORT}", - ), + config=OllamaImplConfig.sample_run_config(), ) inference_model = ModelInput( @@ -41,7 +39,7 @@ def get_distribution_template() -> DistributionTemplate: name="ollama", 
distro_type="self_hosted", description="Use (an external) Ollama server for running LLM inference", - docker_image="llamastack/distribution-ollama:test-0.0.52rc3", + docker_image=None, template_path=Path(__file__).parent / "doc_template.md", providers=providers, default_models=[inference_model, safety_model], @@ -74,9 +72,9 @@ def get_distribution_template() -> DistributionTemplate: "meta-llama/Llama-3.2-3B-Instruct", "Inference model loaded into the TGI server", ), - "OLLAMA_PORT": ( - "14343", - "Port of the Ollama server", + "OLLAMA_URL": ( + "http://host.docker.internal:11434", + "URL of the Ollama server", ), "SAFETY_MODEL": ( "meta-llama/Llama-Guard-3-1B", diff --git a/llama_stack/templates/template.py b/llama_stack/templates/template.py index 2074f19c3..feddadb9a 100644 --- a/llama_stack/templates/template.py +++ b/llama_stack/templates/template.py @@ -87,7 +87,7 @@ class RunConfigSettings(BaseModel): return StackRunConfig( image_name=name, docker_image=docker_image, - built_at=datetime.now(), + built_at=datetime.now().strftime("%Y-%m-%d %H:%M"), apis=list(apis), providers=provider_configs, metadata_store=SqliteKVStoreConfig.sample_run_config(