From fdafbd6ec2b2f070b531f5fbc71a3d566c23dc2f Mon Sep 17 00:00:00 2001
From: Eric Huang <erichuang@meta.com>
Date: Mon, 6 Oct 2025 14:30:20 -0700
Subject: [PATCH] chore: remove --env from `llama stack run`

# What does this PR do?


## Test Plan
---
 docs/docs/building_applications/tools.mdx     |  9 ++--
 docs/docs/contributing/new_api_provider.mdx   |  2 +-
 docs/docs/distributions/building_distro.mdx   | 17 ++++---
 docs/docs/distributions/configuration.mdx     |  9 ++--
 .../remote_hosted_distro/watsonx.md           |  8 ++--
 .../distributions/self_hosted_distro/dell.md  | 44 +++++++++----------
 .../self_hosted_distro/meta-reference-gpu.md  | 20 ++++-----
 .../self_hosted_distro/nvidia.md              | 10 ++---
 .../getting_started/detailed_tutorial.mdx     |  8 ++--
 docs/getting_started_llama4.ipynb             |  2 +-
 docs/zero_to_hero_guide/README.md             |  8 ++--
 llama_stack/cli/stack/run.py                  | 30 +------------
 llama_stack/core/stack.py                     | 16 -------
 llama_stack/core/start_stack.sh               | 13 +-----
 .../distributions/dell/doc_template.md        | 42 +++++++++---------
 .../meta-reference-gpu/doc_template.md        | 20 ++++-----
 .../distributions/nvidia/doc_template.md      | 10 ++---
 scripts/install.sh                            |  4 +-
 18 files changed, 105 insertions(+), 167 deletions(-)
diff --git a/docs/docs/building_applications/tools.mdx b/docs/docs/building_applications/tools.mdx
index e5d9c46f9..3b78ec57b 100644
--- a/docs/docs/building_applications/tools.mdx
+++ b/docs/docs/building_applications/tools.mdx
@@ -219,13 +219,10 @@ group_tools = client.tools.list_tools(toolgroup_id="search_tools")
 <TabItem value="setup" label="Setup & Configuration">
 
 1. Start by registering a Tavily API key at [Tavily](https://tavily.com/).
-2. [Optional] Provide the API key directly to the Llama Stack server
+2. [Optional] Set the API key in your environment before starting the Llama Stack server
 ```bash
 export TAVILY_SEARCH_API_KEY="your key"
 ```
-```bash
---env TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY}
-```
 
 </TabItem>
 <TabItem value="implementation" label="Implementation">
@@ -273,9 +270,9 @@ for log in EventLogger().log(response):
 <TabItem value="setup" label="Setup & Configuration">
 
 1. Start by registering for a WolframAlpha API key at [WolframAlpha Developer Portal](https://developer.wolframalpha.com/access).
-2. Provide the API key either when starting the Llama Stack server:
+2. Provide the API key either by setting it in your environment before starting the Llama Stack server:
     ```bash
-    --env WOLFRAM_ALPHA_API_KEY=${WOLFRAM_ALPHA_API_KEY}
+    export WOLFRAM_ALPHA_API_KEY="your key"
     ```
     or from the client side:
     ```python
diff --git a/docs/docs/contributing/new_api_provider.mdx b/docs/docs/contributing/new_api_provider.mdx
index 4ae6d5e72..6f9744771 100644
--- a/docs/docs/contributing/new_api_provider.mdx
+++ b/docs/docs/contributing/new_api_provider.mdx
@@ -76,7 +76,7 @@ Integration tests are located in [tests/integration](https://github.com/meta-lla
 Consult [tests/integration/README.md](https://github.com/meta-llama/llama-stack/blob/main/tests/integration/README.md) for more details on how to run the tests.
 
 Note that each provider's `sample_run_config()` method (in the configuration class for that provider)
- typically references some environment variables for specifying API keys and the like. You can set these in the environment or pass these via the `--env` flag to the test command.
+ typically references some environment variables for specifying API keys and the like. You can set these in the environment before running the test command.
 
 
 ### 2. Unit Testing
diff --git a/docs/docs/distributions/building_distro.mdx b/docs/docs/distributions/building_distro.mdx
index 5b65b7f16..5ffb623b5 100644
--- a/docs/docs/distributions/building_distro.mdx
+++ b/docs/docs/distributions/building_distro.mdx
@@ -289,10 +289,10 @@ After this step is successful, you should be able to find the built container im
 docker run -d \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
+  -e INFERENCE_MODEL=$INFERENCE_MODEL \
+  -e OLLAMA_URL=http://host.docker.internal:11434 \
   localhost/distribution-ollama:dev \
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env OLLAMA_URL=http://host.docker.internal:11434
+  --port $LLAMA_STACK_PORT
 ```
 
 Here are the docker flags and their uses:
@@ -305,12 +305,12 @@ Here are the docker flags and their uses:
 
 * `localhost/distribution-ollama:dev`: The name and tag of the container image to run
 
+* `-e INFERENCE_MODEL=$INFERENCE_MODEL`: Sets the INFERENCE_MODEL environment variable in the container
+
+* `-e OLLAMA_URL=http://host.docker.internal:11434`: Sets the OLLAMA_URL environment variable in the container
+
 * `--port $LLAMA_STACK_PORT`: Port number for the server to listen on
 
-* `--env INFERENCE_MODEL=$INFERENCE_MODEL`: Sets the model to use for inference
-
-* `--env OLLAMA_URL=http://host.docker.internal:11434`: Configures the URL for the Ollama service
-
 </TabItem>
 </Tabs>
 
@@ -320,7 +320,7 @@ Now, let's start the Llama Stack Distribution Server. You will need the YAML con
 
 ```
 llama stack run -h
-usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--env KEY=VALUE]
+usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME]
                        [--image-type {venv}] [--enable-ui]
                        [config | template]
 
@@ -334,7 +334,6 @@ options:
   --port PORT           Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT. (default: 8321)
   --image-name IMAGE_NAME
                         Name of the image to run. Defaults to the current environment (default: None)
-  --env KEY=VALUE       Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times. (default: None)
   --image-type {venv}
                         Image Type used during the build. This should be venv. (default: None)
   --enable-ui           Start the UI server (default: False)
diff --git a/docs/docs/distributions/configuration.mdx b/docs/docs/distributions/configuration.mdx
index dbf879024..81243c97b 100644
--- a/docs/docs/distributions/configuration.mdx
+++ b/docs/docs/distributions/configuration.mdx
@@ -101,7 +101,7 @@ A few things to note:
 - The id is a string you can choose freely.
 - You can instantiate any number of provider instances of the same type.
 - The configuration dictionary is provider-specific.
-- Notice that configuration can reference environment variables (with default values), which are expanded at runtime. When you run a stack server (via docker or via `llama stack run`), you can specify `--env OLLAMA_URL=http://my-server:11434` to override the default value.
+- Notice that configuration can reference environment variables (with default values), which are expanded at runtime. When you run a stack server, you can set environment variables in your shell before running `llama stack run` to override the default values.
 
 ### Environment Variable Substitution
 
@@ -173,13 +173,10 @@ optional_token: ${env.OPTIONAL_TOKEN:+}
 
 #### Runtime Override
 
-You can override environment variables at runtime when starting the server:
+You can override environment variables at runtime by setting them in your shell before starting the server:
 
 ```bash
-# Override specific environment variables
-llama stack run --config run.yaml --env API_KEY=sk-123 --env BASE_URL=https://custom-api.com
-
-# Or set them in your shell
+# Set environment variables in your shell
 export API_KEY=sk-123
 export BASE_URL=https://custom-api.com
 llama stack run --config run.yaml
diff --git a/docs/docs/distributions/remote_hosted_distro/watsonx.md b/docs/docs/distributions/remote_hosted_distro/watsonx.md
index 977af90dd..5add678f3 100644
--- a/docs/docs/distributions/remote_hosted_distro/watsonx.md
+++ b/docs/docs/distributions/remote_hosted_distro/watsonx.md
@@ -69,10 +69,10 @@ docker run \
   -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./run.yaml:/root/my-run.yaml \
+  -e WATSONX_API_KEY=$WATSONX_API_KEY \
+  -e WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \
+  -e WATSONX_BASE_URL=$WATSONX_BASE_URL \
   llamastack/distribution-watsonx \
   --config /root/my-run.yaml \
-  --port $LLAMA_STACK_PORT \
-  --env WATSONX_API_KEY=$WATSONX_API_KEY \
-  --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \
-  --env WATSONX_BASE_URL=$WATSONX_BASE_URL
+  --port $LLAMA_STACK_PORT
 ```
diff --git a/docs/docs/distributions/self_hosted_distro/dell.md b/docs/docs/distributions/self_hosted_distro/dell.md
index 52d40cf9d..851eac3bf 100644
--- a/docs/docs/distributions/self_hosted_distro/dell.md
+++ b/docs/docs/distributions/self_hosted_distro/dell.md
@@ -129,11 +129,11 @@ docker run -it \
   # NOTE: mount the llama-stack / llama-model directories if testing local changes else not needed
   -v $HOME/git/llama-stack:/app/llama-stack-source -v $HOME/git/llama-models:/app/llama-models-source \
   # localhost/distribution-dell:dev if building / testing locally
-  llamastack/distribution-dell\
-  --port $LLAMA_STACK_PORT  \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env DEH_URL=$DEH_URL \
-  --env CHROMA_URL=$CHROMA_URL
+  -e INFERENCE_MODEL=$INFERENCE_MODEL \
+  -e DEH_URL=$DEH_URL \
+  -e CHROMA_URL=$CHROMA_URL \
+  llamastack/distribution-dell \
+  --port $LLAMA_STACK_PORT
 
 ```
 
@@ -154,14 +154,14 @@ docker run \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v $HOME/.llama:/root/.llama \
   -v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \
+  -e INFERENCE_MODEL=$INFERENCE_MODEL \
+  -e DEH_URL=$DEH_URL \
+  -e SAFETY_MODEL=$SAFETY_MODEL \
+  -e DEH_SAFETY_URL=$DEH_SAFETY_URL \
+  -e CHROMA_URL=$CHROMA_URL \
   llamastack/distribution-dell \
   --config /root/my-run.yaml \
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env DEH_URL=$DEH_URL \
-  --env SAFETY_MODEL=$SAFETY_MODEL \
-  --env DEH_SAFETY_URL=$DEH_SAFETY_URL \
-  --env CHROMA_URL=$CHROMA_URL
+  --port $LLAMA_STACK_PORT
 ```
 
 ### Via venv
@@ -170,21 +170,21 @@ Make sure you have done `pip install llama-stack` and have the Llama Stack CLI a
 
 ```bash
 llama stack build --distro dell --image-type venv
-llama stack run dell
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env DEH_URL=$DEH_URL \
-  --env CHROMA_URL=$CHROMA_URL
+INFERENCE_MODEL=$INFERENCE_MODEL \
+DEH_URL=$DEH_URL \
+CHROMA_URL=$CHROMA_URL \
+llama stack run dell \
+  --port $LLAMA_STACK_PORT
 ```
 
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+INFERENCE_MODEL=$INFERENCE_MODEL \
+DEH_URL=$DEH_URL \
+SAFETY_MODEL=$SAFETY_MODEL \
+DEH_SAFETY_URL=$DEH_SAFETY_URL \
+CHROMA_URL=$CHROMA_URL \
 llama stack run ./run-with-safety.yaml \
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env DEH_URL=$DEH_URL \
-  --env SAFETY_MODEL=$SAFETY_MODEL \
-  --env DEH_SAFETY_URL=$DEH_SAFETY_URL \
-  --env CHROMA_URL=$CHROMA_URL
+  --port $LLAMA_STACK_PORT
 ```
diff --git a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md
index 84b85b91c..1c0ef5f6e 100644
--- a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md
+++ b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md
@@ -84,9 +84,9 @@ docker run \
   --gpu all \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
+  -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
   llamastack/distribution-meta-reference-gpu \
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --port $LLAMA_STACK_PORT
 ```
 
 If you are using Llama Stack Safety / Shield APIs, use:
@@ -98,10 +98,10 @@ docker run \
   --gpu all \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
+  -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
+  -e SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \
   llamastack/distribution-meta-reference-gpu \
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --port $LLAMA_STACK_PORT
 ```
 
 ### Via venv
@@ -110,16 +110,16 @@ Make sure you have done `uv pip install llama-stack` and have the Llama Stack CL
 
 ```bash
 llama stack build --distro meta-reference-gpu --image-type venv
+INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
 llama stack run distributions/meta-reference-gpu/run.yaml \
-  --port 8321 \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --port 8321
 ```
 
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
+SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \
 llama stack run distributions/meta-reference-gpu/run-with-safety.yaml \
-  --port 8321 \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --port 8321
 ```
diff --git a/docs/docs/distributions/self_hosted_distro/nvidia.md b/docs/docs/distributions/self_hosted_distro/nvidia.md
index 1e52797db..a6e185442 100644
--- a/docs/docs/distributions/self_hosted_distro/nvidia.md
+++ b/docs/docs/distributions/self_hosted_distro/nvidia.md
@@ -129,10 +129,10 @@ docker run \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./run.yaml:/root/my-run.yaml \
+  -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
   llamastack/distribution-nvidia \
   --config /root/my-run.yaml \
-  --port $LLAMA_STACK_PORT \
-  --env NVIDIA_API_KEY=$NVIDIA_API_KEY
+  --port $LLAMA_STACK_PORT
 ```
 
 ### Via venv
@@ -142,10 +142,10 @@ If you've set up your local development environment, you can also build the imag
 ```bash
 INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
 llama stack build --distro nvidia --image-type venv
+NVIDIA_API_KEY=$NVIDIA_API_KEY \
+INFERENCE_MODEL=$INFERENCE_MODEL \
 llama stack run ./run.yaml \
-  --port 8321 \
-  --env NVIDIA_API_KEY=$NVIDIA_API_KEY \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL
+  --port 8321
 ```
 
 ## Example Notebooks
diff --git a/docs/docs/getting_started/detailed_tutorial.mdx b/docs/docs/getting_started/detailed_tutorial.mdx
index 33786ac0e..e6c22224d 100644
--- a/docs/docs/getting_started/detailed_tutorial.mdx
+++ b/docs/docs/getting_started/detailed_tutorial.mdx
@@ -86,9 +86,9 @@ docker run -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
+  -e OLLAMA_URL=http://host.docker.internal:11434 \
   llamastack/distribution-starter \
-  --port $LLAMA_STACK_PORT \
-  --env OLLAMA_URL=http://host.docker.internal:11434
+  --port $LLAMA_STACK_PORT
 ```
 Note to start the container with Podman, you can do the same but replace `docker` at the start of the command with
 `podman`. If you are using `podman` older than `4.7.0`, please also replace `host.docker.internal` in the `OLLAMA_URL`
@@ -106,9 +106,9 @@ docker run -it \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
   --network=host \
+  -e OLLAMA_URL=http://localhost:11434 \
   llamastack/distribution-starter \
-  --port $LLAMA_STACK_PORT \
-  --env OLLAMA_URL=http://localhost:11434
+  --port $LLAMA_STACK_PORT
 ```
 :::
 You will see output like below:
diff --git a/docs/getting_started_llama4.ipynb b/docs/getting_started_llama4.ipynb
index cd5f83517..b840117f1 100644
--- a/docs/getting_started_llama4.ipynb
+++ b/docs/getting_started_llama4.ipynb
@@ -238,7 +238,7 @@
         "def run_llama_stack_server_background():\n",
         "    log_file = open(\"llama_stack_server.log\", \"w\")\n",
         "    process = subprocess.Popen(\n",
-        "        f\"uv run --with llama-stack llama stack run meta-reference-gpu --image-type venv --env INFERENCE_MODEL={model_id}\",\n",
+        "        f\"INFERENCE_MODEL={model_id} uv run --with llama-stack llama stack run meta-reference-gpu --image-type venv\",\n",
         "        shell=True,\n",
         "        stdout=log_file,\n",
         "        stderr=log_file,\n",
diff --git a/docs/zero_to_hero_guide/README.md b/docs/zero_to_hero_guide/README.md
index 183038a88..a899d3ebe 100644
--- a/docs/zero_to_hero_guide/README.md
+++ b/docs/zero_to_hero_guide/README.md
@@ -102,12 +102,12 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
 3. **Run the Llama Stack**:
    Run the stack using uv:
    ```bash
+   INFERENCE_MODEL=$INFERENCE_MODEL \
+   SAFETY_MODEL=$SAFETY_MODEL \
+   OLLAMA_URL=$OLLAMA_URL \
    uv run --with llama-stack llama stack run starter \
       --image-type venv \
-      --port $LLAMA_STACK_PORT \
-      --env INFERENCE_MODEL=$INFERENCE_MODEL \
-      --env SAFETY_MODEL=$SAFETY_MODEL \
-      --env OLLAMA_URL=$OLLAMA_URL
+      --port $LLAMA_STACK_PORT
    ```
    Note: Every time you run a new model with `ollama run`, you will need to restart the llama stack. Otherwise it won't see the new model.
 
diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py
index cec101083..677f5e5fa 100644
--- a/llama_stack/cli/stack/run.py
+++ b/llama_stack/cli/stack/run.py
@@ -16,7 +16,7 @@ import yaml
 from llama_stack.cli.stack.utils import ImageType
 from llama_stack.cli.subcommand import Subcommand
 from llama_stack.core.datatypes import LoggingConfig, StackRunConfig
-from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars, validate_env_pair
+from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
 from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
 from llama_stack.log import get_logger
 
@@ -57,12 +57,6 @@ class StackRun(Subcommand):
             default=None,
             help="Name of the image to run. Defaults to the current environment",
         )
-        self.parser.add_argument(
-            "--env",
-            action="append",
-            help="Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times.",
-            metavar="KEY=VALUE",
-        )
         self.parser.add_argument(
             "--image-type",
             type=str,
@@ -162,34 +156,12 @@ class StackRun(Subcommand):
             if config_file:
                 run_args.extend(["--config", str(config_file)])
 
-            if args.env:
-                for env_var in args.env:
-                    if "=" not in env_var:
-                        self.parser.error(f"Environment variable '{env_var}' must be in KEY=VALUE format")
-                        return
-                    key, value = env_var.split("=", 1)  # split on first = only
-                    if not key:
-                        self.parser.error(f"Environment variable '{env_var}' has empty key")
-                        return
-                    run_args.extend(["--env", f"{key}={value}"])
-
             run_command(run_args)
 
     def _uvicorn_run(self, config_file: Path | None, args: argparse.Namespace) -> None:
         if not config_file:
             self.parser.error("Config file is required")
 
-        # Set environment variables if provided
-        if args.env:
-            for env_pair in args.env:
-                try:
-                    key, value = validate_env_pair(env_pair)
-                    logger.info(f"Setting environment variable {key} => {value}")
-                    os.environ[key] = value
-                except ValueError as e:
-                    logger.error(f"Error: {str(e)}")
-                    self.parser.error(f"Invalid environment variable format: {env_pair}")
-
         config_file = resolve_config_or_distro(str(config_file), Mode.RUN)
         with open(config_file) as fp:
             config_contents = yaml.safe_load(fp)
diff --git a/llama_stack/core/stack.py b/llama_stack/core/stack.py
index d5d55319a..acc02eeff 100644
--- a/llama_stack/core/stack.py
+++ b/llama_stack/core/stack.py
@@ -274,22 +274,6 @@ def cast_image_name_to_string(config_dict: dict[str, Any]) -> dict[str, Any]:
     return config_dict
 
 
-def validate_env_pair(env_pair: str) -> tuple[str, str]:
-    """Validate and split an environment variable key-value pair."""
-    try:
-        key, value = env_pair.split("=", 1)
-        key = key.strip()
-        if not key:
-            raise ValueError(f"Empty key in environment variable pair: {env_pair}")
-        if not all(c.isalnum() or c == "_" for c in key):
-            raise ValueError(f"Key must contain only alphanumeric characters and underscores: {key}")
-        return key, value
-    except ValueError as e:
-        raise ValueError(
-            f"Invalid environment variable format '{env_pair}': {str(e)}. Expected format: KEY=value"
-        ) from e
-
-
 def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConfig) -> None:
     """Add internal implementations (inspect and providers) to the implementations dictionary.
 
diff --git a/llama_stack/core/start_stack.sh b/llama_stack/core/start_stack.sh
index 02b1cd408..cc0ae68d8 100755
--- a/llama_stack/core/start_stack.sh
+++ b/llama_stack/core/start_stack.sh
@@ -25,7 +25,7 @@ error_handler() {
 trap 'error_handler ${LINENO}' ERR
 
 if [ $# -lt 3 ]; then
-  echo "Usage: $0 <env_type> <env_path_or_name> <port> [--config <yaml_config>] [--env KEY=VALUE]..."
+  echo "Usage: $0 <env_type> <env_path_or_name> <port> [--config <yaml_config>]"
   exit 1
 fi
 
@@ -43,7 +43,6 @@ SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
 
 # Initialize variables
 yaml_config=""
-env_vars=""
 other_args=""
 
 # Process remaining arguments
@@ -58,15 +57,6 @@ while [[ $# -gt 0 ]]; do
         exit 1
       fi
       ;;
-    --env)
-      if [[ -n "$2" ]]; then
-        env_vars="$env_vars --env $2"
-        shift 2
-      else
-        echo -e "${RED}Error: --env requires a KEY=VALUE argument${NC}" >&2
-        exit 1
-      fi
-      ;;
     *)
       other_args="$other_args $1"
       shift
@@ -119,7 +109,6 @@ if [[ "$env_type" == "venv" ]]; then
     llama stack run \
     $yaml_config_arg \
     --port "$port" \
-    $env_vars \
     $other_args
 elif [[ "$env_type" == "container" ]]; then
     echo -e "${RED}Warning: Llama Stack no longer supports running Containers via the 'llama stack run' command.${NC}"
diff --git a/llama_stack/distributions/dell/doc_template.md b/llama_stack/distributions/dell/doc_template.md
index fcec3ea14..852e78d0e 100644
--- a/llama_stack/distributions/dell/doc_template.md
+++ b/llama_stack/distributions/dell/doc_template.md
@@ -117,11 +117,11 @@ docker run -it \
   # NOTE: mount the llama-stack directory if testing local changes else not needed
   -v $HOME/git/llama-stack:/app/llama-stack-source \
   # localhost/distribution-dell:dev if building / testing locally
+  -e INFERENCE_MODEL=$INFERENCE_MODEL \
+  -e DEH_URL=$DEH_URL \
+  -e CHROMA_URL=$CHROMA_URL \
   llamastack/distribution-{{ name }}\
-  --port $LLAMA_STACK_PORT  \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env DEH_URL=$DEH_URL \
-  --env CHROMA_URL=$CHROMA_URL
+  --port $LLAMA_STACK_PORT
 
 ```
 
@@ -142,14 +142,14 @@ docker run \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v $HOME/.llama:/root/.llama \
   -v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \
+  -e INFERENCE_MODEL=$INFERENCE_MODEL \
+  -e DEH_URL=$DEH_URL \
+  -e SAFETY_MODEL=$SAFETY_MODEL \
+  -e DEH_SAFETY_URL=$DEH_SAFETY_URL \
+  -e CHROMA_URL=$CHROMA_URL \
   llamastack/distribution-{{ name }} \
   --config /root/my-run.yaml \
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env DEH_URL=$DEH_URL \
-  --env SAFETY_MODEL=$SAFETY_MODEL \
-  --env DEH_SAFETY_URL=$DEH_SAFETY_URL \
-  --env CHROMA_URL=$CHROMA_URL
+  --port $LLAMA_STACK_PORT
 ```
 
 ### Via Conda
@@ -158,21 +158,21 @@ Make sure you have done `pip install llama-stack` and have the Llama Stack CLI a
 
 ```bash
 llama stack build --distro {{ name }} --image-type conda
-llama stack run {{ name }}
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env DEH_URL=$DEH_URL \
-  --env CHROMA_URL=$CHROMA_URL
+INFERENCE_MODEL=$INFERENCE_MODEL \
+DEH_URL=$DEH_URL \
+CHROMA_URL=$CHROMA_URL \
+llama stack run {{ name }} \
+  --port $LLAMA_STACK_PORT
 ```
 
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+INFERENCE_MODEL=$INFERENCE_MODEL \
+DEH_URL=$DEH_URL \
+SAFETY_MODEL=$SAFETY_MODEL \
+DEH_SAFETY_URL=$DEH_SAFETY_URL \
+CHROMA_URL=$CHROMA_URL \
 llama stack run ./run-with-safety.yaml \
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL \
-  --env DEH_URL=$DEH_URL \
-  --env SAFETY_MODEL=$SAFETY_MODEL \
-  --env DEH_SAFETY_URL=$DEH_SAFETY_URL \
-  --env CHROMA_URL=$CHROMA_URL
+  --port $LLAMA_STACK_PORT
 ```
diff --git a/llama_stack/distributions/meta-reference-gpu/doc_template.md b/llama_stack/distributions/meta-reference-gpu/doc_template.md
index 602d053c4..92dcc6102 100644
--- a/llama_stack/distributions/meta-reference-gpu/doc_template.md
+++ b/llama_stack/distributions/meta-reference-gpu/doc_template.md
@@ -72,9 +72,9 @@ docker run \
   --gpu all \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
+  -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
   llamastack/distribution-{{ name }} \
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --port $LLAMA_STACK_PORT
 ```
 
 If you are using Llama Stack Safety / Shield APIs, use:
@@ -86,10 +86,10 @@ docker run \
   --gpu all \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ~/.llama:/root/.llama \
+  -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
+  -e SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \
   llamastack/distribution-{{ name }} \
-  --port $LLAMA_STACK_PORT \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --port $LLAMA_STACK_PORT
 ```
 
 ### Via venv
@@ -98,16 +98,16 @@ Make sure you have done `uv pip install llama-stack` and have the Llama Stack CL
 
 ```bash
 llama stack build --distro {{ name }} --image-type venv
+INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
 llama stack run distributions/{{ name }}/run.yaml \
-  --port 8321 \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct
+  --port 8321
 ```
 
 If you are using Llama Stack Safety / Shield APIs, use:
 
 ```bash
+INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
+SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \
 llama stack run distributions/{{ name }}/run-with-safety.yaml \
-  --port 8321 \
-  --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \
-  --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B
+  --port 8321
 ```
diff --git a/llama_stack/distributions/nvidia/doc_template.md b/llama_stack/distributions/nvidia/doc_template.md
index fbee17ef8..df2b68ef7 100644
--- a/llama_stack/distributions/nvidia/doc_template.md
+++ b/llama_stack/distributions/nvidia/doc_template.md
@@ -118,10 +118,10 @@ docker run \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
   -v ./run.yaml:/root/my-run.yaml \
+  -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
   llamastack/distribution-{{ name }} \
   --config /root/my-run.yaml \
-  --port $LLAMA_STACK_PORT \
-  --env NVIDIA_API_KEY=$NVIDIA_API_KEY
+  --port $LLAMA_STACK_PORT
 ```
 
 ### Via venv
@@ -131,10 +131,10 @@ If you've set up your local development environment, you can also build the imag
 ```bash
 INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
 llama stack build --distro nvidia --image-type venv
+NVIDIA_API_KEY=$NVIDIA_API_KEY \
+INFERENCE_MODEL=$INFERENCE_MODEL \
 llama stack run ./run.yaml \
-  --port 8321 \
-  --env NVIDIA_API_KEY=$NVIDIA_API_KEY \
-  --env INFERENCE_MODEL=$INFERENCE_MODEL
+  --port 8321
 ```
 
 ## Example Notebooks
diff --git a/scripts/install.sh b/scripts/install.sh
index f6fbc259c..571468dc5 100755
--- a/scripts/install.sh
+++ b/scripts/install.sh
@@ -221,8 +221,8 @@ fi
 cmd=( run -d "${PLATFORM_OPTS[@]}" --name llama-stack \
       --network llama-net \
       -p "${PORT}:${PORT}" \
-      "${SERVER_IMAGE}" --port "${PORT}" \
-      --env OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}")
+      -e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}" \
+      "${SERVER_IMAGE}" --port "${PORT}")
 
 log "🦙 Starting Llama Stack..."
 if ! execute_with_log $ENGINE "${cmd[@]}"; then