From fdafbd6ec2b2f070b531f5fbc71a3d566c23dc2f Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Mon, 6 Oct 2025 14:30:20 -0700 Subject: [PATCH] chore: remove --env from `llama stack run` # What does this PR do? ## Test Plan --- docs/docs/building_applications/tools.mdx | 9 ++-- docs/docs/contributing/new_api_provider.mdx | 2 +- docs/docs/distributions/building_distro.mdx | 17 ++++--- docs/docs/distributions/configuration.mdx | 9 ++-- .../remote_hosted_distro/watsonx.md | 8 ++-- .../distributions/self_hosted_distro/dell.md | 44 +++++++++---------- .../self_hosted_distro/meta-reference-gpu.md | 20 ++++----- .../self_hosted_distro/nvidia.md | 10 ++--- .../getting_started/detailed_tutorial.mdx | 8 ++-- docs/getting_started_llama4.ipynb | 2 +- docs/zero_to_hero_guide/README.md | 8 ++-- llama_stack/cli/stack/run.py | 30 +------------ llama_stack/core/stack.py | 16 ------- llama_stack/core/start_stack.sh | 13 +----- .../distributions/dell/doc_template.md | 42 +++++++++--------- .../meta-reference-gpu/doc_template.md | 20 ++++----- .../distributions/nvidia/doc_template.md | 10 ++--- scripts/install.sh | 4 +- 18 files changed, 105 insertions(+), 167 deletions(-) diff --git a/docs/docs/building_applications/tools.mdx b/docs/docs/building_applications/tools.mdx index e5d9c46f9..3b78ec57b 100644 --- a/docs/docs/building_applications/tools.mdx +++ b/docs/docs/building_applications/tools.mdx @@ -219,13 +219,10 @@ group_tools = client.tools.list_tools(toolgroup_id="search_tools") 1. Start by registering a Tavily API key at [Tavily](https://tavily.com/). -2. [Optional] Provide the API key directly to the Llama Stack server +2. [Optional] Set the API key in your environment before starting the Llama Stack server ```bash export TAVILY_SEARCH_API_KEY="your key" ``` -```bash ---env TAVILY_SEARCH_API_KEY=${TAVILY_SEARCH_API_KEY} -``` @@ -273,9 +270,9 @@ for log in EventLogger().log(response): 1. Start by registering for a WolframAlpha API key at [WolframAlpha Developer Portal](https://developer.wolframalpha.com/access). -2. Provide the API key either when starting the Llama Stack server: +2. Provide the API key either by setting it in your environment before starting the Llama Stack server: ```bash - --env WOLFRAM_ALPHA_API_KEY=${WOLFRAM_ALPHA_API_KEY} + export WOLFRAM_ALPHA_API_KEY="your key" ``` or from the client side: ```python diff --git a/docs/docs/contributing/new_api_provider.mdx b/docs/docs/contributing/new_api_provider.mdx index 4ae6d5e72..6f9744771 100644 --- a/docs/docs/contributing/new_api_provider.mdx +++ b/docs/docs/contributing/new_api_provider.mdx @@ -76,7 +76,7 @@ Integration tests are located in [tests/integration](https://github.com/meta-lla Consult [tests/integration/README.md](https://github.com/meta-llama/llama-stack/blob/main/tests/integration/README.md) for more details on how to run the tests. Note that each provider's `sample_run_config()` method (in the configuration class for that provider) - typically references some environment variables for specifying API keys and the like. You can set these in the environment or pass these via the `--env` flag to the test command. + typically references some environment variables for specifying API keys and the like. You can set these in the environment before running the test command. ### 2. Unit Testing diff --git a/docs/docs/distributions/building_distro.mdx b/docs/docs/distributions/building_distro.mdx index 5b65b7f16..5ffb623b5 100644 --- a/docs/docs/distributions/building_distro.mdx +++ b/docs/docs/distributions/building_distro.mdx @@ -289,10 +289,10 @@ After this step is successful, you should be able to find the built container im docker run -d \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ + -e INFERENCE_MODEL=$INFERENCE_MODEL \ + -e OLLAMA_URL=http://host.docker.internal:11434 \ localhost/distribution-ollama:dev \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env OLLAMA_URL=http://host.docker.internal:11434 + --port $LLAMA_STACK_PORT ``` Here are the docker flags and their uses: @@ -305,12 +305,12 @@ Here are the docker flags and their uses: * `localhost/distribution-ollama:dev`: The name and tag of the container image to run +* `-e INFERENCE_MODEL=$INFERENCE_MODEL`: Sets the INFERENCE_MODEL environment variable in the container + +* `-e OLLAMA_URL=http://host.docker.internal:11434`: Sets the OLLAMA_URL environment variable in the container + * `--port $LLAMA_STACK_PORT`: Port number for the server to listen on -* `--env INFERENCE_MODEL=$INFERENCE_MODEL`: Sets the model to use for inference - -* `--env OLLAMA_URL=http://host.docker.internal:11434`: Configures the URL for the Ollama service - @@ -320,7 +320,7 @@ Now, let's start the Llama Stack Distribution Server. You will need the YAML con ``` llama stack run -h -usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--env KEY=VALUE] +usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--image-type {venv}] [--enable-ui] [config | template] @@ -334,7 +334,6 @@ options: --port PORT Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT. (default: 8321) --image-name IMAGE_NAME Name of the image to run. Defaults to the current environment (default: None) - --env KEY=VALUE Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times. (default: None) --image-type {venv} Image Type used during the build. This should be venv. (default: None) --enable-ui Start the UI server (default: False) diff --git a/docs/docs/distributions/configuration.mdx b/docs/docs/distributions/configuration.mdx index dbf879024..81243c97b 100644 --- a/docs/docs/distributions/configuration.mdx +++ b/docs/docs/distributions/configuration.mdx @@ -101,7 +101,7 @@ A few things to note: - The id is a string you can choose freely. - You can instantiate any number of provider instances of the same type. - The configuration dictionary is provider-specific. -- Notice that configuration can reference environment variables (with default values), which are expanded at runtime. When you run a stack server (via docker or via `llama stack run`), you can specify `--env OLLAMA_URL=http://my-server:11434` to override the default value. +- Notice that configuration can reference environment variables (with default values), which are expanded at runtime. When you run a stack server, you can set environment variables in your shell before running `llama stack run` to override the default values. ### Environment Variable Substitution @@ -173,13 +173,10 @@ optional_token: ${env.OPTIONAL_TOKEN:+} #### Runtime Override -You can override environment variables at runtime when starting the server: +You can override environment variables at runtime by setting them in your shell before starting the server: ```bash -# Override specific environment variables -llama stack run --config run.yaml --env API_KEY=sk-123 --env BASE_URL=https://custom-api.com - -# Or set them in your shell +# Set environment variables in your shell export API_KEY=sk-123 export BASE_URL=https://custom-api.com llama stack run --config run.yaml diff --git a/docs/docs/distributions/remote_hosted_distro/watsonx.md b/docs/docs/distributions/remote_hosted_distro/watsonx.md index 977af90dd..5add678f3 100644 --- a/docs/docs/distributions/remote_hosted_distro/watsonx.md +++ b/docs/docs/distributions/remote_hosted_distro/watsonx.md @@ -69,10 +69,10 @@ docker run \ -it \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ./run.yaml:/root/my-run.yaml \ + -e WATSONX_API_KEY=$WATSONX_API_KEY \ + -e WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \ + -e WATSONX_BASE_URL=$WATSONX_BASE_URL \ llamastack/distribution-watsonx \ --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env WATSONX_API_KEY=$WATSONX_API_KEY \ - --env WATSONX_PROJECT_ID=$WATSONX_PROJECT_ID \ - --env WATSONX_BASE_URL=$WATSONX_BASE_URL + --port $LLAMA_STACK_PORT ``` diff --git a/docs/docs/distributions/self_hosted_distro/dell.md b/docs/docs/distributions/self_hosted_distro/dell.md index 52d40cf9d..851eac3bf 100644 --- a/docs/docs/distributions/self_hosted_distro/dell.md +++ b/docs/docs/distributions/self_hosted_distro/dell.md @@ -129,11 +129,11 @@ docker run -it \ # NOTE: mount the llama-stack / llama-model directories if testing local changes else not needed -v $HOME/git/llama-stack:/app/llama-stack-source -v $HOME/git/llama-models:/app/llama-models-source \ # localhost/distribution-dell:dev if building / testing locally - llamastack/distribution-dell\ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env CHROMA_URL=$CHROMA_URL + -e INFERENCE_MODEL=$INFERENCE_MODEL \ + -e DEH_URL=$DEH_URL \ + -e CHROMA_URL=$CHROMA_URL \ + llamastack/distribution-dell \ + --port $LLAMA_STACK_PORT ``` @@ -154,14 +154,14 @@ docker run \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v $HOME/.llama:/root/.llama \ -v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \ + -e INFERENCE_MODEL=$INFERENCE_MODEL \ + -e DEH_URL=$DEH_URL \ + -e SAFETY_MODEL=$SAFETY_MODEL \ + -e DEH_SAFETY_URL=$DEH_SAFETY_URL \ + -e CHROMA_URL=$CHROMA_URL \ llamastack/distribution-dell \ --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env DEH_SAFETY_URL=$DEH_SAFETY_URL \ - --env CHROMA_URL=$CHROMA_URL + --port $LLAMA_STACK_PORT ``` ### Via venv @@ -170,21 +170,21 @@ Make sure you have done `pip install llama-stack` and have the Llama Stack CLI a ```bash llama stack build --distro dell --image-type venv -llama stack run dell - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env CHROMA_URL=$CHROMA_URL +INFERENCE_MODEL=$INFERENCE_MODEL \ +DEH_URL=$DEH_URL \ +CHROMA_URL=$CHROMA_URL \ +llama stack run dell \ + --port $LLAMA_STACK_PORT ``` If you are using Llama Stack Safety / Shield APIs, use: ```bash +INFERENCE_MODEL=$INFERENCE_MODEL \ +DEH_URL=$DEH_URL \ +SAFETY_MODEL=$SAFETY_MODEL \ +DEH_SAFETY_URL=$DEH_SAFETY_URL \ +CHROMA_URL=$CHROMA_URL \ llama stack run ./run-with-safety.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env DEH_SAFETY_URL=$DEH_SAFETY_URL \ - --env CHROMA_URL=$CHROMA_URL + --port $LLAMA_STACK_PORT ``` diff --git a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md index 84b85b91c..1c0ef5f6e 100644 --- a/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md +++ b/docs/docs/distributions/self_hosted_distro/meta-reference-gpu.md @@ -84,9 +84,9 @@ docker run \ --gpu all \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ + -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ llamastack/distribution-meta-reference-gpu \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct + --port $LLAMA_STACK_PORT ``` If you are using Llama Stack Safety / Shield APIs, use: @@ -98,10 +98,10 @@ docker run \ --gpu all \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ + -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ + -e SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \ llamastack/distribution-meta-reference-gpu \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ - --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B + --port $LLAMA_STACK_PORT ``` ### Via venv @@ -110,16 +110,16 @@ Make sure you have done `uv pip install llama-stack` and have the Llama Stack CL ```bash llama stack build --distro meta-reference-gpu --image-type venv +INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ llama stack run distributions/meta-reference-gpu/run.yaml \ - --port 8321 \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct + --port 8321 ``` If you are using Llama Stack Safety / Shield APIs, use: ```bash +INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ +SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \ llama stack run distributions/meta-reference-gpu/run-with-safety.yaml \ - --port 8321 \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ - --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B + --port 8321 ``` diff --git a/docs/docs/distributions/self_hosted_distro/nvidia.md b/docs/docs/distributions/self_hosted_distro/nvidia.md index 1e52797db..a6e185442 100644 --- a/docs/docs/distributions/self_hosted_distro/nvidia.md +++ b/docs/docs/distributions/self_hosted_distro/nvidia.md @@ -129,10 +129,10 @@ docker run \ --pull always \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ./run.yaml:/root/my-run.yaml \ + -e NVIDIA_API_KEY=$NVIDIA_API_KEY \ llamastack/distribution-nvidia \ --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY + --port $LLAMA_STACK_PORT ``` ### Via venv @@ -142,10 +142,10 @@ If you've set up your local development environment, you can also build the imag ```bash INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct llama stack build --distro nvidia --image-type venv +NVIDIA_API_KEY=$NVIDIA_API_KEY \ +INFERENCE_MODEL=$INFERENCE_MODEL \ llama stack run ./run.yaml \ - --port 8321 \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY \ - --env INFERENCE_MODEL=$INFERENCE_MODEL + --port 8321 ``` ## Example Notebooks diff --git a/docs/docs/getting_started/detailed_tutorial.mdx b/docs/docs/getting_started/detailed_tutorial.mdx index 33786ac0e..e6c22224d 100644 --- a/docs/docs/getting_started/detailed_tutorial.mdx +++ b/docs/docs/getting_started/detailed_tutorial.mdx @@ -86,9 +86,9 @@ docker run -it \ --pull always \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ + -e OLLAMA_URL=http://host.docker.internal:11434 \ llamastack/distribution-starter \ - --port $LLAMA_STACK_PORT \ - --env OLLAMA_URL=http://host.docker.internal:11434 + --port $LLAMA_STACK_PORT ``` Note to start the container with Podman, you can do the same but replace `docker` at the start of the command with `podman`. If you are using `podman` older than `4.7.0`, please also replace `host.docker.internal` in the `OLLAMA_URL` @@ -106,9 +106,9 @@ docker run -it \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ --network=host \ + -e OLLAMA_URL=http://localhost:11434 \ llamastack/distribution-starter \ - --port $LLAMA_STACK_PORT \ - --env OLLAMA_URL=http://localhost:11434 + --port $LLAMA_STACK_PORT ``` ::: You will see output like below: diff --git a/docs/getting_started_llama4.ipynb b/docs/getting_started_llama4.ipynb index cd5f83517..b840117f1 100644 --- a/docs/getting_started_llama4.ipynb +++ b/docs/getting_started_llama4.ipynb @@ -238,7 +238,7 @@ "def run_llama_stack_server_background():\n", " log_file = open(\"llama_stack_server.log\", \"w\")\n", " process = subprocess.Popen(\n", - " f\"uv run --with llama-stack llama stack run meta-reference-gpu --image-type venv --env INFERENCE_MODEL={model_id}\",\n", + " f\"INFERENCE_MODEL={model_id} uv run --with llama-stack llama stack run meta-reference-gpu --image-type venv\",\n", " shell=True,\n", " stdout=log_file,\n", " stderr=log_file,\n", diff --git a/docs/zero_to_hero_guide/README.md b/docs/zero_to_hero_guide/README.md index 183038a88..a899d3ebe 100644 --- a/docs/zero_to_hero_guide/README.md +++ b/docs/zero_to_hero_guide/README.md @@ -102,12 +102,12 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next 3. **Run the Llama Stack**: Run the stack using uv: ```bash + INFERENCE_MODEL=$INFERENCE_MODEL \ + SAFETY_MODEL=$SAFETY_MODEL \ + OLLAMA_URL=$OLLAMA_URL \ uv run --with llama-stack llama stack run starter \ --image-type venv \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env OLLAMA_URL=$OLLAMA_URL + --port $LLAMA_STACK_PORT ``` Note: Every time you run a new model with `ollama run`, you will need to restart the llama stack. Otherwise it won't see the new model. diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py index cec101083..677f5e5fa 100644 --- a/llama_stack/cli/stack/run.py +++ b/llama_stack/cli/stack/run.py @@ -16,7 +16,7 @@ import yaml from llama_stack.cli.stack.utils import ImageType from llama_stack.cli.subcommand import Subcommand from llama_stack.core.datatypes import LoggingConfig, StackRunConfig -from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars, validate_env_pair +from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro from llama_stack.log import get_logger @@ -57,12 +57,6 @@ class StackRun(Subcommand): default=None, help="Name of the image to run. Defaults to the current environment", ) - self.parser.add_argument( - "--env", - action="append", - help="Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times.", - metavar="KEY=VALUE", - ) self.parser.add_argument( "--image-type", type=str, @@ -162,34 +156,12 @@ class StackRun(Subcommand): if config_file: run_args.extend(["--config", str(config_file)]) - if args.env: - for env_var in args.env: - if "=" not in env_var: - self.parser.error(f"Environment variable '{env_var}' must be in KEY=VALUE format") - return - key, value = env_var.split("=", 1) # split on first = only - if not key: - self.parser.error(f"Environment variable '{env_var}' has empty key") - return - run_args.extend(["--env", f"{key}={value}"]) - run_command(run_args) def _uvicorn_run(self, config_file: Path | None, args: argparse.Namespace) -> None: if not config_file: self.parser.error("Config file is required") - # Set environment variables if provided - if args.env: - for env_pair in args.env: - try: - key, value = validate_env_pair(env_pair) - logger.info(f"Setting environment variable {key} => {value}") - os.environ[key] = value - except ValueError as e: - logger.error(f"Error: {str(e)}") - self.parser.error(f"Invalid environment variable format: {env_pair}") - config_file = resolve_config_or_distro(str(config_file), Mode.RUN) with open(config_file) as fp: config_contents = yaml.safe_load(fp) diff --git a/llama_stack/core/stack.py b/llama_stack/core/stack.py index d5d55319a..acc02eeff 100644 --- a/llama_stack/core/stack.py +++ b/llama_stack/core/stack.py @@ -274,22 +274,6 @@ def cast_image_name_to_string(config_dict: dict[str, Any]) -> dict[str, Any]: return config_dict -def validate_env_pair(env_pair: str) -> tuple[str, str]: - """Validate and split an environment variable key-value pair.""" - try: - key, value = env_pair.split("=", 1) - key = key.strip() - if not key: - raise ValueError(f"Empty key in environment variable pair: {env_pair}") - if not all(c.isalnum() or c == "_" for c in key): - raise ValueError(f"Key must contain only alphanumeric characters and underscores: {key}") - return key, value - except ValueError as e: - raise ValueError( - f"Invalid environment variable format '{env_pair}': {str(e)}. Expected format: KEY=value" - ) from e - - def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConfig) -> None: """Add internal implementations (inspect and providers) to the implementations dictionary. diff --git a/llama_stack/core/start_stack.sh b/llama_stack/core/start_stack.sh index 02b1cd408..cc0ae68d8 100755 --- a/llama_stack/core/start_stack.sh +++ b/llama_stack/core/start_stack.sh @@ -25,7 +25,7 @@ error_handler() { trap 'error_handler ${LINENO}' ERR if [ $# -lt 3 ]; then - echo "Usage: $0 [--config ] [--env KEY=VALUE]..." + echo "Usage: $0 [--config ]" exit 1 fi @@ -43,7 +43,6 @@ SCRIPT_DIR=$(dirname "$(readlink -f "$0")") # Initialize variables yaml_config="" -env_vars="" other_args="" # Process remaining arguments @@ -58,15 +57,6 @@ while [[ $# -gt 0 ]]; do exit 1 fi ;; - --env) - if [[ -n "$2" ]]; then - env_vars="$env_vars --env $2" - shift 2 - else - echo -e "${RED}Error: --env requires a KEY=VALUE argument${NC}" >&2 - exit 1 - fi - ;; *) other_args="$other_args $1" shift @@ -119,7 +109,6 @@ if [[ "$env_type" == "venv" ]]; then llama stack run \ $yaml_config_arg \ --port "$port" \ - $env_vars \ $other_args elif [[ "$env_type" == "container" ]]; then echo -e "${RED}Warning: Llama Stack no longer supports running Containers via the 'llama stack run' command.${NC}" diff --git a/llama_stack/distributions/dell/doc_template.md b/llama_stack/distributions/dell/doc_template.md index fcec3ea14..852e78d0e 100644 --- a/llama_stack/distributions/dell/doc_template.md +++ b/llama_stack/distributions/dell/doc_template.md @@ -117,11 +117,11 @@ docker run -it \ # NOTE: mount the llama-stack directory if testing local changes else not needed -v $HOME/git/llama-stack:/app/llama-stack-source \ # localhost/distribution-dell:dev if building / testing locally + -e INFERENCE_MODEL=$INFERENCE_MODEL \ + -e DEH_URL=$DEH_URL \ + -e CHROMA_URL=$CHROMA_URL \ llamastack/distribution-{{ name }}\ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env CHROMA_URL=$CHROMA_URL + --port $LLAMA_STACK_PORT ``` @@ -142,14 +142,14 @@ docker run \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v $HOME/.llama:/root/.llama \ -v ./llama_stack/distributions/tgi/run-with-safety.yaml:/root/my-run.yaml \ + -e INFERENCE_MODEL=$INFERENCE_MODEL \ + -e DEH_URL=$DEH_URL \ + -e SAFETY_MODEL=$SAFETY_MODEL \ + -e DEH_SAFETY_URL=$DEH_SAFETY_URL \ + -e CHROMA_URL=$CHROMA_URL \ llamastack/distribution-{{ name }} \ --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env DEH_SAFETY_URL=$DEH_SAFETY_URL \ - --env CHROMA_URL=$CHROMA_URL + --port $LLAMA_STACK_PORT ``` ### Via Conda @@ -158,21 +158,21 @@ Make sure you have done `pip install llama-stack` and have the Llama Stack CLI a ```bash llama stack build --distro {{ name }} --image-type conda -llama stack run {{ name }} - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env CHROMA_URL=$CHROMA_URL +INFERENCE_MODEL=$INFERENCE_MODEL \ +DEH_URL=$DEH_URL \ +CHROMA_URL=$CHROMA_URL \ +llama stack run {{ name }} \ + --port $LLAMA_STACK_PORT ``` If you are using Llama Stack Safety / Shield APIs, use: ```bash +INFERENCE_MODEL=$INFERENCE_MODEL \ +DEH_URL=$DEH_URL \ +SAFETY_MODEL=$SAFETY_MODEL \ +DEH_SAFETY_URL=$DEH_SAFETY_URL \ +CHROMA_URL=$CHROMA_URL \ llama stack run ./run-with-safety.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env DEH_URL=$DEH_URL \ - --env SAFETY_MODEL=$SAFETY_MODEL \ - --env DEH_SAFETY_URL=$DEH_SAFETY_URL \ - --env CHROMA_URL=$CHROMA_URL + --port $LLAMA_STACK_PORT ``` diff --git a/llama_stack/distributions/meta-reference-gpu/doc_template.md b/llama_stack/distributions/meta-reference-gpu/doc_template.md index 602d053c4..92dcc6102 100644 --- a/llama_stack/distributions/meta-reference-gpu/doc_template.md +++ b/llama_stack/distributions/meta-reference-gpu/doc_template.md @@ -72,9 +72,9 @@ docker run \ --gpu all \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ + -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ llamastack/distribution-{{ name }} \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct + --port $LLAMA_STACK_PORT ``` If you are using Llama Stack Safety / Shield APIs, use: @@ -86,10 +86,10 @@ docker run \ --gpu all \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ + -e INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ + -e SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \ llamastack/distribution-{{ name }} \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ - --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B + --port $LLAMA_STACK_PORT ``` ### Via venv @@ -98,16 +98,16 @@ Make sure you have done `uv pip install llama-stack` and have the Llama Stack CL ```bash llama stack build --distro {{ name }} --image-type venv +INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ llama stack run distributions/{{ name }}/run.yaml \ - --port 8321 \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct + --port 8321 ``` If you are using Llama Stack Safety / Shield APIs, use: ```bash +INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ +SAFETY_MODEL=meta-llama/Llama-Guard-3-1B \ llama stack run distributions/{{ name }}/run-with-safety.yaml \ - --port 8321 \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct \ - --env SAFETY_MODEL=meta-llama/Llama-Guard-3-1B + --port 8321 ``` diff --git a/llama_stack/distributions/nvidia/doc_template.md b/llama_stack/distributions/nvidia/doc_template.md index fbee17ef8..df2b68ef7 100644 --- a/llama_stack/distributions/nvidia/doc_template.md +++ b/llama_stack/distributions/nvidia/doc_template.md @@ -118,10 +118,10 @@ docker run \ --pull always \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ./run.yaml:/root/my-run.yaml \ + -e NVIDIA_API_KEY=$NVIDIA_API_KEY \ llamastack/distribution-{{ name }} \ --config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY + --port $LLAMA_STACK_PORT ``` ### Via venv @@ -131,10 +131,10 @@ If you've set up your local development environment, you can also build the imag ```bash INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct llama stack build --distro nvidia --image-type venv +NVIDIA_API_KEY=$NVIDIA_API_KEY \ +INFERENCE_MODEL=$INFERENCE_MODEL \ llama stack run ./run.yaml \ - --port 8321 \ - --env NVIDIA_API_KEY=$NVIDIA_API_KEY \ - --env INFERENCE_MODEL=$INFERENCE_MODEL + --port 8321 ``` ## Example Notebooks diff --git a/scripts/install.sh b/scripts/install.sh index f6fbc259c..571468dc5 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -221,8 +221,8 @@ fi cmd=( run -d "${PLATFORM_OPTS[@]}" --name llama-stack \ --network llama-net \ -p "${PORT}:${PORT}" \ - "${SERVER_IMAGE}" --port "${PORT}" \ - --env OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}") + -e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}" \ + "${SERVER_IMAGE}" --port "${PORT}") log "🦙 Starting Llama Stack..." if ! execute_with_log $ENGINE "${cmd[@]}"; then