From ff182ff6de435f762608d251d7aa6652c89545c1 Mon Sep 17 00:00:00 2001
From: raghotham
Date: Fri, 10 Jan 2025 11:09:49 -0800
Subject: [PATCH] rename LLAMASTACK_PORT to LLAMA_STACK_PORT for consistency with other env vars (#744)

# What does this PR do?

Rename environment var for consistency

## Test Plan

No regressions

## Sources

## Before submitting

- [X] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
- [X] Ran pre-commit to handle lint / formatting issues.
- [X] Read the [contributor guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md), Pull Request section?
- [X] Updated relevant documentation.
- [ ] Wrote necessary unit or integration tests.

---------

Signed-off-by: Yuan Tang
Co-authored-by: Yuan Tang
---
 distributions/remote-vllm/compose.yaml | 2 +-
 docs/source/distributions/self_hosted_distro/bedrock.md | 2 +-
 docs/source/distributions/self_hosted_distro/cerebras.md | 2 +-
 docs/source/distributions/self_hosted_distro/fireworks.md | 2 +-
 .../distributions/self_hosted_distro/meta-reference-gpu.md | 2 +-
 .../self_hosted_distro/meta-reference-quantized-gpu.md | 2 +-
 docs/source/distributions/self_hosted_distro/ollama.md | 2 +-
 docs/source/distributions/self_hosted_distro/remote-vllm.md | 2 +-
 docs/source/distributions/self_hosted_distro/tgi.md | 2 +-
 docs/source/distributions/self_hosted_distro/together.md | 2 +-
 docs/zero_to_hero_guide/README.md | 2 +-
 llama_stack/cli/stack/run.py | 2 +-
 llama_stack/distribution/server/server.py | 2 +-
 llama_stack/distribution/start_container.sh | 2 +-
 llama_stack/templates/bedrock/bedrock.py | 2 +-
 llama_stack/templates/cerebras/cerebras.py | 2 +-
 llama_stack/templates/fireworks/fireworks.py | 2 +-
 llama_stack/templates/hf-endpoint/hf_endpoint.py | 2 +-
 llama_stack/templates/hf-serverless/hf_serverless.py | 2 +-
 llama_stack/templates/meta-reference-gpu/meta_reference.py | 2 +-
 .../templates/meta-reference-quantized-gpu/meta_reference.py | 2 +-
 llama_stack/templates/ollama/ollama.py | 2 +-
 llama_stack/templates/remote-vllm/vllm.py | 2 +-
 llama_stack/templates/tgi/tgi.py | 2 +-
 llama_stack/templates/together/together.py | 2 +-
 llama_stack/templates/vllm-gpu/vllm.py | 2 +-
 26 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/distributions/remote-vllm/compose.yaml b/distributions/remote-vllm/compose.yaml
index 09701e099..c387e1049 100644
--- a/distributions/remote-vllm/compose.yaml
+++ b/distributions/remote-vllm/compose.yaml
@@ -85,7 +85,7 @@ services:
       - SQLITE_STORE_DIR=${SQLITE_STORE_DIR:-$HOME/.llama/distributions/remote-vllm}
       - SAFETY_MODEL=${SAFETY_MODEL:-meta-llama/Llama-Guard-3-1B}
     ports:
-      - "${LLAMASTACK_PORT:-5001}:${LLAMASTACK_PORT:-5001}"
+      - "${LLAMA_STACK_PORT:-5001}:${LLAMA_STACK_PORT:-5001}"
     # Hack: wait for vLLM server to start before starting docker
     entrypoint: bash -c "sleep 60; python -m llama_stack.distribution.server.server --yaml_config /root/llamastack-run-remote-vllm.yaml --port 5001"
     deploy:
diff --git a/docs/source/distributions/self_hosted_distro/bedrock.md b/docs/source/distributions/self_hosted_distro/bedrock.md
index db4c7a8c9..71adfad09 100644
--- a/docs/source/distributions/self_hosted_distro/bedrock.md
+++ b/docs/source/distributions/self_hosted_distro/bedrock.md
@@ -27,7 +27,7 @@ The `llamastack/distribution-bedrock` distribution consists of the following pro
 
 The following environment variables can be configured:
 
-- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
+- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
 
 ### Models
diff --git a/docs/source/distributions/self_hosted_distro/cerebras.md b/docs/source/distributions/self_hosted_distro/cerebras.md
index f623ed0de..be69c8f92 100644
--- a/docs/source/distributions/self_hosted_distro/cerebras.md
+++ b/docs/source/distributions/self_hosted_distro/cerebras.md
@@ -16,7 +16,7 @@ The `llamastack/distribution-cerebras` distribution consists of the following pr
 
 The following environment variables can be configured:
 
-- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
+- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
 - `CEREBRAS_API_KEY`: Cerebras API Key (default: ``)
 
 ### Models
diff --git a/docs/source/distributions/self_hosted_distro/fireworks.md b/docs/source/distributions/self_hosted_distro/fireworks.md
index c5428306a..db10ab4f1 100644
--- a/docs/source/distributions/self_hosted_distro/fireworks.md
+++ b/docs/source/distributions/self_hosted_distro/fireworks.md
@@ -29,7 +29,7 @@ The `llamastack/distribution-fireworks` distribution consists of the following p
 
 The following environment variables can be configured:
 
-- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
+- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
 - `FIREWORKS_API_KEY`: Fireworks.AI API Key (default: ``)
 
 ### Models
diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
index 0ca58e7df..a89719dea 100644
--- a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
+++ b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md
@@ -31,7 +31,7 @@ Note that you need access to nvidia GPUs to run this distribution. This distribu
 
 The following environment variables can be configured:
 
-- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
+- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
 - `INFERENCE_MODEL`: Inference model loaded into the Meta Reference server (default: `meta-llama/Llama-3.2-3B-Instruct`)
 - `INFERENCE_CHECKPOINT_DIR`: Directory containing the Meta Reference model checkpoint (default: `null`)
 - `SAFETY_MODEL`: Name of the safety (Llama-Guard) model to use (default: `meta-llama/Llama-Guard-3-1B`)
diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
index 87f4f4a61..26ed5d05b 100644
--- a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
+++ b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md
@@ -33,7 +33,7 @@ Note that you need access to nvidia GPUs to run this distribution. This distribu
 
 The following environment variables can be configured:
 
-- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
+- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
 - `INFERENCE_MODEL`: Inference model loaded into the Meta Reference server (default: `meta-llama/Llama-3.2-3B-Instruct`)
 - `INFERENCE_CHECKPOINT_DIR`: Directory containing the Meta Reference model checkpoint (default: `null`)
 
diff --git a/docs/source/distributions/self_hosted_distro/ollama.md b/docs/source/distributions/self_hosted_distro/ollama.md
index 7fe2ae408..e8e5dd397 100644
--- a/docs/source/distributions/self_hosted_distro/ollama.md
+++ b/docs/source/distributions/self_hosted_distro/ollama.md
@@ -29,7 +29,7 @@ You should use this distribution if you have a regular desktop machine without v
 
 The following environment variables can be configured:
 
-- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
+- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
 - `OLLAMA_URL`: URL of the Ollama server (default: `http://127.0.0.1:11434`)
 - `INFERENCE_MODEL`: Inference model loaded into the Ollama server (default: `meta-llama/Llama-3.2-3B-Instruct`)
 - `SAFETY_MODEL`: Safety model loaded into the Ollama server (default: `meta-llama/Llama-Guard-3-1B`)
diff --git a/docs/source/distributions/self_hosted_distro/remote-vllm.md b/docs/source/distributions/self_hosted_distro/remote-vllm.md
index 9d58a622b..98d02725c 100644
--- a/docs/source/distributions/self_hosted_distro/remote-vllm.md
+++ b/docs/source/distributions/self_hosted_distro/remote-vllm.md
@@ -27,7 +27,7 @@ You can use this distribution if you have GPUs and want to run an independent vL
 
 The following environment variables can be configured:
 
-- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
+- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
 - `INFERENCE_MODEL`: Inference model loaded into the vLLM server (default: `meta-llama/Llama-3.2-3B-Instruct`)
 - `VLLM_URL`: URL of the vLLM server with the main inference model (default: `http://host.docker.internal:5100/v1`)
 - `MAX_TOKENS`: Maximum number of tokens for generation (default: `4096`)
diff --git a/docs/source/distributions/self_hosted_distro/tgi.md b/docs/source/distributions/self_hosted_distro/tgi.md
index 847018809..f4f705b12 100644
--- a/docs/source/distributions/self_hosted_distro/tgi.md
+++ b/docs/source/distributions/self_hosted_distro/tgi.md
@@ -32,7 +32,7 @@ You can use this distribution if you have GPUs and want to run an independent TG
 
 The following environment variables can be configured:
 
-- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
+- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
 - `INFERENCE_MODEL`: Inference model loaded into the TGI server (default: `meta-llama/Llama-3.2-3B-Instruct`)
 - `TGI_URL`: URL of the TGI server with the main inference model (default: `http://127.0.0.1:8080}/v1`)
 - `TGI_SAFETY_URL`: URL of the TGI server with the safety model (default: `http://127.0.0.1:8081/v1`)
diff --git a/docs/source/distributions/self_hosted_distro/together.md b/docs/source/distributions/self_hosted_distro/together.md
index 72b082226..3b476c9bf 100644
--- a/docs/source/distributions/self_hosted_distro/together.md
+++ b/docs/source/distributions/self_hosted_distro/together.md
@@ -29,7 +29,7 @@ The `llamastack/distribution-together` distribution consists of the following pr
 
 The following environment variables can be configured:
 
-- `LLAMASTACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
+- `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
 - `TOGETHER_API_KEY`: Together.AI API Key (default: ``)
 
 ### Models
diff --git a/docs/zero_to_hero_guide/README.md b/docs/zero_to_hero_guide/README.md
index b451e0af7..f96ae49ce 100644
--- a/docs/zero_to_hero_guide/README.md
+++ b/docs/zero_to_hero_guide/README.md
@@ -89,7 +89,7 @@ If you're looking for more specific topics, we have a [Zero to Hero Guide](#next
    ```
    ...
    Build Successful! Next steps:
-   1. Set the environment variables: LLAMASTACK_PORT, OLLAMA_URL, INFERENCE_MODEL, SAFETY_MODEL
+   1. Set the environment variables: LLAMA_STACK_PORT, OLLAMA_URL, INFERENCE_MODEL, SAFETY_MODEL
    2. `llama stack run /Users//.llama/distributions/llamastack-ollama/ollama-run.yaml
    ```
 
diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py
index 7ff50bd77..1e4e6d7a1 100644
--- a/llama_stack/cli/stack/run.py
+++ b/llama_stack/cli/stack/run.py
@@ -35,7 +35,7 @@ class StackRun(Subcommand):
             "--port",
             type=int,
             help="Port to run the server on. Defaults to 5000",
-            default=int(os.getenv("LLAMASTACK_PORT", 5000)),
+            default=int(os.getenv("LLAMA_STACK_PORT", 5000)),
         )
         self.parser.add_argument(
             "--disable-ipv6",
diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py
index 1108d1049..34334de77 100644
--- a/llama_stack/distribution/server/server.py
+++ b/llama_stack/distribution/server/server.py
@@ -290,7 +290,7 @@ def main():
     parser.add_argument(
         "--port",
         type=int,
-        default=int(os.getenv("LLAMASTACK_PORT", 5000)),
+        default=int(os.getenv("LLAMA_STACK_PORT", 5000)),
         help="Port to listen on",
     )
     parser.add_argument(
diff --git a/llama_stack/distribution/start_container.sh b/llama_stack/distribution/start_container.sh
index 3b7b55b97..3b49a22f8 100755
--- a/llama_stack/distribution/start_container.sh
+++ b/llama_stack/distribution/start_container.sh
@@ -90,6 +90,6 @@ $DOCKER_BINARY run $DOCKER_OPTS -it \
   $env_vars \
   -v "$yaml_config:/app/config.yaml" \
   $mounts \
-  --env LLAMASTACK_PORT=$port \
+  --env LLAMA_STACK_PORT=$port \
   --entrypoint='["python", "-m", "llama_stack.distribution.server.server", "--yaml-config", "/app/config.yaml"]' \
   $docker_image:$version_tag
diff --git a/llama_stack/templates/bedrock/bedrock.py b/llama_stack/templates/bedrock/bedrock.py
index a579e5b7f..c80625cf6 100644
--- a/llama_stack/templates/bedrock/bedrock.py
+++ b/llama_stack/templates/bedrock/bedrock.py
@@ -84,7 +84,7 @@ def get_distribution_template() -> DistributionTemplate:
             ),
         },
         run_config_env_vars={
-            "LLAMASTACK_PORT": (
+            "LLAMA_STACK_PORT": (
                 "5001",
                 "Port for the Llama Stack distribution server",
             ),
diff --git a/llama_stack/templates/cerebras/cerebras.py b/llama_stack/templates/cerebras/cerebras.py
index cbacdbaec..b51617f35 100644
--- a/llama_stack/templates/cerebras/cerebras.py
+++ b/llama_stack/templates/cerebras/cerebras.py
@@ -102,7 +102,7 @@ def get_distribution_template() -> DistributionTemplate:
             ),
         },
        run_config_env_vars={
-            "LLAMASTACK_PORT": (
+            "LLAMA_STACK_PORT": (
                 "5001",
                 "Port for the Llama Stack distribution server",
             ),
diff --git a/llama_stack/templates/fireworks/fireworks.py b/llama_stack/templates/fireworks/fireworks.py
index 090f98b59..c7b166699 100644
--- a/llama_stack/templates/fireworks/fireworks.py
+++ b/llama_stack/templates/fireworks/fireworks.py
@@ -114,7 +114,7 @@ def get_distribution_template() -> DistributionTemplate:
             ),
         },
         run_config_env_vars={
-            "LLAMASTACK_PORT": (
+            "LLAMA_STACK_PORT": (
                 "5001",
                 "Port for the Llama Stack distribution server",
             ),
diff --git a/llama_stack/templates/hf-endpoint/hf_endpoint.py b/llama_stack/templates/hf-endpoint/hf_endpoint.py
index 8bac2588d..54aaa56ac 100644
--- a/llama_stack/templates/hf-endpoint/hf_endpoint.py
+++ b/llama_stack/templates/hf-endpoint/hf_endpoint.py
@@ -126,7 +126,7 @@ def get_distribution_template() -> DistributionTemplate:
             ),
         },
         run_config_env_vars={
-            "LLAMASTACK_PORT": (
+            "LLAMA_STACK_PORT": (
                 "5001",
                 "Port for the Llama Stack distribution server",
             ),
diff --git a/llama_stack/templates/hf-serverless/hf_serverless.py b/llama_stack/templates/hf-serverless/hf_serverless.py
index 33eb594fe..51e16c3db 100644
--- a/llama_stack/templates/hf-serverless/hf_serverless.py
+++ b/llama_stack/templates/hf-serverless/hf_serverless.py
@@ -126,7 +126,7 @@ def get_distribution_template() -> DistributionTemplate:
             ),
         },
         run_config_env_vars={
-            "LLAMASTACK_PORT": (
+            "LLAMA_STACK_PORT": (
                 "5001",
                 "Port for the Llama Stack distribution server",
             ),
diff --git a/llama_stack/templates/meta-reference-gpu/meta_reference.py b/llama_stack/templates/meta-reference-gpu/meta_reference.py
index 8ad56d7f5..1477b31ff 100644
--- a/llama_stack/templates/meta-reference-gpu/meta_reference.py
+++ b/llama_stack/templates/meta-reference-gpu/meta_reference.py
@@ -132,7 +132,7 @@ def get_distribution_template() -> DistributionTemplate:
             ),
         },
         run_config_env_vars={
-            "LLAMASTACK_PORT": (
+            "LLAMA_STACK_PORT": (
                 "5001",
                 "Port for the Llama Stack distribution server",
             ),
diff --git a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py
index 6af7175f7..5c40134af 100644
--- a/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py
+++ b/llama_stack/templates/meta-reference-quantized-gpu/meta_reference.py
@@ -99,7 +99,7 @@ def get_distribution_template() -> DistributionTemplate:
             ),
         },
         run_config_env_vars={
-            "LLAMASTACK_PORT": (
+            "LLAMA_STACK_PORT": (
                 "5001",
                 "Port for the Llama Stack distribution server",
             ),
diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py
index 9a76e9371..5546c3fbc 100644
--- a/llama_stack/templates/ollama/ollama.py
+++ b/llama_stack/templates/ollama/ollama.py
@@ -120,7 +120,7 @@ def get_distribution_template() -> DistributionTemplate:
             ),
         },
         run_config_env_vars={
-            "LLAMASTACK_PORT": (
+            "LLAMA_STACK_PORT": (
                 "5001",
                 "Port for the Llama Stack distribution server",
             ),
diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py
index f12752f2b..ecaa2cf14 100644
--- a/llama_stack/templates/remote-vllm/vllm.py
+++ b/llama_stack/templates/remote-vllm/vllm.py
@@ -125,7 +125,7 @@ def get_distribution_template() -> DistributionTemplate:
             ),
         },
         run_config_env_vars={
-            "LLAMASTACK_PORT": (
+            "LLAMA_STACK_PORT": (
                 "5001",
                 "Port for the Llama Stack distribution server",
             ),
diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py
index 892d539d2..37ed2751b 100644
--- a/llama_stack/templates/tgi/tgi.py
+++ b/llama_stack/templates/tgi/tgi.py
@@ -127,7 +127,7 @@ def get_distribution_template() -> DistributionTemplate:
             ),
         },
         run_config_env_vars={
-            "LLAMASTACK_PORT": (
+            "LLAMA_STACK_PORT": (
                 "5001",
                 "Port for the Llama Stack distribution server",
             ),
diff --git a/llama_stack/templates/together/together.py b/llama_stack/templates/together/together.py
index d73e23e77..30ad47e30 100644
--- a/llama_stack/templates/together/together.py
+++ b/llama_stack/templates/together/together.py
@@ -112,7 +112,7 @@ def get_distribution_template() -> DistributionTemplate:
             ),
         },
         run_config_env_vars={
-            "LLAMASTACK_PORT": (
+            "LLAMA_STACK_PORT": (
                 "5001",
                 "Port for the Llama Stack distribution server",
             ),
diff --git a/llama_stack/templates/vllm-gpu/vllm.py b/llama_stack/templates/vllm-gpu/vllm.py
index 5cf478990..dd80c15dc 100644
--- a/llama_stack/templates/vllm-gpu/vllm.py
+++ b/llama_stack/templates/vllm-gpu/vllm.py
@@ -99,7 +99,7 @@ def get_distribution_template() -> DistributionTemplate:
             ),
         },
         run_config_env_vars={
-            "LLAMASTACK_PORT": (
+            "LLAMA_STACK_PORT": (
                 "5001",
                 "Port for the Llama Stack distribution server",
             ),
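
For reviewers, a minimal usage sketch (not part of the patch itself): it simply mirrors the `int(os.getenv(...))` fallback that `llama_stack/cli/stack/run.py` and `llama_stack/distribution/server/server.py` use after this rename. The standalone script and its `print` statement are illustrative assumptions only.

```python
import os

# Minimal sketch: read the renamed variable the same way run.py / server.py do
# after this patch, falling back to 5000 when LLAMA_STACK_PORT is unset.
# (Standalone example for illustration; not part of the repository.)
port = int(os.getenv("LLAMA_STACK_PORT", 5000))
print(f"Llama Stack server would listen on port {port}")
```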