diff --git a/docs/source/distributions/self_hosted_distro/tgi.md b/docs/source/distributions/self_hosted_distro/tgi.md
index 80baf9c81..e126f9a08 100644
--- a/docs/source/distributions/self_hosted_distro/tgi.md
+++ b/docs/source/distributions/self_hosted_distro/tgi.md
@@ -35,7 +35,7 @@ The following environment variables can be configured:
 
 - `LLAMA_STACK_PORT`: Port for the Llama Stack distribution server (default: `5001`)
 - `INFERENCE_MODEL`: Inference model loaded into the TGI server (default: `meta-llama/Llama-3.2-3B-Instruct`)
-- `TGI_URL`: URL of the TGI server with the main inference model (default: `http://127.0.0.1:8080}/v1`)
+- `TGI_URL`: URL of the TGI server with the main inference model (default: `http://127.0.0.1:8080/v1`)
 - `TGI_SAFETY_URL`: URL of the TGI server with the safety model (default: `http://127.0.0.1:8081/v1`)
 - `SAFETY_MODEL`: Name of the safety (Llama-Guard) model to use (default: `meta-llama/Llama-Guard-3-1B`)
 
diff --git a/llama_stack/templates/tgi/tgi.py b/llama_stack/templates/tgi/tgi.py
index eb49871a0..584831746 100644
--- a/llama_stack/templates/tgi/tgi.py
+++ b/llama_stack/templates/tgi/tgi.py
@@ -137,7 +137,7 @@ def get_distribution_template() -> DistributionTemplate:
                 "Inference model loaded into the TGI server",
             ),
             "TGI_URL": (
-                "http://127.0.0.1:8080}/v1",
+                "http://127.0.0.1:8080/v1",
                 "URL of the TGI server with the main inference model",
             ),
             "TGI_SAFETY_URL": (
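
Not part of the patch itself, but a quick sketch of why the stray brace was more than a docs cosmetic issue: Python's standard `urllib.parse` keeps the `}` as part of the netloc, so anything that derives a host/port from the old default fails when it tries to read the port. The values below are the before/after defaults from this patch; the parsing check is only illustrative, not code from the repo.

```python
from urllib.parse import urlparse

# Defaults before and after this patch.
broken = urlparse("http://127.0.0.1:8080}/v1")  # pre-fix: stray "}" lands in the netloc
fixed = urlparse("http://127.0.0.1:8080/v1")    # post-fix

print(fixed.netloc, fixed.port)   # 127.0.0.1:8080 8080
print(broken.netloc)              # 127.0.0.1:8080} -- brace is treated as part of the port
try:
    broken.port                   # int("8080}") fails inside urllib
except ValueError as e:
    print(e)                      # Port could not be cast to integer value as '8080}'
```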