From dce9a24a6cb034da30cb4292319600ca68e8a20a Mon Sep 17 00:00:00 2001
From: Yuan Tang
Date: Fri, 21 Mar 2025 10:31:59 -0400
Subject: [PATCH] test: Add default vLLM URL in remote-vllm template (#1736)

# What does this PR do?

This is to avoid errors like the following when running inference
integration tests:

```
ERROR tests/integration/inference/test_text_inference.py::test_text_completion_stop_sequence[txt=8B-inference:completion:stop_sequence] - llama_stack.distribution.stack.EnvVarError: Environment variable 'VLLM_URL' not set or empty at providers.inference[0].config.url
```

It is also good to have a default, which is consistent with the vLLM API
server.

## Test Plan

Integration tests can run without the error above.

---------

Signed-off-by: Yuan Tang
---
 llama_stack/templates/remote-vllm/run-with-safety.yaml | 2 +-
 llama_stack/templates/remote-vllm/run.yaml              | 2 +-
 llama_stack/templates/remote-vllm/vllm.py               | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml
index 3830ffcdb..9ab6d014e 100644
--- a/llama_stack/templates/remote-vllm/run-with-safety.yaml
+++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml
@@ -15,7 +15,7 @@ providers:
   - provider_id: vllm-inference
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL}
+      url: ${env.VLLM_URL:http://localhost:8000/v1}
       max_tokens: ${env.VLLM_MAX_TOKENS:4096}
       api_token: ${env.VLLM_API_TOKEN:fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:true}
diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml
index b6bba1252..1f3cdfb39 100644
--- a/llama_stack/templates/remote-vllm/run.yaml
+++ b/llama_stack/templates/remote-vllm/run.yaml
@@ -15,7 +15,7 @@ providers:
   - provider_id: vllm-inference
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL}
+      url: ${env.VLLM_URL:http://localhost:8000/v1}
       max_tokens: ${env.VLLM_MAX_TOKENS:4096}
       api_token: ${env.VLLM_API_TOKEN:fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:true}
diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py
index ba0dacae0..0f6c7659e 100644
--- a/llama_stack/templates/remote-vllm/vllm.py
+++ b/llama_stack/templates/remote-vllm/vllm.py
@@ -45,7 +45,7 @@ def get_distribution_template() -> DistributionTemplate:
         provider_id="vllm-inference",
         provider_type="remote::vllm",
         config=VLLMInferenceAdapterConfig.sample_run_config(
-            url="${env.VLLM_URL}",
+            url="${env.VLLM_URL:http://localhost:8000/v1}",
         ),
     )
     embedding_provider = Provider(
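
For readers unfamiliar with the `${env.VAR:default}` placeholder syntax used in these templates, the sketch below illustrates the intended fallback behavior: use the environment variable if it is set and non-empty, otherwise fall back to the default, and raise an error only when neither is available. This is a minimal illustration under those assumptions, not llama-stack's actual resolver; the function name `resolve_env_placeholders` and the regex are made up for this example.

```
# Illustrative sketch only -- not llama-stack's actual implementation.
import os
import re

# Matches placeholders like ${env.VLLM_URL} or ${env.VLLM_URL:http://localhost:8000/v1}
_ENV_PATTERN = re.compile(r"\$\{env\.(?P<name>[A-Za-z0-9_]+)(?::(?P<default>[^}]*))?\}")


class EnvVarError(Exception):
    """Raised when a referenced environment variable is unset and no default is given."""


def resolve_env_placeholders(value: str) -> str:
    """Replace ${env.NAME:default} placeholders with environment values or defaults."""

    def _substitute(match: re.Match) -> str:
        name = match.group("name")
        default = match.group("default")
        env_value = os.environ.get(name, "")
        if env_value:
            return env_value
        if default is not None:
            return default
        raise EnvVarError(f"Environment variable '{name}' not set or empty")

    return _ENV_PATTERN.sub(_substitute, value)


if __name__ == "__main__":
    # With VLLM_URL unset, the default from the template is used.
    print(resolve_env_placeholders("${env.VLLM_URL:http://localhost:8000/v1}"))
    # Prints: http://localhost:8000/v1
```

With the change in this patch, a bare `${env.VLLM_URL}` (which would raise the `EnvVarError` shown in the test failure above) becomes `${env.VLLM_URL:http://localhost:8000/v1}`, so the tests can run against a locally started vLLM server without exporting `VLLM_URL` first.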