From dce9a24a6cb034da30cb4292319600ca68e8a20a Mon Sep 17 00:00:00 2001
From: Yuan Tang
Date: Fri, 21 Mar 2025 10:31:59 -0400
Subject: [PATCH] test: Add default vLLM URL in remote-vllm template (#1736)

# What does this PR do?

This is to avoid errors like the following when running inference
integration tests:

```
ERROR tests/integration/inference/test_text_inference.py::test_text_completion_stop_sequence[txt=8B-inference:completion:stop_sequence] - llama_stack.distribution.stack.EnvVarError: Environment variable 'VLLM_URL' not set or empty at providers.inference[0].config.url
```

It is also good to have a default, which is consistent with the vLLM API
server.

## Test Plan

Integration tests can run without the error above.

---------

Signed-off-by: Yuan Tang
---
 llama_stack/templates/remote-vllm/run-with-safety.yaml | 2 +-
 llama_stack/templates/remote-vllm/run.yaml              | 2 +-
 llama_stack/templates/remote-vllm/vllm.py               | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml
index 3830ffcdb..9ab6d014e 100644
--- a/llama_stack/templates/remote-vllm/run-with-safety.yaml
+++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml
@@ -15,7 +15,7 @@ providers:
   - provider_id: vllm-inference
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL}
+      url: ${env.VLLM_URL:http://localhost:8000/v1}
       max_tokens: ${env.VLLM_MAX_TOKENS:4096}
       api_token: ${env.VLLM_API_TOKEN:fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:true}
diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml
index b6bba1252..1f3cdfb39 100644
--- a/llama_stack/templates/remote-vllm/run.yaml
+++ b/llama_stack/templates/remote-vllm/run.yaml
@@ -15,7 +15,7 @@ providers:
   - provider_id: vllm-inference
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL}
+      url: ${env.VLLM_URL:http://localhost:8000/v1}
       max_tokens: ${env.VLLM_MAX_TOKENS:4096}
       api_token: ${env.VLLM_API_TOKEN:fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:true}
diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py
index ba0dacae0..0f6c7659e 100644
--- a/llama_stack/templates/remote-vllm/vllm.py
+++ b/llama_stack/templates/remote-vllm/vllm.py
@@ -45,7 +45,7 @@ def get_distribution_template() -> DistributionTemplate:
         provider_id="vllm-inference",
         provider_type="remote::vllm",
         config=VLLMInferenceAdapterConfig.sample_run_config(
-            url="${env.VLLM_URL}",
+            url="${env.VLLM_URL:http://localhost:8000/v1}",
         ),
     )
     embedding_provider = Provider(
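
For readers unfamiliar with the `${env.VAR:default}` placeholder syntax used in these templates, the sketch below illustrates the intended fallback behavior: use the environment variable if it is set and non-empty, otherwise fall back to the default, and raise an error only when neither is available. This is a minimal illustration under those assumptions, not llama-stack's actual resolver; the function name `resolve_env_placeholders` and the regex are made up for this example.

```
# Illustrative sketch only -- not llama-stack's actual implementation.
import os
import re

# Matches placeholders like ${env.VLLM_URL} or ${env.VLLM_URL:http://localhost:8000/v1}
_ENV_PATTERN = re.compile(r"\$\{env\.(?P<name>[A-Za-z0-9_]+)(?::(?P<default>[^}]*))?\}")


class EnvVarError(Exception):
    """Raised when a referenced environment variable is unset and no default is given."""


def resolve_env_placeholders(value: str) -> str:
    """Replace ${env.NAME:default} placeholders with environment values or defaults."""

    def _substitute(match: re.Match) -> str:
        name = match.group("name")
        default = match.group("default")
        env_value = os.environ.get(name, "")
        if env_value:
            return env_value
        if default is not None:
            return default
        raise EnvVarError(f"Environment variable '{name}' not set or empty")

    return _ENV_PATTERN.sub(_substitute, value)


if __name__ == "__main__":
    # With VLLM_URL unset, the default from the template is used.
    print(resolve_env_placeholders("${env.VLLM_URL:http://localhost:8000/v1}"))
    # Prints: http://localhost:8000/v1
```

With the change in this patch, a bare `${env.VLLM_URL}` (which would raise the `EnvVarError` shown in the test failure above) becomes `${env.VLLM_URL:http://localhost:8000/v1}`, so the tests can run against a locally started vLLM server without exporting `VLLM_URL` first.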