diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml
index 3830ffcdb..9ab6d014e 100644
--- a/llama_stack/templates/remote-vllm/run-with-safety.yaml
+++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml
@@ -15,7 +15,7 @@ providers:
   - provider_id: vllm-inference
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL}
+      url: ${env.VLLM_URL:http://localhost:8000/v1}
       max_tokens: ${env.VLLM_MAX_TOKENS:4096}
       api_token: ${env.VLLM_API_TOKEN:fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:true}
diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml
index b6bba1252..1f3cdfb39 100644
--- a/llama_stack/templates/remote-vllm/run.yaml
+++ b/llama_stack/templates/remote-vllm/run.yaml
@@ -15,7 +15,7 @@ providers:
   - provider_id: vllm-inference
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL}
+      url: ${env.VLLM_URL:http://localhost:8000/v1}
       max_tokens: ${env.VLLM_MAX_TOKENS:4096}
       api_token: ${env.VLLM_API_TOKEN:fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:true}
diff --git a/llama_stack/templates/remote-vllm/vllm.py b/llama_stack/templates/remote-vllm/vllm.py
index ba0dacae0..0f6c7659e 100644
--- a/llama_stack/templates/remote-vllm/vllm.py
+++ b/llama_stack/templates/remote-vllm/vllm.py
@@ -45,7 +45,7 @@ def get_distribution_template() -> DistributionTemplate:
         provider_id="vllm-inference",
         provider_type="remote::vllm",
         config=VLLMInferenceAdapterConfig.sample_run_config(
-            url="${env.VLLM_URL}",
+            url="${env.VLLM_URL:http://localhost:8000/v1}",
         ),
     )
     embedding_provider = Provider(
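
The change gives `${env.VLLM_URL}` a default value, so the templates no longer fail when `VLLM_URL` is unset and instead fall back to `http://localhost:8000/v1`, matching the `${env.NAME:default}` form already used by `VLLM_MAX_TOKENS`, `VLLM_API_TOKEN`, and `VLLM_TLS_VERIFY`. As a rough illustration of the resolution semantics (a minimal sketch with a hypothetical helper, not llama_stack's actual substitution code):

```python
import os
import re

# Matches ${env.NAME} and ${env.NAME:default}; the default may contain
# colons and slashes, as in a URL.
_ENV_PATTERN = re.compile(
    r"\$\{env\.(?P<name>[A-Za-z_][A-Za-z0-9_]*)(?::(?P<default>[^}]*))?\}"
)

def resolve_env_placeholders(value: str) -> str:
    """Replace each placeholder with the environment value, falling back
    to the inline default (if present) when the variable is unset."""
    def _sub(match: re.Match) -> str:
        env_value = os.environ.get(match.group("name"))
        if env_value is not None:
            return env_value
        default = match.group("default")
        if default is not None:
            return default
        raise ValueError(f"{match.group('name')} is not set and has no default")
    return _ENV_PATTERN.sub(_sub, value)

# With VLLM_URL unset, the new template value resolves to the local default:
assert (
    resolve_env_placeholders("${env.VLLM_URL:http://localhost:8000/v1}")
    == "http://localhost:8000/v1"
)
```

An explicitly exported `VLLM_URL` still takes precedence over the default, so existing deployments that set the variable are unaffected.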