From 168cbcbb92c2956475f0b1ee5434cd4b8416e8b2 Mon Sep 17 00:00:00 2001
From: Luis Tomas Bolivar
Date: Tue, 18 Mar 2025 14:33:35 +0100
Subject: [PATCH] fix: Add the option to not verify SSL at remote-vllm provider (#1585)

# What does this PR do?
Add the option to not verify SSL certificates for the remote-vllm
provider. This allows the llama stack server to talk to remote LLMs
that have self-signed certificates.

Partially addresses #1545
---
 llama_stack/providers/remote/inference/vllm/config.py  | 5 +++++
 llama_stack/providers/remote/inference/vllm/vllm.py    | 7 ++++++-
 llama_stack/templates/remote-vllm/run-with-safety.yaml | 2 ++
 llama_stack/templates/remote-vllm/run.yaml              | 1 +
 4 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/llama_stack/providers/remote/inference/vllm/config.py b/llama_stack/providers/remote/inference/vllm/config.py
index c75cc8926..762cffde3 100644
--- a/llama_stack/providers/remote/inference/vllm/config.py
+++ b/llama_stack/providers/remote/inference/vllm/config.py
@@ -25,6 +25,10 @@ class VLLMInferenceAdapterConfig(BaseModel):
         default="fake",
         description="The API token",
     )
+    tls_verify: bool = Field(
+        default=True,
+        description="Whether to verify TLS certificates",
+    )
 
     @classmethod
     def sample_run_config(
@@ -36,4 +40,5 @@ class VLLMInferenceAdapterConfig(BaseModel):
             "url": url,
             "max_tokens": "${env.VLLM_MAX_TOKENS:4096}",
             "api_token": "${env.VLLM_API_TOKEN:fake}",
+            "tls_verify": "${env.VLLM_TLS_VERIFY:true}",
         }
diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 4d7e66d78..f940de7ba 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -7,6 +7,7 @@ import json
 import logging
 from typing import AsyncGenerator, List, Optional, Union
+import httpx
 from openai import AsyncOpenAI
 from openai.types.chat.chat_completion_chunk import (
     ChatCompletionChunk as OpenAIChatCompletionChunk,
 )
@@ -229,7 +230,11 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
 
     async def initialize(self) -> None:
         log.info(f"Initializing VLLM client with base_url={self.config.url}")
-        self.client = AsyncOpenAI(base_url=self.config.url, api_key=self.config.api_token)
+        self.client = AsyncOpenAI(
+            base_url=self.config.url,
+            api_key=self.config.api_token,
+            http_client=None if self.config.tls_verify else httpx.AsyncClient(verify=False),
+        )
 
     async def shutdown(self) -> None:
         pass
diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml
index 9741f5302..3830ffcdb 100644
--- a/llama_stack/templates/remote-vllm/run-with-safety.yaml
+++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml
@@ -18,12 +18,14 @@ providers:
       url: ${env.VLLM_URL}
       max_tokens: ${env.VLLM_MAX_TOKENS:4096}
       api_token: ${env.VLLM_API_TOKEN:fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:true}
   - provider_id: vllm-safety
     provider_type: remote::vllm
     config:
       url: ${env.SAFETY_VLLM_URL}
       max_tokens: ${env.VLLM_MAX_TOKENS:4096}
       api_token: ${env.VLLM_API_TOKEN:fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:true}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
     config: {}
diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml
index e26b20e88..b6bba1252 100644
--- a/llama_stack/templates/remote-vllm/run.yaml
+++ b/llama_stack/templates/remote-vllm/run.yaml
@@ -18,6 +18,7 @@ providers:
       url: ${env.VLLM_URL}
       max_tokens: ${env.VLLM_MAX_TOKENS:4096}
       api_token: ${env.VLLM_API_TOKEN:fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:true}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
     config: {}
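
Usage sketch: with this change applied, certificate verification can be disabled for a vLLM endpoint that presents a self-signed certificate, either by exporting VLLM_TLS_VERIFY=false before starting the stack or by setting tls_verify directly in the provider config. A minimal run.yaml excerpt follows; the provider_id and URL are illustrative placeholders, while the remaining keys mirror the template defaults shown in the diff:

    inference:
    - provider_id: vllm-inference
      provider_type: remote::vllm
      config:
        url: https://my-self-signed-vllm.internal/v1
        max_tokens: ${env.VLLM_MAX_TOKENS:4096}
        api_token: ${env.VLLM_API_TOKEN:fake}
        tls_verify: false  # do not verify the server's TLS certificate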