From 168cbcbb92c2956475f0b1ee5434cd4b8416e8b2 Mon Sep 17 00:00:00 2001
From: Luis Tomas Bolivar
Date: Tue, 18 Mar 2025 14:33:35 +0100
Subject: [PATCH] fix: Add the option to not verify SSL at remote-vllm provider (#1585)

# What does this PR do?
Add the option to not verify SSL certificates for the remote-vllm
provider. This allows the llama stack server to talk to remote LLMs
that have self-signed certificates.

Partially addresses #1545
---
 llama_stack/providers/remote/inference/vllm/config.py  | 5 +++++
 llama_stack/providers/remote/inference/vllm/vllm.py    | 7 ++++++-
 llama_stack/templates/remote-vllm/run-with-safety.yaml | 2 ++
 llama_stack/templates/remote-vllm/run.yaml              | 1 +
 4 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/llama_stack/providers/remote/inference/vllm/config.py b/llama_stack/providers/remote/inference/vllm/config.py
index c75cc8926..762cffde3 100644
--- a/llama_stack/providers/remote/inference/vllm/config.py
+++ b/llama_stack/providers/remote/inference/vllm/config.py
@@ -25,6 +25,10 @@ class VLLMInferenceAdapterConfig(BaseModel):
         default="fake",
         description="The API token",
     )
+    tls_verify: bool = Field(
+        default=True,
+        description="Whether to verify TLS certificates",
+    )
 
     @classmethod
     def sample_run_config(
@@ -36,4 +40,5 @@ class VLLMInferenceAdapterConfig(BaseModel):
             "url": url,
             "max_tokens": "${env.VLLM_MAX_TOKENS:4096}",
             "api_token": "${env.VLLM_API_TOKEN:fake}",
+            "tls_verify": "${env.VLLM_TLS_VERIFY:true}",
         }
diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 4d7e66d78..f940de7ba 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -7,6 +7,7 @@ import json
 import logging
 from typing import AsyncGenerator, List, Optional, Union
+import httpx
 from openai import AsyncOpenAI
 from openai.types.chat.chat_completion_chunk import (
     ChatCompletionChunk as OpenAIChatCompletionChunk,
 )
@@ -229,7 +230,11 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
 
     async def initialize(self) -> None:
         log.info(f"Initializing VLLM client with base_url={self.config.url}")
-        self.client = AsyncOpenAI(base_url=self.config.url, api_key=self.config.api_token)
+        self.client = AsyncOpenAI(
+            base_url=self.config.url,
+            api_key=self.config.api_token,
+            http_client=None if self.config.tls_verify else httpx.AsyncClient(verify=False),
+        )
 
     async def shutdown(self) -> None:
         pass
diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml
index 9741f5302..3830ffcdb 100644
--- a/llama_stack/templates/remote-vllm/run-with-safety.yaml
+++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml
@@ -18,12 +18,14 @@ providers:
       url: ${env.VLLM_URL}
       max_tokens: ${env.VLLM_MAX_TOKENS:4096}
       api_token: ${env.VLLM_API_TOKEN:fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:true}
   - provider_id: vllm-safety
     provider_type: remote::vllm
     config:
       url: ${env.SAFETY_VLLM_URL}
       max_tokens: ${env.VLLM_MAX_TOKENS:4096}
       api_token: ${env.VLLM_API_TOKEN:fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:true}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
     config: {}
diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml
index e26b20e88..b6bba1252 100644
--- a/llama_stack/templates/remote-vllm/run.yaml
+++ b/llama_stack/templates/remote-vllm/run.yaml
@@ -18,6 +18,7 @@ providers:
       url: ${env.VLLM_URL}
       max_tokens: ${env.VLLM_MAX_TOKENS:4096}
       api_token: ${env.VLLM_API_TOKEN:fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:true}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
     config: {}
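
Usage sketch: with this change applied, certificate verification can be disabled for a vLLM endpoint that presents a self-signed certificate, either by exporting VLLM_TLS_VERIFY=false before starting the stack or by setting tls_verify directly in the provider config. A minimal run.yaml excerpt follows; the provider_id and URL are illustrative placeholders, while the remaining keys mirror the template defaults shown in the diff:

    inference:
    - provider_id: vllm-inference
      provider_type: remote::vllm
      config:
        url: https://my-self-signed-vllm.internal/v1
        max_tokens: ${env.VLLM_MAX_TOKENS:4096}
        api_token: ${env.VLLM_API_TOKEN:fake}
        tls_verify: false  # do not verify the server's TLS certificate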