fix: Add the option to not verify SSL at remote-vllm provider (#1585)

# What does this PR do?
Add the option to not verify SSL certificates for the remote-vllm provider. This allows the Llama Stack server to talk to remote LLMs that serve self-signed certificates.

Partially addresses #1545.
Luis Tomas Bolivar, 2025-03-18 14:33:35 +01:00. Commit 168cbcbb92, parent 37f155e41d.
4 changed files with 14 additions and 1 deletion


@@ -25,6 +25,10 @@ class VLLMInferenceAdapterConfig(BaseModel):
         default="fake",
         description="The API token",
     )
+    tls_verify: bool = Field(
+        default=True,
+        description="Whether to verify TLS certificates",
+    )

     @classmethod
     def sample_run_config(
@@ -36,4 +40,5 @@ class VLLMInferenceAdapterConfig(BaseModel):
             "url": url,
             "max_tokens": "${env.VLLM_MAX_TOKENS:4096}",
             "api_token": "${env.VLLM_API_TOKEN:fake}",
+            "tls_verify": "${env.VLLM_TLS_VERIFY:true}",
         }
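For reference, a standalone sketch of how the new field behaves on the config model. The field names mirror the diff, but the class is trimmed and the `url` default here is illustrative, not the adapter's actual default:

```python
from pydantic import BaseModel, Field


class VLLMInferenceAdapterConfig(BaseModel):
    # Trimmed to the fields relevant to this change; the real class has more.
    url: str = "http://localhost:8000/v1"  # illustrative default only
    api_token: str = Field(default="fake", description="The API token")
    tls_verify: bool = Field(
        default=True,
        description="Whether to verify TLS certificates",
    )


# Existing configs that never mention tls_verify keep verifying certificates.
assert VLLMInferenceAdapterConfig().tls_verify is True

# Opting out for an endpoint that serves a self-signed certificate.
insecure = VLLMInferenceAdapterConfig(
    url="https://vllm.internal:8443/v1",
    tls_verify=False,
)
assert insecure.tls_verify is False
```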


@@ -7,6 +7,7 @@ import json
 import logging
 from typing import AsyncGenerator, List, Optional, Union

+import httpx
 from openai import AsyncOpenAI
 from openai.types.chat.chat_completion_chunk import (
     ChatCompletionChunk as OpenAIChatCompletionChunk,
@@ -229,7 +230,11 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):

     async def initialize(self) -> None:
         log.info(f"Initializing VLLM client with base_url={self.config.url}")
-        self.client = AsyncOpenAI(base_url=self.config.url, api_key=self.config.api_token)
+        self.client = AsyncOpenAI(
+            base_url=self.config.url,
+            api_key=self.config.api_token,
+            http_client=None if self.config.tls_verify else httpx.AsyncClient(verify=False),
+        )

     async def shutdown(self) -> None:
         pass
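Pulled out of the adapter, the client wiring amounts to the following minimal sketch; `build_vllm_client` is a hypothetical helper and the URL is made up, but the `AsyncOpenAI(http_client=...)` and `httpx.AsyncClient(verify=False)` calls are the same ones used in the diff:

```python
import httpx
from openai import AsyncOpenAI


def build_vllm_client(url: str, api_token: str, tls_verify: bool) -> AsyncOpenAI:
    # With tls_verify=True, http_client=None lets the OpenAI SDK build its own
    # httpx client with normal certificate verification. With tls_verify=False,
    # an httpx.AsyncClient(verify=False) is injected so self-signed
    # certificates on the vLLM endpoint are accepted.
    return AsyncOpenAI(
        base_url=url,
        api_key=api_token,
        http_client=None if tls_verify else httpx.AsyncClient(verify=False),
    )


# e.g. a vLLM server behind a self-signed certificate:
client = build_vllm_client("https://vllm.internal:8443/v1", "fake", tls_verify=False)
```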


@@ -18,12 +18,14 @@ providers:
       url: ${env.VLLM_URL}
       max_tokens: ${env.VLLM_MAX_TOKENS:4096}
       api_token: ${env.VLLM_API_TOKEN:fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:true}
   - provider_id: vllm-safety
     provider_type: remote::vllm
     config:
       url: ${env.SAFETY_VLLM_URL}
       max_tokens: ${env.VLLM_MAX_TOKENS:4096}
       api_token: ${env.VLLM_API_TOKEN:fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:true}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
     config: {}
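The `${env.VLLM_TLS_VERIFY:true}` placeholders above follow the stack's usual `${env.NAME:default}` convention: use the environment variable when it is set, otherwise fall back to the default. A rough illustration of that substitution semantics (a sketch, not the stack's actual resolver):

```python
import os
import re

_ENV_PLACEHOLDER = re.compile(r"\$\{env\.([A-Za-z0-9_]+)(?::([^}]*))?\}")


def resolve_env_placeholders(value: str) -> str:
    """Replace ${env.NAME:default} with the env var's value or the default."""
    def _substitute(match: re.Match) -> str:
        name, default = match.group(1), match.group(2)
        return os.environ.get(name, default if default is not None else "")
    return _ENV_PLACEHOLDER.sub(_substitute, value)


os.environ.pop("VLLM_TLS_VERIFY", None)
# Unset -> the default applies, so TLS verification stays on.
assert resolve_env_placeholders("${env.VLLM_TLS_VERIFY:true}") == "true"

# Exporting VLLM_TLS_VERIFY=false before starting the server turns it off.
os.environ["VLLM_TLS_VERIFY"] = "false"
assert resolve_env_placeholders("${env.VLLM_TLS_VERIFY:true}") == "false"
```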


@@ -18,6 +18,7 @@ providers:
       url: ${env.VLLM_URL}
       max_tokens: ${env.VLLM_MAX_TOKENS:4096}
       api_token: ${env.VLLM_API_TOKEN:fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:true}
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
     config: {}
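Note that the substituted value arrives as the string "true" or "false"; the boolean comes from pydantic's coercion when the config is validated. A quick standalone check of that behavior (not stack code):

```python
from pydantic import BaseModel


class _Cfg(BaseModel):
    tls_verify: bool = True


# pydantic accepts common string spellings of booleans, so the substituted
# "true"/"false" values validate to real bools.
assert _Cfg(tls_verify="true").tls_verify is True
assert _Cfg(tls_verify="false").tls_verify is False
```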