fix: Add an option to skip TLS certificate verification in the remote-vllm provider

2025-08-11 12:38:02 +00:00 · 2025-03-13 13:22:36 +01:00 · 2025-03-13 13:22:36 +01:00 · b54454a222
commit b54454a222
parent e3edca7739
4 changed files with 14 additions and 1 deletions
--- a/llama_stack/providers/remote/inference/vllm/config.py
+++ b/llama_stack/providers/remote/inference/vllm/config.py
@ -25,6 +25,10 @@ class VLLMInferenceAdapterConfig(BaseModel):
        default="fake",
        description="The API token",
    )
    # When False, VLLMInferenceAdapter.initialize builds its client with
    # httpx.AsyncClient(verify=False), i.e. certificate checks are skipped.
    tls_verify: bool = Field(
        default=True,
        description="Whether to verify TLS certificates",
    )
    @classmethod
    def sample_run_config(
@ -36,4 +40,5 @@ class VLLMInferenceAdapterConfig(BaseModel):
            "url": url,
            "max_tokens": "${env.VLLM_MAX_TOKENS:4096}",
            "api_token": "${env.VLLM_API_TOKEN:fake}",
            "tls_verify": "${env.VLLM_TLS_VERIFY:true}",
        }
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@ -7,6 +7,7 @@ import json
 import logging
 from typing import AsyncGenerator, List, Optional, Union
 import httpx
 from openai import AsyncOpenAI
 from openai.types.chat.chat_completion_chunk import (
    ChatCompletionChunk as OpenAIChatCompletionChunk,
@ -229,7 +230,11 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
    async def initialize(self) -> None:
        """Create the ``AsyncOpenAI`` client for the configured vLLM endpoint."""
        log.info(f"Initializing VLLM client with base_url={self.config.url}")
-        self.client = AsyncOpenAI(base_url=self.config.url, api_key=self.config.api_token)
+        self.client = AsyncOpenAI(
            base_url=self.config.url,
            api_key=self.config.api_token,
            # tls_verify=False swaps in an httpx client with certificate checks
            # disabled; otherwise None is passed and no custom client is supplied.
            http_client=None if self.config.tls_verify else httpx.AsyncClient(verify=False),
        )
    async def shutdown(self) -> None:
        """Release the HTTP resources held by the vLLM client.

        ``initialize`` may hand ``AsyncOpenAI`` a dedicated
        ``httpx.AsyncClient`` (when TLS verification is disabled). Closing the
        OpenAI client here keeps that connection pool from leaking.

        Safe to call when ``initialize`` never ran or the client is unset.
        """
        # getattr: the attribute only exists once initialize() has assigned it.
        client = getattr(self, "client", None)
        if client is not None:
            await client.close()
--- a/llama_stack/templates/remote-vllm/run-with-safety.yaml
+++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml
@ -18,12 +18,14 @@ providers:
      url: ${env.VLLM_URL}
      max_tokens: ${env.VLLM_MAX_TOKENS:4096}
      api_token: ${env.VLLM_API_TOKEN:fake}
      tls_verify: ${env.VLLM_TLS_VERIFY:true}
  - provider_id: vllm-safety
    provider_type: remote::vllm
    config:
      url: ${env.SAFETY_VLLM_URL}
      max_tokens: ${env.VLLM_MAX_TOKENS:4096}
      api_token: ${env.VLLM_API_TOKEN:fake}
      tls_verify: ${env.VLLM_TLS_VERIFY:true}
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
    config: {}
--- a/llama_stack/templates/remote-vllm/run.yaml
+++ b/llama_stack/templates/remote-vllm/run.yaml
@ -18,6 +18,7 @@ providers:
      url: ${env.VLLM_URL}
      max_tokens: ${env.VLLM_MAX_TOKENS:4096}
      api_token: ${env.VLLM_API_TOKEN:fake}
      tls_verify: ${env.VLLM_TLS_VERIFY:true}
  - provider_id: sentence-transformers
    provider_type: inline::sentence-transformers
    config: {}