Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-05 18:22:41 +00:00)
Mark inline vllm as OpenAI unsupported inference
Signed-off-by: Ben Browning <bbrownin@redhat.com>
parent de01b1455b
commit 24cfa1ef1a
1 changed file with 8 additions and 1 deletion
@@ -66,8 +66,10 @@ from llama_stack.providers.utils.inference.model_registry import (
     ModelsProtocolPrivate,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
+    OpenAIChatCompletionUnsupportedMixin,
     OpenAICompatCompletionChoice,
     OpenAICompatCompletionResponse,
+    OpenAICompletionUnsupportedMixin,
     get_stop_reason,
     process_chat_completion_stream_response,
 )
@@ -172,7 +174,12 @@ def _convert_sampling_params(
     return vllm_sampling_params
 
 
-class VLLMInferenceImpl(Inference, ModelsProtocolPrivate):
+class VLLMInferenceImpl(
+    Inference,
+    OpenAIChatCompletionUnsupportedMixin,
+    OpenAICompletionUnsupportedMixin,
+    ModelsProtocolPrivate,
+):
     """
     vLLM-based inference model adapter for Llama Stack with support for multiple models.
 
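For context: the two mixins added above let the inline vLLM provider satisfy the OpenAI-compatibility surface by rejecting those calls outright rather than leaving the methods undefined. The sketch below illustrates that pattern only; the method names, signatures, and error type are assumptions for illustration, not the actual implementations in llama_stack.providers.utils.inference.openai_compat.

import asyncio
from typing import Any


class OpenAICompletionUnsupportedMixin:
    # Hypothetical stand-in: providers mix this in to opt out of the
    # OpenAI-compatible /v1/completions endpoint.
    async def openai_completion(self, *args: Any, **kwargs: Any) -> None:
        raise NotImplementedError(
            f"{type(self).__name__} does not support OpenAI completions"
        )


class OpenAIChatCompletionUnsupportedMixin:
    # Hypothetical stand-in: same idea for /v1/chat/completions.
    async def openai_chat_completion(self, *args: Any, **kwargs: Any) -> None:
        raise NotImplementedError(
            f"{type(self).__name__} does not support OpenAI chat completions"
        )


class VLLMInferenceImpl(OpenAIChatCompletionUnsupportedMixin, OpenAICompletionUnsupportedMixin):
    # Toy subset of the class from the diff, for demonstration only.
    pass


try:
    asyncio.run(VLLMInferenceImpl().openai_chat_completion(model="m", messages=[]))
except NotImplementedError as err:
    print(err)  # VLLMInferenceImpl does not support OpenAI chat completions

Callers that reach this provider through the OpenAI-compat routes then fail fast with a clear error instead of an AttributeError.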