Mark inline vllm as OpenAI unsupported inference

Signed-off-by: Ben Browning <bbrownin@redhat.com>
Author: Ben Browning
Date: 2025-04-09 08:36:01 -04:00
Parent: de01b1455b
Commit: 24cfa1ef1a


@@ -66,8 +66,10 @@ from llama_stack.providers.utils.inference.model_registry import (
     ModelsProtocolPrivate,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
+    OpenAIChatCompletionUnsupportedMixin,
     OpenAICompatCompletionChoice,
     OpenAICompatCompletionResponse,
+    OpenAICompletionUnsupportedMixin,
     get_stop_reason,
     process_chat_completion_stream_response,
 )
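
For readers who don't have openai_compat open: the two mixins imported above appear to be small async defaults that reject the OpenAI-compatible endpoints outright. The sketch below is a guess at their shape, not a copy of the repo code; the exception type, message text, and catch-all signatures are assumptions, and the real definitions live in llama_stack.providers.utils.inference.openai_compat.

# Hedged sketch of the two mixins imported above; exact details are assumptions.
from typing import Any


class OpenAICompletionUnsupportedMixin:
    async def openai_completion(self, *args: Any, **kwargs: Any) -> Any:
        # Fail fast rather than attempting a call the provider cannot serve.
        raise ValueError(
            f"{self.__class__.__name__} doesn't support OpenAI-compatible completions"
        )


class OpenAIChatCompletionUnsupportedMixin:
    async def openai_chat_completion(self, *args: Any, **kwargs: Any) -> Any:
        # Same idea for the chat-completions endpoint.
        raise ValueError(
            f"{self.__class__.__name__} doesn't support OpenAI-compatible chat completions"
        )
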
@@ -172,7 +174,12 @@ def _convert_sampling_params(
     return vllm_sampling_params
 
 
-class VLLMInferenceImpl(Inference, ModelsProtocolPrivate):
+class VLLMInferenceImpl(
+    Inference,
+    OpenAIChatCompletionUnsupportedMixin,
+    OpenAICompletionUnsupportedMixin,
+    ModelsProtocolPrivate,
+):
     """
     vLLM-based inference model adapter for Llama Stack with support for multiple models.
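
With those mixins in the class's bases, the expected effect is that the inline vLLM provider keeps its native inference paths but answers the OpenAI-compatible entry points with an explicit "unsupported" error. The usage sketch below is illustrative only: ToyProvider is a stand-in for VLLMInferenceImpl, and the keyword argument names follow the OpenAI chat-completions convention rather than being taken from this diff.

# Hedged usage sketch: calling the OpenAI-compatible endpoint is expected to raise.
import asyncio

from llama_stack.providers.utils.inference.openai_compat import (
    OpenAIChatCompletionUnsupportedMixin,
)


class ToyProvider(OpenAIChatCompletionUnsupportedMixin):
    """Illustrative stand-in for VLLMInferenceImpl; only the mixin behavior matters."""


async def main() -> None:
    provider = ToyProvider()
    try:
        # Parameter names are assumed from the OpenAI chat-completions API shape.
        await provider.openai_chat_completion(
            model="anything",
            messages=[{"role": "user", "content": "hello"}],
        )
    except Exception as err:  # expected: the mixin's "unsupported" error
        print(f"rejected as expected: {err}")


asyncio.run(main())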