From 24cfa1ef1aaab15d355f57c4baa9d591f01afcda Mon Sep 17 00:00:00 2001
From: Ben Browning
Date: Wed, 9 Apr 2025 08:36:01 -0400
Subject: [PATCH] Mark inline vllm as OpenAI unsupported inference

Signed-off-by: Ben Browning
---
 llama_stack/providers/inline/inference/vllm/vllm.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/llama_stack/providers/inline/inference/vllm/vllm.py b/llama_stack/providers/inline/inference/vllm/vllm.py
index ea2643b7a..085c79d6b 100644
--- a/llama_stack/providers/inline/inference/vllm/vllm.py
+++ b/llama_stack/providers/inline/inference/vllm/vllm.py
@@ -66,8 +66,10 @@ from llama_stack.providers.utils.inference.model_registry import (
     ModelsProtocolPrivate,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
+    OpenAIChatCompletionUnsupportedMixin,
     OpenAICompatCompletionChoice,
     OpenAICompatCompletionResponse,
+    OpenAICompletionUnsupportedMixin,
     get_stop_reason,
     process_chat_completion_stream_response,
 )
@@ -172,7 +174,12 @@ def _convert_sampling_params(
     return vllm_sampling_params
 
 
-class VLLMInferenceImpl(Inference, ModelsProtocolPrivate):
+class VLLMInferenceImpl(
+    Inference,
+    OpenAIChatCompletionUnsupportedMixin,
+    OpenAICompletionUnsupportedMixin,
+    ModelsProtocolPrivate,
+):
     """
     vLLM-based inference model adapter for Llama Stack with support for multiple
     models.
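
For context, a minimal sketch of what the two mixins are assumed to provide: async methods that
reject the OpenAI-compatible completion and chat completion entry points with NotImplementedError,
so the inline vLLM provider reports those APIs as unsupported. The *args/**kwargs signatures below
are illustrative assumptions, not the actual parameter lists in
llama_stack.providers.utils.inference.openai_compat.

    class OpenAICompletionUnsupportedMixin:
        async def openai_completion(self, *args, **kwargs):
            # Assumed behavior: this provider exposes no OpenAI /v1/completions-compatible endpoint.
            raise NotImplementedError(f"OpenAI completion not supported by {type(self).__name__}")


    class OpenAIChatCompletionUnsupportedMixin:
        async def openai_chat_completion(self, *args, **kwargs):
            # Assumed behavior: this provider exposes no OpenAI /v1/chat/completions-compatible endpoint.
            raise NotImplementedError(f"OpenAI chat completion not supported by {type(self).__name__}")

With this change, VLLMInferenceImpl inherits these rejections instead of silently lacking the
OpenAI-compatible methods, which gives callers an explicit error for the unsupported APIs.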