Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-03 09:21:45 +00:00)
Mark inline vllm as OpenAI unsupported inference
Signed-off-by: Ben Browning <bbrownin@redhat.com>
commit 24cfa1ef1a
parent de01b1455b

1 changed file with 8 additions and 1 deletion
@@ -66,8 +66,10 @@ from llama_stack.providers.utils.inference.model_registry import (
     ModelsProtocolPrivate,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
+    OpenAIChatCompletionUnsupportedMixin,
     OpenAICompatCompletionChoice,
     OpenAICompatCompletionResponse,
+    OpenAICompletionUnsupportedMixin,
     get_stop_reason,
     process_chat_completion_stream_response,
 )
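The two names added to this import block, OpenAIChatCompletionUnsupportedMixin and OpenAICompletionUnsupportedMixin, are the helpers that turn the OpenAI-compatible entry points into explicit failures. Below is a minimal sketch of what such mixins typically look like; the method names, signatures, and error wording are assumptions for illustration, not the actual bodies in openai_compat.py.

# Hedged sketch only: the real mixins live in
# llama_stack.providers.utils.inference.openai_compat; names and signatures
# here are assumed, not copied from the library.
class OpenAICompletionUnsupportedMixin:
    async def openai_completion(self, *args, **kwargs):
        # The provider opts out of the OpenAI-compatible completions surface.
        raise NotImplementedError(
            f"OpenAI completion is not supported by {type(self).__name__}"
        )


class OpenAIChatCompletionUnsupportedMixin:
    async def openai_chat_completion(self, *args, **kwargs):
        # The provider opts out of the OpenAI-compatible chat completions surface.
        raise NotImplementedError(
            f"OpenAI chat completion is not supported by {type(self).__name__}"
        )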
@@ -172,7 +174,12 @@ def _convert_sampling_params(
     return vllm_sampling_params


-class VLLMInferenceImpl(Inference, ModelsProtocolPrivate):
+class VLLMInferenceImpl(
+    Inference,
+    OpenAIChatCompletionUnsupportedMixin,
+    OpenAICompletionUnsupportedMixin,
+    ModelsProtocolPrivate,
+):
     """
     vLLM-based inference model adapter for Llama Stack with support for multiple models.

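With VLLMInferenceImpl now inheriting from both "unsupported" mixins alongside Inference and ModelsProtocolPrivate, an OpenAI-compatible call routed to the inline vLLM provider fails loudly instead of being silently absent, while the native Llama Stack inference methods keep working. The self-contained toy below, using illustrative names rather than the real llama-stack classes, shows the mixin pattern and the resulting behavior:

# Toy example (illustrative class names, not the llama-stack ones):
# the mixin bases supply the openai_* methods, so any OpenAI-compat call
# on the combined class raises NotImplementedError.
import asyncio


class _ChatUnsupported:
    async def openai_chat_completion(self, *args, **kwargs):
        raise NotImplementedError("OpenAI chat completion is not supported")


class _CompletionUnsupported:
    async def openai_completion(self, *args, **kwargs):
        raise NotImplementedError("OpenAI completion is not supported")


class ToyInferenceImpl(_ChatUnsupported, _CompletionUnsupported):
    async def chat_completion(self, *args, **kwargs):
        # Native Llama Stack entry point is unaffected by the mixins.
        return "native chat_completion still works"


async def main() -> None:
    impl = ToyInferenceImpl()
    print(await impl.chat_completion())
    try:
        await impl.openai_chat_completion(model="m", messages=[])
    except NotImplementedError as err:
        print(f"expected failure: {err}")


asyncio.run(main())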