Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-05 18:22:41 +00:00)
Mark inline vllm as OpenAI unsupported inference
Signed-off-by: Ben Browning <bbrownin@redhat.com>
parent de01b1455b
commit 24cfa1ef1a
1 changed file with 8 additions and 1 deletion
@@ -66,8 +66,10 @@ from llama_stack.providers.utils.inference.model_registry import (
     ModelsProtocolPrivate,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
+    OpenAIChatCompletionUnsupportedMixin,
     OpenAICompatCompletionChoice,
     OpenAICompatCompletionResponse,
+    OpenAICompletionUnsupportedMixin,
     get_stop_reason,
     process_chat_completion_stream_response,
 )
@@ -172,7 +174,12 @@ def _convert_sampling_params(
     return vllm_sampling_params
 
 
-class VLLMInferenceImpl(Inference, ModelsProtocolPrivate):
+class VLLMInferenceImpl(
+    Inference,
+    OpenAIChatCompletionUnsupportedMixin,
+    OpenAICompletionUnsupportedMixin,
+    ModelsProtocolPrivate,
+):
     """
     vLLM-based inference model adapter for Llama Stack with support for multiple models.
 
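For context: the two mixins added above let the inline vLLM provider satisfy the OpenAI-compatibility surface by rejecting those calls outright rather than leaving the methods undefined. The sketch below illustrates that pattern only; the method names, signatures, and error type are assumptions for illustration, not the actual implementations in llama_stack.providers.utils.inference.openai_compat.

import asyncio
from typing import Any


class OpenAICompletionUnsupportedMixin:
    # Hypothetical stand-in: providers mix this in to opt out of the
    # OpenAI-compatible /v1/completions endpoint.
    async def openai_completion(self, *args: Any, **kwargs: Any) -> None:
        raise NotImplementedError(
            f"{type(self).__name__} does not support OpenAI completions"
        )


class OpenAIChatCompletionUnsupportedMixin:
    # Hypothetical stand-in: same idea for /v1/chat/completions.
    async def openai_chat_completion(self, *args: Any, **kwargs: Any) -> None:
        raise NotImplementedError(
            f"{type(self).__name__} does not support OpenAI chat completions"
        )


class VLLMInferenceImpl(OpenAIChatCompletionUnsupportedMixin, OpenAICompletionUnsupportedMixin):
    # Toy subset of the class from the diff, for demonstration only.
    pass


try:
    asyncio.run(VLLMInferenceImpl().openai_chat_completion(model="m", messages=[]))
except NotImplementedError as err:
    print(err)  # VLLMInferenceImpl does not support OpenAI chat completions

Callers that reach this provider through the OpenAI-compat routes then fail fast with a clear error instead of an AttributeError.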