more fixes, plug shutdown handlers

still, FastAPI's SIGINT handler is not calling ours
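
For context on the handler chaining this message alludes to, here is a minimal sketch using only the stdlib signal module; run_shutdown_handlers and install_sigint_handler are illustrative names, not code from this commit:

import signal
import sys
import types
from typing import Optional

def run_shutdown_handlers() -> None:
    # Hypothetical teardown; stands in for whatever the server needs
    # to clean up (provider connections, background tasks, ...).
    print("running shutdown handlers", file=sys.stderr)

def install_sigint_handler() -> None:
    # Remember whatever handler is currently installed (the default,
    # or one set by the serving framework) so we can chain to it.
    previous = signal.getsignal(signal.SIGINT)

    def handler(signum: int, frame: Optional[types.FrameType]) -> None:
        run_shutdown_handlers()
        if callable(previous):
            previous(signum, frame)
        else:
            # previous was SIG_DFL / SIG_IGN: fall back to a plain exit.
            sys.exit(0)

    signal.signal(signal.SIGINT, handler)

The wrinkle the message points at: uvicorn, which serves FastAPI apps, installs its own SIGINT/SIGTERM handlers when the server starts, so a handler registered before startup gets replaced rather than chained. Hooking cleanup into the app's shutdown/lifespan events is the usual way around that.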
This commit is contained in:
Ashwin Bharambe, 2024-10-05 23:48:18 -07:00 (committed by Ashwin Bharambe)
parent 60dead6196
commit e45a417543
4 changed files with 32 additions and 12 deletions

@@ -42,7 +42,7 @@ from llama_stack.apis.inference.inference import (
 from llama_stack.providers.utils.inference.augment_messages import (
     augment_messages_for_tools,
 )
-from llama_stack.providers.utils.inference.routable import RoutableProviderForModels
+from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 
 from .config import VLLMConfig
@@ -75,7 +75,7 @@ def _vllm_sampling_params(sampling_params: Any) -> SamplingParams:
     return SamplingParams().from_optional(**kwargs)
 
-class VLLMInferenceImpl(Inference, RoutableProviderForModels):
+class VLLMInferenceImpl(Inference, ModelRegistryHelper):
     """Inference implementation for vLLM."""
 
     HF_MODEL_MAPPINGS = {
@@ -109,7 +109,7 @@ class VLLMInferenceImpl(Inference, RoutableProviderForModels):
     def __init__(self, config: VLLMConfig):
         Inference.__init__(self)
-        RoutableProviderForModels.__init__(
+        ModelRegistryHelper.__init__(
            self,
            stack_to_provider_models_map=self.HF_MODEL_MAPPINGS,
        )
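
For orientation, a minimal sketch of the helper's shape as implied by the constructor call above; the method name map_to_provider_model and the body are assumptions inferred from this diff, not the actual llama_stack source:

from typing import Dict

class ModelRegistryHelper:
    # Sketch only: maps stack model identifiers to provider-native
    # model names (for the vLLM provider, HuggingFace repo names).
    def __init__(self, stack_to_provider_models_map: Dict[str, str]) -> None:
        self.stack_to_provider_models_map = stack_to_provider_models_map

    def map_to_provider_model(self, identifier: str) -> str:
        # Assumed lookup; raises on models this provider does not serve.
        if identifier not in self.stack_to_provider_models_map:
            raise ValueError(f"Model {identifier} not supported by this provider")
        return self.stack_to_provider_models_map[identifier]

With that shape, the diff amounts to a rename plus a module move: VLLMInferenceImpl keeps passing its HF_MODEL_MAPPINGS (stack identifiers to HuggingFace model names) unchanged.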