diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index b9422d85d..af7a86905 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -231,7 +231,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         pass
 
     async def unregister_model(self, model_id: str) -> None:
-        pass
+        await self.register_helper.unregister_model(model_id)
 
     async def completion(
         self,
@@ -342,6 +342,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         res = self.client.models.list()
         available_models = [m.id for m in res]
         if model.provider_resource_id not in available_models:
+            await self.unregister_model(model.provider_resource_id)
             raise ValueError(
                 f"Model {model.provider_resource_id} is not being served by vLLM. "
                 f"Available models: {', '.join(available_models)}"
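
Taken together, the two hunks make the adapter's `unregister_model` delegate to its registry helper and call it before raising when vLLM does not serve the requested model, presumably so a failed registration does not leave a stale entry behind. Below is a minimal, self-contained sketch of that rollback pattern under those assumptions; `FakeRegistryHelper`, `FakeBackendClient`, and `Adapter` are illustrative stand-ins, not llama_stack classes.

```python
import asyncio


class FakeRegistryHelper:
    """Illustrative stand-in for a registry helper; not a llama_stack API."""

    def __init__(self) -> None:
        self.registered: set[str] = set()

    async def register_model(self, model_id: str) -> None:
        self.registered.add(model_id)

    async def unregister_model(self, model_id: str) -> None:
        self.registered.discard(model_id)


class FakeBackendClient:
    """Illustrative stand-in for the inference backend's model listing."""

    def __init__(self, served_models: list[str]) -> None:
        self.served_models = served_models


class Adapter:
    def __init__(self, client: FakeBackendClient) -> None:
        self.client = client
        self.register_helper = FakeRegistryHelper()

    async def unregister_model(self, model_id: str) -> None:
        # Delegate cleanup to the registry helper, mirroring the first hunk.
        await self.register_helper.unregister_model(model_id)

    async def register_model(self, model_id: str) -> None:
        await self.register_helper.register_model(model_id)
        if model_id not in self.client.served_models:
            # Roll back the registration before surfacing the error,
            # mirroring the second hunk.
            await self.unregister_model(model_id)
            raise ValueError(
                f"Model {model_id} is not being served by vLLM. "
                f"Available models: {', '.join(self.client.served_models)}"
            )


async def main() -> None:
    adapter = Adapter(FakeBackendClient(served_models=["llama-3.1-8b"]))
    try:
        await adapter.register_model("not-served-model")
    except ValueError as exc:
        print(exc)
    # The failed registration was rolled back rather than left behind.
    assert "not-served-model" not in adapter.register_helper.registered


if __name__ == "__main__":
    asyncio.run(main())
```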