From ae74b31ae36ff13f92bf18e472860510f948c845 Mon Sep 17 00:00:00 2001
From: Matthew Farrellee
Date: Mon, 6 Oct 2025 13:27:30 -0400
Subject: [PATCH] chore: remove vLLM inference adapter's custom list_models
 (#3703)

# What does this PR do?

Remove the vLLM inference adapter's custom `list_models` implementation and rely on the standard `OpenAIMixin` implementation instead.

## Test Plan

CI

---
 .../providers/remote/inference/vllm/vllm.py | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 31241213a..4e7884cd2 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -19,7 +19,6 @@ from llama_stack.apis.inference import (
     OpenAIResponseFormatParam,
     ToolChoice,
 )
-from llama_stack.apis.models import Model, ModelType
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import (
     HealthResponse,
@@ -58,21 +57,6 @@ class VLLMInferenceAdapter(OpenAIMixin):
         # Strictly respecting the refresh_models directive
         return self.config.refresh_models
 
-    async def list_models(self) -> list[Model] | None:
-        models = []
-        async for m in self.client.models.list():
-            model_type = ModelType.llm  # unclear how to determine embedding vs. llm models
-            models.append(
-                Model(
-                    identifier=m.id,
-                    provider_resource_id=m.id,
-                    provider_id=self.__provider_id__,  # type: ignore[attr-defined]
-                    metadata={},
-                    model_type=model_type,
-                )
-            )
-        return models
-
     async def health(self) -> HealthResponse:
         """
         Performs a health check by verifying connectivity to the remote vLLM server.
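
For context on what behavior is being delegated, below is a minimal sketch of an OpenAI-compatible `list_models` hoisted into a shared mixin rather than duplicated per adapter. It mirrors the removed adapter method; the `client` and `__provider_id__` attributes are taken from the diff above, but the sketch's class name is hypothetical and the actual `OpenAIMixin` implementation in llama-stack may differ in detail.

```python
# Sketch only: roughly what a shared, OpenAI-compatible list_models looks like
# when it lives on a mixin instead of being re-implemented in each adapter.
# Mirrors the removed VLLMInferenceAdapter.list_models; not the real OpenAIMixin code.
from typing import Any

from llama_stack.apis.models import Model, ModelType


class OpenAICompatListModelsSketch:
    # The consuming adapter is expected to provide these, as VLLMInferenceAdapter does:
    client: Any           # AsyncOpenAI-style client for the provider's endpoint
    __provider_id__: str  # set on the adapter by the llama-stack provider registry

    async def list_models(self) -> list[Model] | None:
        models: list[Model] = []
        # models.list() paginates the provider's OpenAI-compatible /v1/models endpoint.
        async for m in self.client.models.list():
            models.append(
                Model(
                    identifier=m.id,
                    provider_resource_id=m.id,
                    provider_id=self.__provider_id__,
                    metadata={},
                    # Same limitation the removed code noted: /v1/models does not say
                    # whether a model is an embedding model, so default to llm.
                    model_type=ModelType.llm,
                )
            )
        return models
```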