Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-06 20:44:58 +00:00)
chore: remove vLLM inference adapter's custom list_models (#3703)
# What does this PR do?

Remove the vLLM inference adapter's custom `list_models` implementation and rely on the standard implementation instead.

## Test Plan

ci
parent d23ed26238
commit ae74b31ae3
1 changed file with 0 additions and 16 deletions
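For context, the adapter now inherits model listing from `OpenAIMixin` instead of overriding it. Below is a minimal sketch of what such a mixin-provided default plausibly looks like, reusing the same `client`, `Model`, and `ModelType` symbols as the removed code; the real mixin lives elsewhere in the codebase and is not part of this diff:

```python
# Hypothetical sketch of the mixin-provided default, NOT the actual
# llama-stack source. It assumes an OpenAI-compatible `self.client`
# and a framework-assigned `self.__provider_id__`, exactly as the
# removed adapter code did.
from llama_stack.apis.models import Model, ModelType


class OpenAIMixinSketch:
    async def list_models(self) -> list[Model] | None:
        models = []
        # Iterate over the server's OpenAI-compatible /v1/models listing
        async for m in self.client.models.list():
            models.append(
                Model(
                    identifier=m.id,
                    provider_resource_id=m.id,
                    provider_id=self.__provider_id__,
                    metadata={},
                    model_type=ModelType.llm,
                )
            )
        return models
```

Since the removed override did essentially the same thing (including defaulting every entry to `ModelType.llm`), the custom method was duplication and could be dropped in favor of the shared code path.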
```diff
@@ -19,7 +19,6 @@ from llama_stack.apis.inference import (
     OpenAIResponseFormatParam,
     ToolChoice,
 )
-from llama_stack.apis.models import Model, ModelType
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import (
     HealthResponse,
@@ -58,21 +57,6 @@ class VLLMInferenceAdapter(OpenAIMixin):
         # Strictly respecting the refresh_models directive
         return self.config.refresh_models
 
-    async def list_models(self) -> list[Model] | None:
-        models = []
-        async for m in self.client.models.list():
-            model_type = ModelType.llm  # unclear how to determine embedding vs. llm models
-            models.append(
-                Model(
-                    identifier=m.id,
-                    provider_resource_id=m.id,
-                    provider_id=self.__provider_id__,  # type: ignore[attr-defined]
-                    metadata={},
-                    model_type=model_type,
-                )
-            )
-        return models
-
     async def health(self) -> HealthResponse:
         """
         Performs a health check by verifying connectivity to the remote vLLM server.
```
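After this change, `VLLMInferenceAdapter` defines no `list_models` of its own, so callers resolve it through the mixin. A rough usage sketch follows; the construction helper is hypothetical and not part of llama-stack:

```python
import asyncio


async def main() -> None:
    # build_adapter() is a hypothetical stand-in for however the stack
    # instantiates VLLMInferenceAdapter from its config.
    adapter = build_adapter()
    models = await adapter.list_models()  # now served by OpenAIMixin
    for model in models or []:
        print(model.identifier, model.model_type)


asyncio.run(main())
```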