chore: remove vLLM inference adapter's custom list_models (#3703)

# What does this PR do?

Remove the vLLM inference adapter's custom `list_models` implementation and rely on the standard implementation inherited from `OpenAIMixin` instead.
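
The removed override did nothing vLLM-specific: it walked the OpenAI-compatible model listing and wrapped each entry in a `Model`, which is exactly what a shared implementation can do. A minimal sketch of that generic pattern follows (the standalone helper name is illustrative only, not the actual `OpenAIMixin` code):

```python
# Illustrative sketch, not the actual OpenAIMixin implementation: the removed
# vLLM-specific override amounted to this generic OpenAI-compatible enumeration.
from llama_stack.apis.models import Model, ModelType


async def list_models_via_openai_api(client, provider_id: str) -> list[Model]:
    """List models from any OpenAI-compatible server (vLLM included)."""
    models: list[Model] = []
    async for m in client.models.list():  # AsyncOpenAI-style /v1/models pagination
        models.append(
            Model(
                identifier=m.id,
                provider_resource_id=m.id,
                provider_id=provider_id,
                metadata={},
                # The old override always reported llm; embedding models cannot
                # be distinguished from the /v1/models listing alone.
                model_type=ModelType.llm,
            )
        )
    return models
```

Since none of this depends on vLLM internals, the standard implementation can own it and the adapter keeps only vLLM-specific concerns (config, refresh behavior, health checks).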

## Test Plan

CI
Author: Matthew Farrellee, 2025-10-06 13:27:30 -04:00 (committed by GitHub)
Parent: d23ed26238
Commit: ae74b31ae3

```diff
@@ -19,7 +19,6 @@ from llama_stack.apis.inference import (
     OpenAIResponseFormatParam,
     ToolChoice,
 )
-from llama_stack.apis.models import Model, ModelType
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import (
     HealthResponse,
@@ -58,21 +57,6 @@ class VLLMInferenceAdapter(OpenAIMixin):
         # Strictly respecting the refresh_models directive
         return self.config.refresh_models
 
-    async def list_models(self) -> list[Model] | None:
-        models = []
-        async for m in self.client.models.list():
-            model_type = ModelType.llm  # unclear how to determine embedding vs. llm models
-            models.append(
-                Model(
-                    identifier=m.id,
-                    provider_resource_id=m.id,
-                    provider_id=self.__provider_id__,  # type: ignore[attr-defined]
-                    metadata={},
-                    model_type=model_type,
-                )
-            )
-        return models
-
     async def health(self) -> HealthResponse:
         """
         Performs a health check by verifying connectivity to the remote vLLM server.
```