chore: remove vLLM inference adapter's custom list_models (#3703)

# What does this PR do?

Remove the vLLM inference adapter's custom `list_models` implementation and rely on the standard implementation inherited from `OpenAIMixin` instead.
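
The removed override did nothing vLLM-specific: it walked the OpenAI-compatible model listing and wrapped each entry in a `Model`, which is exactly what a shared implementation can do. A minimal sketch of that generic pattern follows (the standalone helper name is illustrative only, not the actual `OpenAIMixin` code):

```python
# Illustrative sketch, not the actual OpenAIMixin implementation: the removed
# vLLM-specific override amounted to this generic OpenAI-compatible enumeration.
from llama_stack.apis.models import Model, ModelType


async def list_models_via_openai_api(client, provider_id: str) -> list[Model]:
    """List models from any OpenAI-compatible server (vLLM included)."""
    models: list[Model] = []
    async for m in client.models.list():  # AsyncOpenAI-style /v1/models pagination
        models.append(
            Model(
                identifier=m.id,
                provider_resource_id=m.id,
                provider_id=provider_id,
                metadata={},
                # The old override always reported llm; embedding models cannot
                # be distinguished from the /v1/models listing alone.
                model_type=ModelType.llm,
            )
        )
    return models
```

Since none of this depends on vLLM internals, the standard implementation can own it and the adapter keeps only vLLM-specific concerns (config, refresh behavior, health checks).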

## Test Plan

CI
Author: Matthew Farrellee, 2025-10-06 13:27:30 -04:00 (committed by GitHub)
Parent: d23ed26238
Commit: ae74b31ae3

```diff
@@ -19,7 +19,6 @@ from llama_stack.apis.inference import (
     OpenAIResponseFormatParam,
     ToolChoice,
 )
-from llama_stack.apis.models import Model, ModelType
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import (
     HealthResponse,
@@ -58,21 +57,6 @@ class VLLMInferenceAdapter(OpenAIMixin):
         # Strictly respecting the refresh_models directive
         return self.config.refresh_models
 
-    async def list_models(self) -> list[Model] | None:
-        models = []
-        async for m in self.client.models.list():
-            model_type = ModelType.llm  # unclear how to determine embedding vs. llm models
-            models.append(
-                Model(
-                    identifier=m.id,
-                    provider_resource_id=m.id,
-                    provider_id=self.__provider_id__,  # type: ignore[attr-defined]
-                    metadata={},
-                    model_type=model_type,
-                )
-            )
-        return models
-
     async def health(self) -> HealthResponse:
         """
         Performs a health check by verifying connectivity to the remote vLLM server.
```