Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-10-06 20:44:58 +00:00)
chore: remove vLLM inference adapter's custom list_models (#3703)
# What does this PR do?

Remove the vLLM inference adapter's custom `list_models` implementation and rely on the standard implementation instead.

## Test Plan

ci
parent d23ed26238
commit ae74b31ae3
1 changed file with 0 additions and 16 deletions
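For context, the adapter now inherits model listing from `OpenAIMixin` instead of overriding it. Below is a minimal sketch of what such a mixin-provided default plausibly looks like, reusing the same `client`, `Model`, and `ModelType` symbols as the removed code; the real mixin lives elsewhere in the codebase and is not part of this diff:

```python
# Hypothetical sketch of the mixin-provided default, NOT the actual
# llama-stack source. It assumes an OpenAI-compatible `self.client`
# and a framework-assigned `self.__provider_id__`, exactly as the
# removed adapter code did.
from llama_stack.apis.models import Model, ModelType


class OpenAIMixinSketch:
    async def list_models(self) -> list[Model] | None:
        models = []
        # Iterate over the server's OpenAI-compatible /v1/models listing
        async for m in self.client.models.list():
            models.append(
                Model(
                    identifier=m.id,
                    provider_resource_id=m.id,
                    provider_id=self.__provider_id__,
                    metadata={},
                    model_type=ModelType.llm,
                )
            )
        return models
```

Since the removed override did essentially the same thing (including defaulting every entry to `ModelType.llm`), the custom method was duplication and could be dropped in favor of the shared code path.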
```diff
@@ -19,7 +19,6 @@ from llama_stack.apis.inference import (
     OpenAIResponseFormatParam,
     ToolChoice,
 )
-from llama_stack.apis.models import Model, ModelType
 from llama_stack.log import get_logger
 from llama_stack.providers.datatypes import (
     HealthResponse,
@@ -58,21 +57,6 @@ class VLLMInferenceAdapter(OpenAIMixin):
         # Strictly respecting the refresh_models directive
         return self.config.refresh_models
 
-    async def list_models(self) -> list[Model] | None:
-        models = []
-        async for m in self.client.models.list():
-            model_type = ModelType.llm  # unclear how to determine embedding vs. llm models
-            models.append(
-                Model(
-                    identifier=m.id,
-                    provider_resource_id=m.id,
-                    provider_id=self.__provider_id__,  # type: ignore[attr-defined]
-                    metadata={},
-                    model_type=model_type,
-                )
-            )
-        return models
-
     async def health(self) -> HealthResponse:
         """
         Performs a health check by verifying connectivity to the remote vLLM server.
```
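After this change, `VLLMInferenceAdapter` defines no `list_models` of its own, so callers resolve it through the mixin. A rough usage sketch follows; the construction helper is hypothetical and not part of llama-stack:

```python
import asyncio


async def main() -> None:
    # build_adapter() is a hypothetical stand-in for however the stack
    # instantiates VLLMInferenceAdapter from its config.
    adapter = build_adapter()
    models = await adapter.list_models()  # now served by OpenAIMixin
    for model in models or []:
        print(model.identifier, model.model_type)


asyncio.run(main())
```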