mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-16 16:52:35 +00:00
feat: add refresh_models support to inference adapters (default: false)
Inference adapters can now configure `refresh_models: bool` to control periodic model listing from their providers. BREAKING CHANGE: the Together inference adapter default changed; previously it always refreshed models, now it follows the config.
This commit is contained in:
parent
509ac4a659
commit
bc47900ec0
31 changed files with 33 additions and 67 deletions
|
|
@ -30,10 +30,6 @@ class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig):
|
|||
default=True,
|
||||
description="Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file.",
|
||||
)
|
||||
refresh_models: bool = Field(
|
||||
default=False,
|
||||
description="Whether to refresh models periodically",
|
||||
)
|
||||
|
||||
@field_validator("tls_verify")
|
||||
@classmethod
|
||||
|
|
|
|||
|
|
@ -53,10 +53,6 @@ class VLLMInferenceAdapter(OpenAIMixin):
|
|||
"You must provide a URL in run.yaml (or via the VLLM_URL environment variable) to use vLLM."
|
||||
)
|
||||
|
||||
async def should_refresh_models(self) -> bool:
|
||||
# Strictly respecting the refresh_models directive
|
||||
return self.config.refresh_models
|
||||
|
||||
async def health(self) -> HealthResponse:
|
||||
"""
|
||||
Performs a health check by verifying connectivity to the remote vLLM server.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue