feat: add refresh_models support to inference adapters (default: false)

Inference adapters can now configure `refresh_models: bool` to control periodic model listing from their providers.

BREAKING CHANGE: the together inference adapter's default changed. Previously it always refreshed models; now it follows the `refresh_models` config setting.
2025-12-16 16:52:35 +00:00 · 2025-10-07 07:23:07 -04:00 · 2025-10-07 07:23:07 -04:00 · bc47900ec0
commit bc47900ec0
parent 509ac4a659
31 changed files with 33 additions and 67 deletions
--- a/llama_stack/providers/remote/inference/vllm/config.py
+++ b/llama_stack/providers/remote/inference/vllm/config.py
@@ -30,10 +30,6 @@ class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig):
        default=True,
        description="Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file.",
    )
-    refresh_models: bool = Field(
-        default=False,
-        description="Whether to refresh models periodically",
-    )

    @field_validator("tls_verify")
    @classmethod
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -53,10 +53,6 @@ class VLLMInferenceAdapter(OpenAIMixin):
                "You must provide a URL in run.yaml (or via the VLLM_URL environment variable) to use vLLM."
            )

-    async def should_refresh_models(self) -> bool:
-        # Strictly respecting the refresh_models directive
-        return self.config.refresh_models
-
    async def health(self) -> HealthResponse:
        """
        Performs a health check by verifying connectivity to the remote vLLM server.