feat: add refresh_models support to inference adapters (default: false) (#3719)

# What does this PR do?

Inference adapters can now configure `refresh_models: bool` to control whether models are periodically re-listed from their providers.

BREAKING CHANGE: the Together inference adapter's default has changed. Previously it always refreshed; now it follows the config (default: false).

Addresses "models: refresh" on #3517.

## Test Plan

CI with new tests.
2025-12-03 18:00:36 +00:00 · 2025-10-07 09:19:56 -04:00 · 2025-10-07 09:19:56 -04:00 · e892a3f7f4
commit e892a3f7f4
parent 8b9af03a1b
31 changed files with 33 additions and 67 deletions
--- a/llama_stack/providers/remote/inference/ollama/config.py
+++ b/llama_stack/providers/remote/inference/ollama/config.py
@ -6,8 +6,6 @@

 from typing import Any

-from pydantic import Field
-
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig

 DEFAULT_OLLAMA_URL = "http://localhost:11434"
@ -15,10 +13,6 @@ DEFAULT_OLLAMA_URL = "http://localhost:11434"

 class OllamaImplConfig(RemoteInferenceProviderConfig):
    url: str = DEFAULT_OLLAMA_URL
-    refresh_models: bool = Field(
-        default=False,
-        description="Whether to refresh models periodically",
-    )

    @classmethod
    def sample_run_config(cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", **kwargs) -> dict[str, Any]: