feat: add refresh_models support to inference adapters (default: false)

inference adapters can now set `refresh_models: bool` in their config to control whether the adapter periodically refreshes its model list from the provider
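
As an illustration, here is a minimal sketch of the shared field once it is hoisted into the base config class. Its placement on `RemoteInferenceProviderConfig` is inferred from the per-adapter definitions this diff removes, so treat the class body below as an assumption rather than the exact source:

    # Sketch only: refresh_models defined once on the shared base config,
    # mirroring the Field definition removed from OllamaImplConfig below.
    from pydantic import BaseModel, Field

    class RemoteInferenceProviderConfig(BaseModel):
        refresh_models: bool = Field(
            default=False,
            description="Whether to refresh models periodically",
        )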

BREAKING CHANGE: the together inference adapter's default behavior changed. It previously always refreshed models from the provider; it now follows the `refresh_models` config, which defaults to false.
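
Deployments that relied on the old always-refresh behavior can opt back in explicitly. A hedged example, assuming the together adapter's config class is named `TogetherImplConfig` and lives alongside the other remote inference configs (both assumptions, not confirmed by this diff):

    # Hypothetical migration: restore the pre-change behavior for together
    # by enabling refresh_models explicitly (the default is now False).
    from llama_stack.providers.remote.inference.together.config import TogetherImplConfig  # path and class name assumed

    config = TogetherImplConfig(refresh_models=True)
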
Matthew Farrellee 2025-10-07 07:23:07 -04:00
parent 509ac4a659
commit bc47900ec0
31 changed files with 33 additions and 67 deletions


@@ -6,8 +6,6 @@
 
 from typing import Any
 
-from pydantic import Field
-
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 
 DEFAULT_OLLAMA_URL = "http://localhost:11434"
@@ -15,10 +13,6 @@ DEFAULT_OLLAMA_URL = "http://localhost:11434"
 
 class OllamaImplConfig(RemoteInferenceProviderConfig):
     url: str = DEFAULT_OLLAMA_URL
-    refresh_models: bool = Field(
-        default=False,
-        description="Whether to refresh models periodically",
-    )
 
     @classmethod
     def sample_run_config(cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", **kwargs) -> dict[str, Any]:
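
To make the flag's effect concrete, a minimal sketch of how an adapter could gate a periodic model-listing task on it. This is illustrative only; the loop, the `list_provider_models` callable, and the interval are hypothetical and are not the adapter code touched by this commit:

    import asyncio

    async def refresh_models_loop(config, list_provider_models, interval_s: float = 300.0) -> None:
        """Hypothetical periodic refresh task gated on config.refresh_models."""
        if not config.refresh_models:
            # Default (False): never poll the provider for its model list.
            return
        while True:
            models = await list_provider_models()  # assumed provider call
            print(f"refreshed {len(models)} models")
            await asyncio.sleep(interval_s)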