ollama: periodically refresh models

2025-12-23 04:42:25 +00:00 · 2025-07-17 16:01:49 -07:00 · 2025-07-17 16:01:49 -07:00 · a2f460871b
commit a2f460871b
parent 9e3ae50306
5 changed files with 121 additions and 9 deletions
--- a/llama_stack/apis/inference/inference.py
+++ b/llama_stack/apis/inference/inference.py
@@ -819,6 +819,12 @@ class OpenAIEmbeddingsResponse(BaseModel):
 class ModelStore(Protocol):
    async def get_model(self, identifier: str) -> Model: ...

+    async def update_registered_models(  # Protocol stub: the body is `...`, so no behavior is defined here
+        self,
+        provider_id: str,  # presumably names the provider the models belong to -- TODO confirm against implementers
+        models: list[Model],  # NOTE(review): whether this list replaces or merges with existing entries is not visible here
+    ) -> None: ...
+

 class TextTruncation(Enum):
    """Config for how to truncate text for embedding when text is longer than the model's max sequence length. Start and End semantics depend on whether the language is left-to-right or right-to-left.