mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-20 19:56:59 +00:00
feat(ollama): periodically refresh models (#2805)
For self-hosted providers like Ollama (or vLLM), the backing server is running a set of models. That server should be treated as the source of truth and the Stack registry should just be a cache for those models. Of course, in production environments, you may not want this (because you know what model you are running statically) hence there's a config boolean to control this behavior. _This is part of a series of PRs aimed at removing the requirement of needing to set `INFERENCE_MODEL` env variables for running Llama Stack server._ ## Test Plan Copy and modify the starter.yaml template / config and enable `refresh_models: true, refresh_models_interval: 10` for the ollama provider. Then, run: ``` LLAMA_STACK_LOGGING=all=debug \ ENABLE_OLLAMA=ollama uv run llama stack run --image-type venv /tmp/starter.yaml ``` See a gargantuan amount of logs, but verify that the provider is periodically refreshing models. Stop and prune a model from ollama server, restart the server. Verify that the model goes away when I call `uv run llama-stack-client models list`
This commit is contained in:
parent
6d55f2f137
commit
68a2dfbad7
6 changed files with 123 additions and 16 deletions
|
@ -151,6 +151,8 @@ class LlamaStackAsLibraryClient(LlamaStackClient):
|
|||
self.skip_logger_removal = skip_logger_removal
|
||||
self.provider_data = provider_data
|
||||
|
||||
self.loop = asyncio.new_event_loop()
|
||||
|
||||
def initialize(self):
|
||||
if in_notebook():
|
||||
import nest_asyncio
|
||||
|
@ -159,7 +161,7 @@ class LlamaStackAsLibraryClient(LlamaStackClient):
|
|||
if not self.skip_logger_removal:
|
||||
self._remove_root_logger_handlers()
|
||||
|
||||
return asyncio.run(self.async_client.initialize())
|
||||
return self.loop.run_until_complete(self.async_client.initialize())
|
||||
|
||||
def _remove_root_logger_handlers(self):
|
||||
"""
|
||||
|
@ -172,10 +174,7 @@ class LlamaStackAsLibraryClient(LlamaStackClient):
|
|||
logger.info(f"Removed handler {handler.__class__.__name__} from root logger")
|
||||
|
||||
def request(self, *args, **kwargs):
|
||||
# NOTE: We are using AsyncLlamaStackClient under the hood
|
||||
# A new event loop is needed to convert the AsyncStream
|
||||
# from async client into SyncStream return type for streaming
|
||||
loop = asyncio.new_event_loop()
|
||||
loop = self.loop
|
||||
asyncio.set_event_loop(loop)
|
||||
|
||||
if kwargs.get("stream"):
|
||||
|
@ -192,7 +191,6 @@ class LlamaStackAsLibraryClient(LlamaStackClient):
|
|||
pending = asyncio.all_tasks(loop)
|
||||
if pending:
|
||||
loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
|
||||
loop.close()
|
||||
|
||||
return sync_generator()
|
||||
else:
|
||||
|
@ -202,7 +200,6 @@ class LlamaStackAsLibraryClient(LlamaStackClient):
|
|||
pending = asyncio.all_tasks(loop)
|
||||
if pending:
|
||||
loop.run_until_complete(asyncio.gather(*pending, return_exceptions=True))
|
||||
loop.close()
|
||||
return result
|
||||
|
||||
|
||||
|
|
|
@ -80,3 +80,34 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
|
|||
if existing_model is None:
|
||||
raise ValueError(f"Model {model_id} not found")
|
||||
await self.unregister_object(existing_model)
|
||||
|
||||
async def update_registered_models(
|
||||
self,
|
||||
provider_id: str,
|
||||
models: list[Model],
|
||||
) -> None:
|
||||
existing_models = await self.get_all_with_type("model")
|
||||
|
||||
# we may have an alias for the model registered by the user (or during initialization
|
||||
# from run.yaml) that we need to keep track of
|
||||
model_ids = {}
|
||||
for model in existing_models:
|
||||
if model.provider_id == provider_id:
|
||||
model_ids[model.provider_resource_id] = model.identifier
|
||||
logger.debug(f"unregistering model {model.identifier}")
|
||||
await self.unregister_object(model)
|
||||
|
||||
for model in models:
|
||||
if model.provider_resource_id in model_ids:
|
||||
model.identifier = model_ids[model.provider_resource_id]
|
||||
|
||||
logger.debug(f"registering model {model.identifier} ({model.provider_resource_id})")
|
||||
await self.register_object(
|
||||
ModelWithOwner(
|
||||
identifier=model.identifier,
|
||||
provider_resource_id=model.provider_resource_id,
|
||||
provider_id=provider_id,
|
||||
metadata=model.metadata,
|
||||
model_type=model.model_type,
|
||||
)
|
||||
)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue