feat(vllm): periodically refresh models (#2823)

Just like #2805, but for vLLM. We also make the `VLLM_URL` environment variable optional (not required): if it is not specified, the provider silently sits idle and only complains when someone eventually tries to call a completion on it. This is done to allow this provider to be present in the `starter` distribution.

## Test Plan

Set up vLLM, copy the starter template, set `{ refresh_models: true, refresh_models_interval: 10 }` for the vllm provider, and then run:

```
ENABLE_VLLM=vllm VLLM_URL=http://localhost:8000/v1 \
  uv run llama stack run --image-type venv /tmp/starter.yaml
```

Verify that `llama-stack-client models list` brings up the model correctly from vLLM.
2025-12-23 02:22:25 +00:00 · 2025-07-18 15:53:09 -07:00 · 2025-07-18 15:53:09 -07:00 · 199f859eec
commit 199f859eec
parent ade075152e
7 changed files with 98 additions and 14 deletions
--- a/llama_stack/distribution/routing_tables/models.py
+++ b/llama_stack/distribution/routing_tables/models.py
@ -81,7 +81,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
            raise ValueError(f"Model {model_id} not found")
        await self.unregister_object(existing_model)

-    async def update_registered_models(
+    async def update_registered_llm_models(
        self,
        provider_id: str,
        models: list[Model],
@ -92,12 +92,16 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
        # from run.yaml) that we need to keep track of
        model_ids = {}
        for model in existing_models:
-            if model.provider_id == provider_id:
+            # we leave embeddings models alone because often we don't get metadata
+            # (embedding dimension, etc.) from the provider
+            if model.provider_id == provider_id and model.model_type == ModelType.llm:
                model_ids[model.provider_resource_id] = model.identifier
                logger.debug(f"unregistering model {model.identifier}")
                await self.unregister_object(model)

        for model in models:
+            if model.model_type != ModelType.llm:
+                continue
            if model.provider_resource_id in model_ids:
                model.identifier = model_ids[model.provider_resource_id]