Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-10-08 13:00:52 +00:00
fix: improve model availability checks: Allows use of unavailable models on startup (#3717)
Some checks failed
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 0s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 3s
Python Package Build Test / build (3.12) (push) Failing after 1s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 4s
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 4s
Python Package Build Test / build (3.13) (push) Failing after 2s
Vector IO Integration Tests / test-matrix (push) Failing after 5s
Unit Tests / unit-tests (3.12) (push) Failing after 4s
API Conformance Tests / check-schema-compatibility (push) Successful in 10s
Unit Tests / unit-tests (3.13) (push) Failing after 4s
Test External API and Providers / test-external (venv) (push) Failing after 7s
UI Tests / ui-tests (22) (push) Successful in 39s
Pre-commit / pre-commit (push) Successful in 1m28s
# What does this PR do?

- Allow use of unavailable models on startup
- Add a `has_model` method to `ModelsRoutingTable` for checking pre-registered models
- Update `check_model_availability` to consult the `model_store` before calling provider APIs

## Test Plan

Start Llama Stack and point it at an unavailable vLLM endpoint:

```
VLLM_URL=https://my-unavailable-vllm/v1 MILVUS_DB_PATH=./milvus.db INFERENCE_MODEL=vllm uv run --with llama-stack llama stack build --distro starter --image-type venv --run
```

Llama Stack starts without crashing; the unreachable provider only produces error and warning log entries:

```
- provider_id: rag-runtime
  toolgroup_id: builtin::rag
vector_dbs: []
version: 2
INFO 2025-10-07 06:40:41,804 llama_stack.providers.utils.inference.inference_store:74 inference: Write queue disabled for SQLite to avoid concurrency issues
INFO 2025-10-07 06:40:42,066 llama_stack.providers.utils.responses.responses_store:96 openai_responses: Write queue disabled for SQLite to avoid concurrency issues
ERROR 2025-10-07 06:40:58,882 llama_stack.providers.utils.inference.openai_mixin:436 providers::utils: VLLMInferenceAdapter.list_provider_model_ids() failed with: Request timed out.
WARNING 2025-10-07 06:40:58,883 llama_stack.core.routing_tables.models:36 core::routing_tables: Model refresh failed for provider vllm: Request timed out.
[...]
INFO 2025-10-07 06:40:59,036 uvicorn.error:216 uncategorized: Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit)
INFO 2025-10-07 06:41:04,064 openai._base_client:1618 uncategorized: Retrying request to /models in 0.398814 seconds
INFO 2025-10-07 06:41:09,497 openai._base_client:1618 uncategorized: Retrying request to /models in 0.781908 seconds
ERROR 2025-10-07 06:41:15,282 llama_stack.providers.utils.inference.openai_mixin:436 providers::utils: VLLMInferenceAdapter.list_provider_model_ids() failed with: Request timed out.
WARNING 2025-10-07 06:41:15,283 llama_stack.core.routing_tables.models:36 core::routing_tables: Model refresh failed for provider vllm: Request timed out.
```
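The lookup order described above can be pictured with a minimal sketch. The attribute and method names below are assumptions pieced together from the PR text and the logs (`list_provider_model_ids` appears in the test-plan output); this is illustrative, not the exact llama-stack implementation:

```python
# Hedged sketch of the new availability-check ordering; names are assumed.
async def check_model_availability(self, model: str) -> bool:
    # Pre-registered models win: if the stack's model store already knows the
    # model, report it available without contacting the provider at all.
    if self.model_store is not None and await self.model_store.has_model(model):
        return True
    # Otherwise fall back to the provider's model listing, which may hit the
    # network and fail when the provider is unreachable.
    provider_models = await self.list_provider_model_ids()  # hypothetical helper
    return model in provider_models
```

This ordering is what lets the stack start with an unreachable vLLM endpoint: the store lookup succeeds locally, and only the fallback path can time out.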
This commit is contained in: parent d5b136ac66, commit 1970b4aa4b.

4 changed files with 64 additions and 4 deletions
```diff
@@ -201,6 +201,12 @@ async def test_models_routing_table(cached_disk_dist_registry):
     non_existent = await table.get_object_by_identifier("model", "non-existent-model")
     assert non_existent is None
 
+    # Test has_model
+    assert await table.has_model("test_provider/test-model")
+    assert await table.has_model("test_provider/test-model-2")
+    assert not await table.has_model("non-existent-model")
+    assert not await table.has_model("test_provider/non-existent-model")
+
     await table.unregister_model(model_id="test_provider/test-model")
     await table.unregister_model(model_id="test_provider/test-model-2")
 
```
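A `has_model` helper consistent with these assertions could be as small as the following sketch, reusing the routing table's existing `get_object_by_identifier` lookup seen in the same test; the PR's actual implementation may differ in detail:

```python
# Hedged sketch: has_model as a thin wrapper over the identifier lookup.
async def has_model(self, model_id: str) -> bool:
    model = await self.get_object_by_identifier("model", model_id)
    return model is not None
```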
```diff
@@ -44,11 +44,12 @@ def mixin():
     config = RemoteInferenceProviderConfig()
     mixin_instance = OpenAIMixinImpl(config=config)
 
-    # just enough to satisfy _get_provider_model_id calls
-    mock_model_store = MagicMock()
+    # Mock model_store with async methods
+    mock_model_store = AsyncMock()
     mock_model = MagicMock()
     mock_model.provider_resource_id = "test-provider-resource-id"
     mock_model_store.get_model = AsyncMock(return_value=mock_model)
+    mock_model_store.has_model = AsyncMock(return_value=False)  # Default to False, tests can override
     mixin_instance.model_store = mock_model_store
 
     return mixin_instance
```
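The fixture switches from `MagicMock` to `AsyncMock` because the store's methods are now awaited, and a plain `MagicMock` return value is not awaitable. A standalone illustration of the difference:

```python
# Why the fixture needs AsyncMock: awaiting a plain MagicMock raises TypeError.
import asyncio
from unittest.mock import AsyncMock, MagicMock

async def main() -> None:
    sync_store = MagicMock()
    try:
        await sync_store.has_model("m")  # returns a MagicMock, which is not awaitable
    except TypeError as exc:
        print(f"MagicMock under await: {exc}")

    async_store = AsyncMock()
    async_store.has_model.return_value = False  # same default the fixture uses
    print(await async_store.has_model("m"))  # -> False

asyncio.run(main())
```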
```diff
@@ -189,6 +190,40 @@ class TestOpenAIMixinCheckModelAvailability:
 
         assert len(mixin._model_cache) == 3
 
+    async def test_check_model_availability_with_pre_registered_model(
+        self, mixin, mock_client_with_models, mock_client_context
+    ):
+        """Test that check_model_availability returns True for pre-registered models in model_store"""
+        # Mock model_store.has_model to return True for a specific model
+        mock_model_store = AsyncMock()
+        mock_model_store.has_model = AsyncMock(return_value=True)
+        mixin.model_store = mock_model_store
+
+        # Test that pre-registered model is found without calling the provider's API
+        with mock_client_context(mixin, mock_client_with_models):
+            mock_client_with_models.models.list.assert_not_called()
+            assert await mixin.check_model_availability("pre-registered-model")
+            # Should not call the provider's list_models since model was found in store
+            mock_client_with_models.models.list.assert_not_called()
+            mock_model_store.has_model.assert_called_once_with("pre-registered-model")
+
+    async def test_check_model_availability_fallback_to_provider_when_not_in_store(
+        self, mixin, mock_client_with_models, mock_client_context
+    ):
+        """Test that check_model_availability falls back to provider when model not in store"""
+        # Mock model_store.has_model to return False
+        mock_model_store = AsyncMock()
+        mock_model_store.has_model = AsyncMock(return_value=False)
+        mixin.model_store = mock_model_store
+
+        # Test that it falls back to provider's model cache
+        with mock_client_context(mixin, mock_client_with_models):
+            mock_client_with_models.models.list.assert_not_called()
+            assert await mixin.check_model_availability("some-mock-model-id")
+            # Should call the provider's list_models since model was not found in store
+            mock_client_with_models.models.list.assert_called_once()
+            mock_model_store.has_model.assert_called_once_with("some-mock-model-id")
+
+
 class TestOpenAIMixinCacheBehavior:
     """Test cases for cache behavior and edge cases"""
 
```