Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-10-08 13:00:52 +00:00
fix: improve model availability checks: Allows use of unavailable models on startup (#3717)
Some checks failed
SqlStore Integration Tests / test-postgres (3.13) (push) Failing after 0s
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 3s
Python Package Build Test / build (3.12) (push) Failing after 1s
Test External Providers Installed via Module / test-external-providers-from-module (venv) (push) Has been skipped
SqlStore Integration Tests / test-postgres (3.12) (push) Failing after 4s
Integration Tests (Replay) / Integration Tests (, , , client=, ) (push) Failing after 4s
Python Package Build Test / build (3.13) (push) Failing after 2s
Vector IO Integration Tests / test-matrix (push) Failing after 5s
Unit Tests / unit-tests (3.12) (push) Failing after 4s
API Conformance Tests / check-schema-compatibility (push) Successful in 10s
Unit Tests / unit-tests (3.13) (push) Failing after 4s
Test External API and Providers / test-external (venv) (push) Failing after 7s
UI Tests / ui-tests (22) (push) Successful in 39s
Pre-commit / pre-commit (push) Successful in 1m28s
# What does this PR do?

- Allow use of unavailable models on startup
- Add a `has_model` method to `ModelsRoutingTable` for checking pre-registered models
- Update `check_model_availability` to consult the `model_store` before calling provider APIs

## Test Plan

Start Llama Stack and point it at an unavailable vLLM endpoint:

```
VLLM_URL=https://my-unavailable-vllm/v1 MILVUS_DB_PATH=./milvus.db INFERENCE_MODEL=vllm uv run --with llama-stack llama stack build --distro starter --image-type venv --run
```

Llama Stack starts without crashing; the unreachable provider only produces error and warning log entries:

```
- provider_id: rag-runtime
  toolgroup_id: builtin::rag
vector_dbs: []
version: 2
INFO 2025-10-07 06:40:41,804 llama_stack.providers.utils.inference.inference_store:74 inference: Write queue disabled for SQLite to avoid concurrency issues
INFO 2025-10-07 06:40:42,066 llama_stack.providers.utils.responses.responses_store:96 openai_responses: Write queue disabled for SQLite to avoid concurrency issues
ERROR 2025-10-07 06:40:58,882 llama_stack.providers.utils.inference.openai_mixin:436 providers::utils: VLLMInferenceAdapter.list_provider_model_ids() failed with: Request timed out.
WARNING 2025-10-07 06:40:58,883 llama_stack.core.routing_tables.models:36 core::routing_tables: Model refresh failed for provider vllm: Request timed out.
[...]
INFO 2025-10-07 06:40:59,036 uvicorn.error:216 uncategorized: Uvicorn running on http://['::', '0.0.0.0']:8321 (Press CTRL+C to quit)
INFO 2025-10-07 06:41:04,064 openai._base_client:1618 uncategorized: Retrying request to /models in 0.398814 seconds
INFO 2025-10-07 06:41:09,497 openai._base_client:1618 uncategorized: Retrying request to /models in 0.781908 seconds
ERROR 2025-10-07 06:41:15,282 llama_stack.providers.utils.inference.openai_mixin:436 providers::utils: VLLMInferenceAdapter.list_provider_model_ids() failed with: Request timed out.
WARNING 2025-10-07 06:41:15,283 llama_stack.core.routing_tables.models:36 core::routing_tables: Model refresh failed for provider vllm: Request timed out.
```
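The lookup order described above can be pictured with a minimal sketch. The attribute and method names below are assumptions pieced together from the PR text and the logs (`list_provider_model_ids` appears in the test-plan output); this is illustrative, not the exact llama-stack implementation:

```python
# Hedged sketch of the new availability-check ordering; names are assumed.
async def check_model_availability(self, model: str) -> bool:
    # Pre-registered models win: if the stack's model store already knows the
    # model, report it available without contacting the provider at all.
    if self.model_store is not None and await self.model_store.has_model(model):
        return True
    # Otherwise fall back to the provider's model listing, which may hit the
    # network and fail when the provider is unreachable.
    provider_models = await self.list_provider_model_ids()  # hypothetical helper
    return model in provider_models
```

This ordering is what lets the stack start with an unreachable vLLM endpoint: the store lookup succeeds locally, and only the fallback path can time out.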
This commit is contained in: parent d5b136ac66, commit 1970b4aa4b.

4 changed files with 64 additions and 4 deletions
```diff
@@ -201,6 +201,12 @@ async def test_models_routing_table(cached_disk_dist_registry):
     non_existent = await table.get_object_by_identifier("model", "non-existent-model")
     assert non_existent is None
 
+    # Test has_model
+    assert await table.has_model("test_provider/test-model")
+    assert await table.has_model("test_provider/test-model-2")
+    assert not await table.has_model("non-existent-model")
+    assert not await table.has_model("test_provider/non-existent-model")
+
     await table.unregister_model(model_id="test_provider/test-model")
     await table.unregister_model(model_id="test_provider/test-model-2")
 
```
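A `has_model` helper consistent with these assertions could be as small as the following sketch, reusing the routing table's existing `get_object_by_identifier` lookup seen in the same test; the PR's actual implementation may differ in detail:

```python
# Hedged sketch: has_model as a thin wrapper over the identifier lookup.
async def has_model(self, model_id: str) -> bool:
    model = await self.get_object_by_identifier("model", model_id)
    return model is not None
```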
```diff
@@ -44,11 +44,12 @@ def mixin():
     config = RemoteInferenceProviderConfig()
     mixin_instance = OpenAIMixinImpl(config=config)
 
-    # just enough to satisfy _get_provider_model_id calls
-    mock_model_store = MagicMock()
+    # Mock model_store with async methods
+    mock_model_store = AsyncMock()
     mock_model = MagicMock()
     mock_model.provider_resource_id = "test-provider-resource-id"
     mock_model_store.get_model = AsyncMock(return_value=mock_model)
+    mock_model_store.has_model = AsyncMock(return_value=False)  # Default to False, tests can override
     mixin_instance.model_store = mock_model_store
 
     return mixin_instance
```
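The fixture switches from `MagicMock` to `AsyncMock` because the store's methods are now awaited, and a plain `MagicMock` return value is not awaitable. A standalone illustration of the difference:

```python
# Why the fixture needs AsyncMock: awaiting a plain MagicMock raises TypeError.
import asyncio
from unittest.mock import AsyncMock, MagicMock

async def main() -> None:
    sync_store = MagicMock()
    try:
        await sync_store.has_model("m")  # returns a MagicMock, which is not awaitable
    except TypeError as exc:
        print(f"MagicMock under await: {exc}")

    async_store = AsyncMock()
    async_store.has_model.return_value = False  # same default the fixture uses
    print(await async_store.has_model("m"))  # -> False

asyncio.run(main())
```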
```diff
@@ -189,6 +190,40 @@ class TestOpenAIMixinCheckModelAvailability:
 
         assert len(mixin._model_cache) == 3
 
+    async def test_check_model_availability_with_pre_registered_model(
+        self, mixin, mock_client_with_models, mock_client_context
+    ):
+        """Test that check_model_availability returns True for pre-registered models in model_store"""
+        # Mock model_store.has_model to return True for a specific model
+        mock_model_store = AsyncMock()
+        mock_model_store.has_model = AsyncMock(return_value=True)
+        mixin.model_store = mock_model_store
+
+        # Test that pre-registered model is found without calling the provider's API
+        with mock_client_context(mixin, mock_client_with_models):
+            mock_client_with_models.models.list.assert_not_called()
+            assert await mixin.check_model_availability("pre-registered-model")
+            # Should not call the provider's list_models since model was found in store
+            mock_client_with_models.models.list.assert_not_called()
+            mock_model_store.has_model.assert_called_once_with("pre-registered-model")
+
+    async def test_check_model_availability_fallback_to_provider_when_not_in_store(
+        self, mixin, mock_client_with_models, mock_client_context
+    ):
+        """Test that check_model_availability falls back to provider when model not in store"""
+        # Mock model_store.has_model to return False
+        mock_model_store = AsyncMock()
+        mock_model_store.has_model = AsyncMock(return_value=False)
+        mixin.model_store = mock_model_store
+
+        # Test that it falls back to provider's model cache
+        with mock_client_context(mixin, mock_client_with_models):
+            mock_client_with_models.models.list.assert_not_called()
+            assert await mixin.check_model_availability("some-mock-model-id")
+            # Should call the provider's list_models since model was not found in store
+            mock_client_with_models.models.list.assert_called_once()
+            mock_model_store.has_model.assert_called_once_with("some-mock-model-id")
+
+
 class TestOpenAIMixinCacheBehavior:
     """Test cases for cache behavior and edge cases"""
 
```