From d8be3111db9928e93bc7c2e1816bb41fd438f3e8 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Wed, 15 Oct 2025 21:31:15 -0700
Subject: [PATCH] fix(models): always prefix models with provider_id when registering

---
 llama_stack/core/routing_tables/common.py     | 22 ++--------------
 llama_stack/core/routing_tables/models.py     | 12 ++-------
 llama_stack/core/store/registry.py            |  3 +++
 scripts/integration-tests.sh                  |  2 +-
 ...54792b9f22d2cb4522eab802810be8672d3dc.json | 21 +--------------
 tests/integration/fixtures/common.py          | 26 +++----------------
 6 files changed, 13 insertions(+), 73 deletions(-)

diff --git a/llama_stack/core/routing_tables/common.py b/llama_stack/core/routing_tables/common.py
index 0b5aa7843..8df0a89a9 100644
--- a/llama_stack/core/routing_tables/common.py
+++ b/llama_stack/core/routing_tables/common.py
@@ -245,25 +245,7 @@ class CommonRoutingTableImpl(RoutingTable):
 
 
 async def lookup_model(routing_table: CommonRoutingTableImpl, model_id: str) -> Model:
-    # first try to get the model by identifier
-    # this works if model_id is an alias or is of the form provider_id/provider_model_id
     model = await routing_table.get_object_by_identifier("model", model_id)
-    if model is not None:
-        return model
-
-    logger.warning(
-        f"WARNING: model identifier '{model_id}' not found in routing table. Falling back to "
-        "searching in all providers. This is only for backwards compatibility and will stop working "
-        "soon. Migrate your calls to use fully scoped `provider_id/model_id` names."
-    )
-    # if not found, this means model_id is an unscoped provider_model_id, we need
-    # to iterate (given a lack of an efficient index on the KVStore)
-    models = await routing_table.get_all_with_type("model")
-    matching_models = [m for m in models if m.provider_resource_id == model_id]
-    if len(matching_models) == 0:
+    if not model:
         raise ModelNotFoundError(model_id)
-
-    if len(matching_models) > 1:
-        raise ValueError(f"Multiple providers found for '{model_id}': {[m.provider_id for m in matching_models]}")
-
-    return matching_models[0]
+    return model
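
Note: the hunk above removes the unscoped-lookup fallback entirely. For illustration, a minimal sketch of the caller-facing behavior, assuming lookup_model and ModelNotFoundError are importable from llama_stack.core.routing_tables.common and that an initialized routing table is at hand; the model names are examples only:

    # Sketch only, not part of the patch.
    from llama_stack.core.routing_tables.common import ModelNotFoundError, lookup_model

    async def resolve_examples(routing_table) -> None:
        # A fully scoped "<provider_id>/<provider_model_id>" identifier resolves directly.
        model = await lookup_model(routing_table, "ollama/llama3.2:3b")
        print(model.identifier)

        # A bare provider_model_id no longer falls back to scanning every provider.
        try:
            await lookup_model(routing_table, "llama3.2:3b")
        except ModelNotFoundError:
            print("migrate callers to fully scoped provider_id/model_id names")
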
diff --git a/llama_stack/core/routing_tables/models.py b/llama_stack/core/routing_tables/models.py
index 716be936a..7e43d7273 100644
--- a/llama_stack/core/routing_tables/models.py
+++ b/llama_stack/core/routing_tables/models.py
@@ -33,7 +33,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
             try:
                 models = await provider.list_models()
             except Exception as e:
-                logger.debug(f"Model refresh failed for provider {provider_id}: {e}")
+                logger.warning(f"Model refresh failed for provider {provider_id}: {e}")
                 continue
 
             self.listed_providers.add(provider_id)
@@ -104,15 +104,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
         if "embedding_dimension" not in metadata and model_type == ModelType.embedding:
             raise ValueError("Embedding model must have an embedding dimension in its metadata")
 
-        # an identifier different than provider_model_id implies it is an alias, so that
-        # becomes the globally unique identifier. otherwise provider_model_ids can conflict,
-        # so as a general rule we must use the provider_id to disambiguate.
-
-        if model_id != provider_model_id:
-            identifier = model_id
-        else:
-            identifier = f"{provider_id}/{provider_model_id}"
-
+        identifier = f"{provider_id}/{provider_model_id}"
         model = ModelWithOwner(
             identifier=identifier,
             provider_resource_id=provider_model_id,
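
The effect of the register_model change above is easiest to see with concrete values. A standalone sketch that mirrors the f-string in the hunk (the provider and model names are illustrative, the second pair matching the embedding model used by the test script below):

    # Sketch only, not part of the patch.
    def registered_identifier(provider_id: str, provider_model_id: str) -> str:
        # Aliases are no longer honored: the provider id always prefixes the model name.
        return f"{provider_id}/{provider_model_id}"

    assert registered_identifier("ollama", "llama3.2:3b") == "ollama/llama3.2:3b"
    assert (
        registered_identifier("sentence-transformers", "nomic-ai/nomic-embed-text-v1.5")
        == "sentence-transformers/nomic-ai/nomic-embed-text-v1.5"
    )
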
diff --git a/llama_stack/core/store/registry.py b/llama_stack/core/store/registry.py
index 04581bab5..cb06f6a57 100644
--- a/llama_stack/core/store/registry.py
+++ b/llama_stack/core/store/registry.py
@@ -102,6 +102,9 @@ class DiskDistributionRegistry(DistributionRegistry):
                 "Unregister it first if you want to replace it."
             )
 
+        if "sentence-transformers/sentence-transformers" in obj.identifier:
+            raise ValueError(f"Identifier '{obj.identifier}' is doubly prefixed with its provider id; refusing to register")
+
         await self.kvstore.set(
             KEY_FORMAT.format(type=obj.type, identifier=obj.identifier),
             obj.model_dump_json(),
diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh
index f3dc32745..138f1d144 100755
--- a/scripts/integration-tests.sh
+++ b/scripts/integration-tests.sh
@@ -290,7 +290,7 @@ pytest -s -v $PYTEST_TARGET \
     -k "$PYTEST_PATTERN" \
     $EXTRA_PARAMS \
     --color=yes \
-    --embedding-model=nomic-ai/nomic-embed-text-v1.5 \
+    --embedding-model=sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \
     --color=yes $EXTRA_PARAMS \
     --capture=tee-sys
 exit_code=$?
diff --git a/tests/integration/common/recordings/02c93bb3c314427bae2b7a7a6f054792b9f22d2cb4522eab802810be8672d3dc.json b/tests/integration/common/recordings/02c93bb3c314427bae2b7a7a6f054792b9f22d2cb4522eab802810be8672d3dc.json
index 4ea0ee13f..2b2afeee4 100644
--- a/tests/integration/common/recordings/02c93bb3c314427bae2b7a7a6f054792b9f22d2cb4522eab802810be8672d3dc.json
+++ b/tests/integration/common/recordings/02c93bb3c314427bae2b7a7a6f054792b9f22d2cb4522eab802810be8672d3dc.json
@@ -12,26 +12,7 @@
     "body": {
       "__type__": "ollama._types.ProcessResponse",
       "__data__": {
-        "models": [
-          {
-            "model": "llama-guard3:1b",
-            "name": "llama-guard3:1b",
-            "digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
-            "expires_at": "2025-10-13T14:07:12.309717-07:00",
-            "size": 2279663616,
-            "size_vram": 2279663616,
-            "details": {
-              "parent_model": "",
-              "format": "gguf",
-              "family": "llama",
-              "families": [
-                "llama"
-              ],
-              "parameter_size": "1.5B",
-              "quantization_level": "Q8_0"
-            }
-          }
-        ]
+        "models": []
       }
     },
     "is_streaming": false
diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py
index 6ebf0aed7..68a30fc69 100644
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@@ -117,42 +117,24 @@ def client_with_models(
     text_model_id,
     vision_model_id,
     embedding_model_id,
-    embedding_dimension,
     judge_model_id,
 ):
     client = llama_stack_client
 
     providers = [p for p in client.providers.list() if p.api == "inference"]
     assert len(providers) > 0, "No inference providers found"
-    inference_providers = [p.provider_id for p in providers if p.provider_type != "inline::sentence-transformers"]
 
     model_ids = {m.identifier for m in client.models.list()}
-    model_ids.update(m.provider_resource_id for m in client.models.list())
 
-    # TODO: fix this crap where we use the first provider randomly
-    # that cannot be right. I think the test should just specify the provider_id
     if text_model_id and text_model_id not in model_ids:
-        client.models.register(model_id=text_model_id, provider_id=inference_providers[0])
+        raise ValueError(f"text_model_id {text_model_id} not found")
     if vision_model_id and vision_model_id not in model_ids:
-        client.models.register(model_id=vision_model_id, provider_id=inference_providers[0])
+        raise ValueError(f"vision_model_id {vision_model_id} not found")
     if judge_model_id and judge_model_id not in model_ids:
-        client.models.register(model_id=judge_model_id, provider_id=inference_providers[0])
+        raise ValueError(f"judge_model_id {judge_model_id} not found")
 
     if embedding_model_id and embedding_model_id not in model_ids:
-        # try to find a provider that supports embeddings, if sentence-transformers is not available
-        selected_provider = None
-        for p in providers:
-            if p.provider_type == "inline::sentence-transformers":
-                selected_provider = p
-                break
-
-        selected_provider = selected_provider or providers[0]
-        client.models.register(
-            model_id=embedding_model_id,
-            provider_id=selected_provider.provider_id,
-            model_type="embedding",
-            metadata={"embedding_dimension": embedding_dimension or 768},
-        )
+        raise ValueError(f"embedding_model_id {embedding_model_id} not found")
     return client
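
With the fixture change above, client_with_models now fails fast instead of auto-registering missing models on an arbitrary provider, so tests must request models under their fully scoped names. A minimal standalone sketch of that contract (the helper and the registered set are illustrative; the embedding model id matches the one in scripts/integration-tests.sh):

    # Sketch only, not part of the patch.
    def require_registered(model_ids: set[str], requested: str | None, label: str) -> None:
        # Mirrors the fixture: a requested model must already be registered under its
        # fully scoped "<provider_id>/<provider_model_id>" identifier.
        if requested and requested not in model_ids:
            raise ValueError(f"{label} {requested} not found")

    registered = {"sentence-transformers/nomic-ai/nomic-embed-text-v1.5", "ollama/llama3.2:3b"}
    require_registered(registered, "sentence-transformers/nomic-ai/nomic-embed-text-v1.5", "embedding_model_id")
    require_registered(registered, "ollama/llama3.2:3b", "text_model_id")
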