fix(models): always prefix models with provider_id when registering

2025-12-12 20:12:33 +00:00 · 2025-10-15 21:31:15 -07:00 · 2025-10-15 21:31:15 -07:00 · d8be3111db
commit d8be3111db
parent f205ab6f6c
6 changed files with 13 additions and 73 deletions
--- a/llama_stack/core/routing_tables/common.py
+++ b/llama_stack/core/routing_tables/common.py
@ -245,25 +245,7 @@ class CommonRoutingTableImpl(RoutingTable):
 async def lookup_model(routing_table: CommonRoutingTableImpl, model_id: str) -> Model:
    # first try to get the model by identifier
    # this works if model_id is an alias or is of the form provider_id/provider_model_id
    model = await routing_table.get_object_by_identifier("model", model_id)
-    if model is not None:
+    if not model:
        return model
    logger.warning(
        f"WARNING: model identifier '{model_id}' not found in routing table. Falling back to "
        "searching in all providers. This is only for backwards compatibility and will stop working "
        "soon. Migrate your calls to use fully scoped `provider_id/model_id` names."
    )
    # if not found, this means model_id is an unscoped provider_model_id, we need
    # to iterate (given a lack of an efficient index on the KVStore)
    models = await routing_table.get_all_with_type("model")
    matching_models = [m for m in models if m.provider_resource_id == model_id]
    if len(matching_models) == 0:
        raise ModelNotFoundError(model_id)
-
+    return model
    if len(matching_models) > 1:
        raise ValueError(f"Multiple providers found for '{model_id}': {[m.provider_id for m in matching_models]}")
    return matching_models[0]
--- a/llama_stack/core/routing_tables/models.py
+++ b/llama_stack/core/routing_tables/models.py
@ -33,7 +33,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
            try:
                models = await provider.list_models()
            except Exception as e:
-                logger.debug(f"Model refresh failed for provider {provider_id}: {e}")
+                logger.warning(f"Model refresh failed for provider {provider_id}: {e}")
                continue
            self.listed_providers.add(provider_id)
@ -104,15 +104,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
        if "embedding_dimension" not in metadata and model_type == ModelType.embedding:
            raise ValueError("Embedding model must have an embedding dimension in its metadata")
-        # an identifier different than provider_model_id implies it is an alias, so that
+        identifier = f"{provider_id}/{provider_model_id}"
        # becomes the globally unique identifier. otherwise provider_model_ids can conflict,
        # so as a general rule we must use the provider_id to disambiguate.
        if model_id != provider_model_id:
            identifier = model_id
        else:
            identifier = f"{provider_id}/{provider_model_id}"
        model = ModelWithOwner(
            identifier=identifier,
            provider_resource_id=provider_model_id,
--- a/llama_stack/core/store/registry.py
+++ b/llama_stack/core/store/registry.py
@ -102,6 +102,9 @@ class DiskDistributionRegistry(DistributionRegistry):
                "Unregister it first if you want to replace it."
            )
        if "sentence-transformers/sentence-transformers" in obj.identifier:
            raise Exception("OMG")
        await self.kvstore.set(
            KEY_FORMAT.format(type=obj.type, identifier=obj.identifier),
            obj.model_dump_json(),
--- a/scripts/integration-tests.sh
+++ b/scripts/integration-tests.sh
@ -290,7 +290,7 @@ pytest -s -v $PYTEST_TARGET \
    -k "$PYTEST_PATTERN" \
    $EXTRA_PARAMS \
    --color=yes \
-    --embedding-model=nomic-ai/nomic-embed-text-v1.5 \
+    --embedding-model=sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \
    --color=yes $EXTRA_PARAMS \
    --capture=tee-sys
 exit_code=$?
--- a/tests/integration/common/recordings/02c93bb3c314427bae2b7a7a6f054792b9f22d2cb4522eab802810be8672d3dc.json
+++ b/tests/integration/common/recordings/02c93bb3c314427bae2b7a7a6f054792b9f22d2cb4522eab802810be8672d3dc.json
@ -12,26 +12,7 @@
    "body": {
      "__type__": "ollama._types.ProcessResponse",
      "__data__": {
-        "models": [
+        "models": []
          {
            "model": "llama-guard3:1b",
            "name": "llama-guard3:1b",
            "digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
            "expires_at": "2025-10-13T14:07:12.309717-07:00",
            "size": 2279663616,
            "size_vram": 2279663616,
            "details": {
              "parent_model": "",
              "format": "gguf",
              "family": "llama",
              "families": [
                "llama"
              ],
              "parameter_size": "1.5B",
              "quantization_level": "Q8_0"
            }
          }
        ]
      }
    },
    "is_streaming": false
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@ -117,42 +117,24 @@ def client_with_models(
    text_model_id,
    vision_model_id,
    embedding_model_id,
    embedding_dimension,
    judge_model_id,
 ):
    client = llama_stack_client
    providers = [p for p in client.providers.list() if p.api == "inference"]
    assert len(providers) > 0, "No inference providers found"
    inference_providers = [p.provider_id for p in providers if p.provider_type != "inline::sentence-transformers"]
    model_ids = {m.identifier for m in client.models.list()}
    model_ids.update(m.provider_resource_id for m in client.models.list())
    # TODO: fix this crap where we use the first provider randomly
    # that cannot be right. I think the test should just specify the provider_id
    if text_model_id and text_model_id not in model_ids:
-        client.models.register(model_id=text_model_id, provider_id=inference_providers[0])
+        raise ValueError(f"text_model_id {text_model_id} not found")
    if vision_model_id and vision_model_id not in model_ids:
-        client.models.register(model_id=vision_model_id, provider_id=inference_providers[0])
+        raise ValueError(f"vision_model_id {vision_model_id} not found")
    if judge_model_id and judge_model_id not in model_ids:
-        client.models.register(model_id=judge_model_id, provider_id=inference_providers[0])
+        raise ValueError(f"judge_model_id {judge_model_id} not found")
    if embedding_model_id and embedding_model_id not in model_ids:
-        # try to find a provider that supports embeddings, if sentence-transformers is not available
+        raise ValueError(f"embedding_model_id {embedding_model_id} not found")
        selected_provider = None
        for p in providers:
            if p.provider_type == "inline::sentence-transformers":
                selected_provider = p
                break
        selected_provider = selected_provider or providers[0]
        client.models.register(
            model_id=embedding_model_id,
            provider_id=selected_provider.provider_id,
            model_type="embedding",
            metadata={"embedding_dimension": embedding_dimension or 768},
        )
    return client