mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-12 20:12:33 +00:00
fix(models): always prefix models with provider_id when registering
This commit is contained in:
parent
f205ab6f6c
commit
d8be3111db
6 changed files with 13 additions and 73 deletions
|
|
@ -245,25 +245,7 @@ class CommonRoutingTableImpl(RoutingTable):
|
||||||
|
|
||||||
|
|
||||||
async def lookup_model(routing_table: CommonRoutingTableImpl, model_id: str) -> Model:
|
async def lookup_model(routing_table: CommonRoutingTableImpl, model_id: str) -> Model:
|
||||||
# first try to get the model by identifier
|
|
||||||
# this works if model_id is an alias or is of the form provider_id/provider_model_id
|
|
||||||
model = await routing_table.get_object_by_identifier("model", model_id)
|
model = await routing_table.get_object_by_identifier("model", model_id)
|
||||||
if model is not None:
|
if not model:
|
||||||
return model
|
|
||||||
|
|
||||||
logger.warning(
|
|
||||||
f"WARNING: model identifier '{model_id}' not found in routing table. Falling back to "
|
|
||||||
"searching in all providers. This is only for backwards compatibility and will stop working "
|
|
||||||
"soon. Migrate your calls to use fully scoped `provider_id/model_id` names."
|
|
||||||
)
|
|
||||||
# if not found, this means model_id is an unscoped provider_model_id, we need
|
|
||||||
# to iterate (given a lack of an efficient index on the KVStore)
|
|
||||||
models = await routing_table.get_all_with_type("model")
|
|
||||||
matching_models = [m for m in models if m.provider_resource_id == model_id]
|
|
||||||
if len(matching_models) == 0:
|
|
||||||
raise ModelNotFoundError(model_id)
|
raise ModelNotFoundError(model_id)
|
||||||
|
return model
|
||||||
if len(matching_models) > 1:
|
|
||||||
raise ValueError(f"Multiple providers found for '{model_id}': {[m.provider_id for m in matching_models]}")
|
|
||||||
|
|
||||||
return matching_models[0]
|
|
||||||
|
|
|
||||||
|
|
@ -33,7 +33,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
|
||||||
try:
|
try:
|
||||||
models = await provider.list_models()
|
models = await provider.list_models()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(f"Model refresh failed for provider {provider_id}: {e}")
|
logger.warning(f"Model refresh failed for provider {provider_id}: {e}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
self.listed_providers.add(provider_id)
|
self.listed_providers.add(provider_id)
|
||||||
|
|
@ -104,15 +104,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
|
||||||
if "embedding_dimension" not in metadata and model_type == ModelType.embedding:
|
if "embedding_dimension" not in metadata and model_type == ModelType.embedding:
|
||||||
raise ValueError("Embedding model must have an embedding dimension in its metadata")
|
raise ValueError("Embedding model must have an embedding dimension in its metadata")
|
||||||
|
|
||||||
# an identifier different than provider_model_id implies it is an alias, so that
|
identifier = f"{provider_id}/{provider_model_id}"
|
||||||
# becomes the globally unique identifier. otherwise provider_model_ids can conflict,
|
|
||||||
# so as a general rule we must use the provider_id to disambiguate.
|
|
||||||
|
|
||||||
if model_id != provider_model_id:
|
|
||||||
identifier = model_id
|
|
||||||
else:
|
|
||||||
identifier = f"{provider_id}/{provider_model_id}"
|
|
||||||
|
|
||||||
model = ModelWithOwner(
|
model = ModelWithOwner(
|
||||||
identifier=identifier,
|
identifier=identifier,
|
||||||
provider_resource_id=provider_model_id,
|
provider_resource_id=provider_model_id,
|
||||||
|
|
|
||||||
|
|
@ -102,6 +102,9 @@ class DiskDistributionRegistry(DistributionRegistry):
|
||||||
"Unregister it first if you want to replace it."
|
"Unregister it first if you want to replace it."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if "sentence-transformers/sentence-transformers" in obj.identifier:
|
||||||
|
raise Exception("OMG")
|
||||||
|
|
||||||
await self.kvstore.set(
|
await self.kvstore.set(
|
||||||
KEY_FORMAT.format(type=obj.type, identifier=obj.identifier),
|
KEY_FORMAT.format(type=obj.type, identifier=obj.identifier),
|
||||||
obj.model_dump_json(),
|
obj.model_dump_json(),
|
||||||
|
|
|
||||||
|
|
@ -290,7 +290,7 @@ pytest -s -v $PYTEST_TARGET \
|
||||||
-k "$PYTEST_PATTERN" \
|
-k "$PYTEST_PATTERN" \
|
||||||
$EXTRA_PARAMS \
|
$EXTRA_PARAMS \
|
||||||
--color=yes \
|
--color=yes \
|
||||||
--embedding-model=nomic-ai/nomic-embed-text-v1.5 \
|
--embedding-model=sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \
|
||||||
--color=yes $EXTRA_PARAMS \
|
--color=yes $EXTRA_PARAMS \
|
||||||
--capture=tee-sys
|
--capture=tee-sys
|
||||||
exit_code=$?
|
exit_code=$?
|
||||||
|
|
|
||||||
|
|
@ -12,26 +12,7 @@
|
||||||
"body": {
|
"body": {
|
||||||
"__type__": "ollama._types.ProcessResponse",
|
"__type__": "ollama._types.ProcessResponse",
|
||||||
"__data__": {
|
"__data__": {
|
||||||
"models": [
|
"models": []
|
||||||
{
|
|
||||||
"model": "llama-guard3:1b",
|
|
||||||
"name": "llama-guard3:1b",
|
|
||||||
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
|
|
||||||
"expires_at": "2025-10-13T14:07:12.309717-07:00",
|
|
||||||
"size": 2279663616,
|
|
||||||
"size_vram": 2279663616,
|
|
||||||
"details": {
|
|
||||||
"parent_model": "",
|
|
||||||
"format": "gguf",
|
|
||||||
"family": "llama",
|
|
||||||
"families": [
|
|
||||||
"llama"
|
|
||||||
],
|
|
||||||
"parameter_size": "1.5B",
|
|
||||||
"quantization_level": "Q8_0"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"is_streaming": false
|
"is_streaming": false
|
||||||
|
|
|
||||||
|
|
@ -117,42 +117,24 @@ def client_with_models(
|
||||||
text_model_id,
|
text_model_id,
|
||||||
vision_model_id,
|
vision_model_id,
|
||||||
embedding_model_id,
|
embedding_model_id,
|
||||||
embedding_dimension,
|
|
||||||
judge_model_id,
|
judge_model_id,
|
||||||
):
|
):
|
||||||
client = llama_stack_client
|
client = llama_stack_client
|
||||||
|
|
||||||
providers = [p for p in client.providers.list() if p.api == "inference"]
|
providers = [p for p in client.providers.list() if p.api == "inference"]
|
||||||
assert len(providers) > 0, "No inference providers found"
|
assert len(providers) > 0, "No inference providers found"
|
||||||
inference_providers = [p.provider_id for p in providers if p.provider_type != "inline::sentence-transformers"]
|
|
||||||
|
|
||||||
model_ids = {m.identifier for m in client.models.list()}
|
model_ids = {m.identifier for m in client.models.list()}
|
||||||
model_ids.update(m.provider_resource_id for m in client.models.list())
|
|
||||||
|
|
||||||
# TODO: fix this crap where we use the first provider randomly
|
|
||||||
# that cannot be right. I think the test should just specify the provider_id
|
|
||||||
if text_model_id and text_model_id not in model_ids:
|
if text_model_id and text_model_id not in model_ids:
|
||||||
client.models.register(model_id=text_model_id, provider_id=inference_providers[0])
|
raise ValueError(f"text_model_id {text_model_id} not found")
|
||||||
if vision_model_id and vision_model_id not in model_ids:
|
if vision_model_id and vision_model_id not in model_ids:
|
||||||
client.models.register(model_id=vision_model_id, provider_id=inference_providers[0])
|
raise ValueError(f"vision_model_id {vision_model_id} not found")
|
||||||
if judge_model_id and judge_model_id not in model_ids:
|
if judge_model_id and judge_model_id not in model_ids:
|
||||||
client.models.register(model_id=judge_model_id, provider_id=inference_providers[0])
|
raise ValueError(f"judge_model_id {judge_model_id} not found")
|
||||||
|
|
||||||
if embedding_model_id and embedding_model_id not in model_ids:
|
if embedding_model_id and embedding_model_id not in model_ids:
|
||||||
# try to find a provider that supports embeddings, if sentence-transformers is not available
|
raise ValueError(f"embedding_model_id {embedding_model_id} not found")
|
||||||
selected_provider = None
|
|
||||||
for p in providers:
|
|
||||||
if p.provider_type == "inline::sentence-transformers":
|
|
||||||
selected_provider = p
|
|
||||||
break
|
|
||||||
|
|
||||||
selected_provider = selected_provider or providers[0]
|
|
||||||
client.models.register(
|
|
||||||
model_id=embedding_model_id,
|
|
||||||
provider_id=selected_provider.provider_id,
|
|
||||||
model_type="embedding",
|
|
||||||
metadata={"embedding_dimension": embedding_dimension or 768},
|
|
||||||
)
|
|
||||||
return client
|
return client
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue