mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-18 07:18:53 +00:00
fix(models)!: always prefix models with provider_id when registering (#3822)
**!!BREAKING CHANGE!!** The lookup is also straightforward -- we always look for this identifier and don't try to find a match for something without the provider_id prefix. Note that this ideally means we also need to update the `register_model()` API (we should kill "identifier" from there), but I am not doing that as part of this PR.

## Test Plan

Existing unit tests
This commit is contained in:
parent
f205ab6f6c
commit
f70aa99c97
10 changed files with 53 additions and 124 deletions
|
@ -12,26 +12,7 @@
|
|||
"body": {
|
||||
"__type__": "ollama._types.ProcessResponse",
|
||||
"__data__": {
|
||||
"models": [
|
||||
{
|
||||
"model": "llama-guard3:1b",
|
||||
"name": "llama-guard3:1b",
|
||||
"digest": "494147e06bf99e10dbe67b63a07ac81c162f18ef3341aa3390007ac828571b3b",
|
||||
"expires_at": "2025-10-13T14:07:12.309717-07:00",
|
||||
"size": 2279663616,
|
||||
"size_vram": 2279663616,
|
||||
"details": {
|
||||
"parent_model": "",
|
||||
"format": "gguf",
|
||||
"family": "llama",
|
||||
"families": [
|
||||
"llama"
|
||||
],
|
||||
"parameter_size": "1.5B",
|
||||
"quantization_level": "Q8_0"
|
||||
}
|
||||
}
|
||||
]
|
||||
"models": []
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
|
|
|
@ -117,42 +117,24 @@ def client_with_models(
|
|||
text_model_id,
|
||||
vision_model_id,
|
||||
embedding_model_id,
|
||||
embedding_dimension,
|
||||
judge_model_id,
|
||||
):
|
||||
client = llama_stack_client
|
||||
|
||||
providers = [p for p in client.providers.list() if p.api == "inference"]
|
||||
assert len(providers) > 0, "No inference providers found"
|
||||
inference_providers = [p.provider_id for p in providers if p.provider_type != "inline::sentence-transformers"]
|
||||
|
||||
model_ids = {m.identifier for m in client.models.list()}
|
||||
model_ids.update(m.provider_resource_id for m in client.models.list())
|
||||
|
||||
# TODO: fix this crap where we use the first provider randomly
|
||||
# that cannot be right. I think the test should just specify the provider_id
|
||||
if text_model_id and text_model_id not in model_ids:
|
||||
client.models.register(model_id=text_model_id, provider_id=inference_providers[0])
|
||||
raise ValueError(f"text_model_id {text_model_id} not found")
|
||||
if vision_model_id and vision_model_id not in model_ids:
|
||||
client.models.register(model_id=vision_model_id, provider_id=inference_providers[0])
|
||||
raise ValueError(f"vision_model_id {vision_model_id} not found")
|
||||
if judge_model_id and judge_model_id not in model_ids:
|
||||
client.models.register(model_id=judge_model_id, provider_id=inference_providers[0])
|
||||
raise ValueError(f"judge_model_id {judge_model_id} not found")
|
||||
|
||||
if embedding_model_id and embedding_model_id not in model_ids:
|
||||
# try to find a provider that supports embeddings, if sentence-transformers is not available
|
||||
selected_provider = None
|
||||
for p in providers:
|
||||
if p.provider_type == "inline::sentence-transformers":
|
||||
selected_provider = p
|
||||
break
|
||||
|
||||
selected_provider = selected_provider or providers[0]
|
||||
client.models.register(
|
||||
model_id=embedding_model_id,
|
||||
provider_id=selected_provider.provider_id,
|
||||
model_type="embedding",
|
||||
metadata={"embedding_dimension": embedding_dimension or 768},
|
||||
)
|
||||
raise ValueError(f"embedding_model_id {embedding_model_id} not found")
|
||||
return client
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue