mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 09:53:45 +00:00
fix(inference): enable routing of models with provider_data alone (#3928)
This PR enables routing of fully qualified model IDs of the form `provider_id/model_id` even when the models are not registered with the Stack. Here's the situation: assume a remote inference provider which works only when users provide their own API keys via `X-LlamaStack-Provider-Data` header. By definition, we cannot list models and hence update our routing registry. But because we _require_ a provider ID in the models now, we can identify which provider to route to and let that provider decide. Note that we still try to look up our registry since it may have a pre-registered alias. Just that we don't outright fail when we are not able to look it up. Also, updated inference router so that the responses have the _exact_ model that the request had. ## Test Plan Added an integration test Closes #3929 --------- Co-authored-by: ehhuang <ehhuang@users.noreply.github.com>
This commit is contained in:
parent
94b0592240
commit
f88416ef87
6 changed files with 216 additions and 63 deletions
|
|
@@ -161,8 +161,7 @@ def test_openai_embeddings_single_string(compat_client, client_with_models, embe
|
|||
|
||||
assert response.object == "list"
|
||||
|
||||
# Handle provider-scoped model identifiers (e.g., sentence-transformers/nomic-ai/nomic-embed-text-v1.5)
|
||||
assert response.model == embedding_model_id or response.model.endswith(f"/{embedding_model_id}")
|
||||
assert response.model == embedding_model_id
|
||||
assert len(response.data) == 1
|
||||
assert response.data[0].object == "embedding"
|
||||
assert response.data[0].index == 0
|
||||
|
|
@@ -186,8 +185,7 @@ def test_openai_embeddings_multiple_strings(compat_client, client_with_models, e
|
|||
|
||||
assert response.object == "list"
|
||||
|
||||
# Handle provider-scoped model identifiers (e.g., sentence-transformers/nomic-ai/nomic-embed-text-v1.5)
|
||||
assert response.model == embedding_model_id or response.model.endswith(f"/{embedding_model_id}")
|
||||
assert response.model == embedding_model_id
|
||||
assert len(response.data) == len(input_texts)
|
||||
|
||||
for i, embedding_data in enumerate(response.data):
|
||||
|
|
@@ -365,8 +363,7 @@ def test_openai_embeddings_base64_batch_processing(compat_client, client_with_mo
|
|||
# Validate response structure
|
||||
assert response.object == "list"
|
||||
|
||||
# Handle provider-scoped model identifiers (e.g., sentence-transformers/nomic-ai/nomic-embed-text-v1.5)
|
||||
assert response.model == embedding_model_id or response.model.endswith(f"/{embedding_model_id}")
|
||||
assert response.model == embedding_model_id
|
||||
assert len(response.data) == len(input_texts)
|
||||
|
||||
# Validate each embedding in the batch
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue