fix(inference): enable routing of models with provider_data alone

Consider a remote inference provider that works only when users provide
their own API keys via provider_data. By definition, we cannot list its
models, and hence cannot update our routing registry. But because we now
_require_ a provider ID in the models, we can identify which provider to
route to and let that provider decide.

Note that we still try to look the model up in our registry, since it
may have a pre-registered alias; we just don't fail outright when the
lookup does not succeed.

Also updated the inference router so that responses carry the _exact_
model that the request specified.

Added an integration test.
This commit is contained in:
Ashwin Bharambe 2025-10-27 14:55:24 -07:00
parent 471b1b248b
commit d089a6d106
6 changed files with 209 additions and 57 deletions

View file

@ -161,8 +161,7 @@ def test_openai_embeddings_single_string(compat_client, client_with_models, embe
assert response.object == "list"
# Handle provider-scoped model identifiers (e.g., sentence-transformers/nomic-ai/nomic-embed-text-v1.5)
assert response.model == embedding_model_id or response.model.endswith(f"/{embedding_model_id}")
assert response.model == embedding_model_id
assert len(response.data) == 1
assert response.data[0].object == "embedding"
assert response.data[0].index == 0
@ -186,8 +185,7 @@ def test_openai_embeddings_multiple_strings(compat_client, client_with_models, e
assert response.object == "list"
# Handle provider-scoped model identifiers (e.g., sentence-transformers/nomic-ai/nomic-embed-text-v1.5)
assert response.model == embedding_model_id or response.model.endswith(f"/{embedding_model_id}")
assert response.model == embedding_model_id
assert len(response.data) == len(input_texts)
for i, embedding_data in enumerate(response.data):
@ -365,8 +363,7 @@ def test_openai_embeddings_base64_batch_processing(compat_client, client_with_mo
# Validate response structure
assert response.object == "list"
# Handle provider-scoped model identifiers (e.g., sentence-transformers/nomic-ai/nomic-embed-text-v1.5)
assert response.model == embedding_model_id or response.model.endswith(f"/{embedding_model_id}")
assert response.model == embedding_model_id
assert len(response.data) == len(input_texts)
# Validate each embedding in the batch