fix(route_llm_request.py): move to using common router, even for client-side credentials (#8966)

* fix(route_llm_request.py): move to using common router, even for client-side credentials

ensures fallbacks / cooldown logic still works

* test(test_route_llm_request.py): add unit test for route request

* feat(router.py): generate unique model id when clientside credential passed in

Prevents cooldowns for api key 1 from impacting api key 2 (sketched below the commit details)

* test(test_router.py): update testing to ensure original litellm params not mutated

* fix(router.py): upsert clientside call into llm router model list

enables cooldown logic to work accurately

* fix: fix linting error

* test(test_router_utils.py): add direct test for new util on router
Krish Dholakia · 2025-03-03 22:57:08 -08:00 · committed by GitHub
parent bd2231400f · commit ae6f91a56d
9 changed files with 273 additions and 36 deletions


@@ -0,0 +1,37 @@
"""
Utils for handling clientside credentials.

Supported clientside credentials:
- api_key
- api_base
- base_url

If any of these are passed in the request, generate a unique model_id for the
deployment, so that cooldowns are applied per credential rather than shared.
"""

clientside_credential_keys = ["api_key", "api_base", "base_url"]


def is_clientside_credential(request_kwargs: dict) -> bool:
    """
    Check whether the request kwargs contain a clientside credential.
    """
    return any(key in request_kwargs for key in clientside_credential_keys)


def get_dynamic_litellm_params(litellm_params: dict, request_kwargs: dict) -> dict:
    """
    Copy any clientside credentials from request_kwargs into litellm_params.

    Returns:
    - litellm_params: dict - the updated params, used downstream to generate
      a unique model_id for the deployment.
    """
    # update litellm_params with clientside credentials; note this updates the
    # dict in place, so callers pass a copy to avoid mutating the original
    # deployment's params
    for key in clientside_credential_keys:
        if key in request_kwargs:
            litellm_params[key] = request_kwargs[key]
    return litellm_params


@@ -112,12 +112,19 @@ def _should_run_cooldown_logic(
        deployment is None
        or litellm_router_instance.get_model_group(id=deployment) is None
    ):
        verbose_router_logger.debug(
            "Should Not Run Cooldown Logic: deployment id is none or model group can't be found."
        )
        return False

    if litellm_router_instance.disable_cooldowns:
        verbose_router_logger.debug(
            "Should Not Run Cooldown Logic: disable_cooldowns is True"
        )
        return False

    if deployment is None:
        verbose_router_logger.debug("Should Not Run Cooldown Logic: deployment is None")
        return False

    if not _is_cooldown_required(
@@ -126,9 +133,15 @@ def _should_run_cooldown_logic(
        exception_status=exception_status,
        exception_str=str(original_exception),
    ):
        verbose_router_logger.debug(
            "Should Not Run Cooldown Logic: _is_cooldown_required returned False"
        )
        return False

    if deployment in litellm_router_instance.provider_default_deployment_ids:
        verbose_router_logger.debug(
            "Should Not Run Cooldown Logic: deployment is in provider_default_deployment_ids"
        )
        return False

    return True
@@ -244,6 +257,8 @@ def _set_cooldown_deployments(
    - True if the deployment should be put in cooldown
    - False if the deployment should not be put in cooldown
    """

    verbose_router_logger.debug("checks 'should_run_cooldown_logic'")
    if (
        _should_run_cooldown_logic(
            litellm_router_instance, deployment, exception_status, original_exception
@@ -251,6 +266,7 @@ def _set_cooldown_deployments(
        is False
        or deployment is None
    ):
        verbose_router_logger.debug("should_run_cooldown_logic returned False")
        return False

    exception_status_int = cast_exception_status_to_int(exception_status)