mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 11:43:54 +00:00
fix(route_llm_request.py): move to using common router, even for clie… (#8966)
* fix(route_llm_request.py): move to using common router, even for client-side credentials ensures fallbacks / cooldown logic still works * test(test_route_llm_request.py): add unit test for route request * feat(router.py): generate unique model id when clientside credential passed in Prevents cooldowns for api key 1 from impacting api key 2 * test(test_router.py): update testing to ensure original litellm params not mutated * fix(router.py): upsert clientside call into llm router model list enables cooldown logic to work accurately * fix: fix linting error * test(test_router_utils.py): add direct test for new util on router
This commit is contained in:
parent
bd2231400f
commit
ae6f91a56d
9 changed files with 273 additions and 36 deletions
37
litellm/router_utils/clientside_credential_handler.py
Normal file
37
litellm/router_utils/clientside_credential_handler.py
Normal file
|
@ -0,0 +1,37 @@
|
|||
"""
|
||||
Utils for handling clientside credentials
|
||||
|
||||
Supported clientside credentials:
|
||||
- api_key
|
||||
- api_base
|
||||
- base_url
|
||||
|
||||
If given, generate a unique model_id for the deployment.
|
||||
|
||||
Ensures cooldowns are applied correctly.
|
||||
"""
|
||||
|
||||
clientside_credential_keys = ["api_key", "api_base", "base_url"]
|
||||
|
||||
|
||||
def is_clientside_credential(request_kwargs: dict) -> bool:
|
||||
"""
|
||||
Check if the credential is a clientside credential.
|
||||
"""
|
||||
return any(key in request_kwargs for key in clientside_credential_keys)
|
||||
|
||||
|
||||
def get_dynamic_litellm_params(litellm_params: dict, request_kwargs: dict) -> dict:
|
||||
"""
|
||||
Generate a unique model_id for the deployment.
|
||||
|
||||
Returns
|
||||
- litellm_params: dict
|
||||
|
||||
for generating a unique model_id.
|
||||
"""
|
||||
# update litellm_params with clientside credentials
|
||||
for key in clientside_credential_keys:
|
||||
if key in request_kwargs:
|
||||
litellm_params[key] = request_kwargs[key]
|
||||
return litellm_params
|
|
@ -112,12 +112,19 @@ def _should_run_cooldown_logic(
|
|||
deployment is None
|
||||
or litellm_router_instance.get_model_group(id=deployment) is None
|
||||
):
|
||||
verbose_router_logger.debug(
|
||||
"Should Not Run Cooldown Logic: deployment id is none or model group can't be found."
|
||||
)
|
||||
return False
|
||||
|
||||
if litellm_router_instance.disable_cooldowns:
|
||||
verbose_router_logger.debug(
|
||||
"Should Not Run Cooldown Logic: disable_cooldowns is True"
|
||||
)
|
||||
return False
|
||||
|
||||
if deployment is None:
|
||||
verbose_router_logger.debug("Should Not Run Cooldown Logic: deployment is None")
|
||||
return False
|
||||
|
||||
if not _is_cooldown_required(
|
||||
|
@ -126,9 +133,15 @@ def _should_run_cooldown_logic(
|
|||
exception_status=exception_status,
|
||||
exception_str=str(original_exception),
|
||||
):
|
||||
verbose_router_logger.debug(
|
||||
"Should Not Run Cooldown Logic: _is_cooldown_required returned False"
|
||||
)
|
||||
return False
|
||||
|
||||
if deployment in litellm_router_instance.provider_default_deployment_ids:
|
||||
verbose_router_logger.debug(
|
||||
"Should Not Run Cooldown Logic: deployment is in provider_default_deployment_ids"
|
||||
)
|
||||
return False
|
||||
|
||||
return True
|
||||
|
@ -244,6 +257,8 @@ def _set_cooldown_deployments(
|
|||
- True if the deployment should be put in cooldown
|
||||
- False if the deployment should not be put in cooldown
|
||||
"""
|
||||
verbose_router_logger.debug("checks 'should_run_cooldown_logic'")
|
||||
|
||||
if (
|
||||
_should_run_cooldown_logic(
|
||||
litellm_router_instance, deployment, exception_status, original_exception
|
||||
|
@ -251,6 +266,7 @@ def _set_cooldown_deployments(
|
|||
is False
|
||||
or deployment is None
|
||||
):
|
||||
verbose_router_logger.debug("should_run_cooldown_logic returned False")
|
||||
return False
|
||||
|
||||
exception_status_int = cast_exception_status_to_int(exception_status)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue