fix(route_llm_request.py): move to using common router, even for client-side credentials (#8966)

* fix(route_llm_request.py): move to using common router, even for client-side credentials

ensures fallbacks / cooldown logic still works

* test(test_route_llm_request.py): add unit test for route request

* feat(router.py): generate unique model id when clientside credential passed in

Prevents cooldowns for api key 1 from impacting api key 2 (sketched below the commit details)

* test(test_router.py): update testing to ensure original litellm params not mutated

* fix(router.py): upsert clientside call into llm router model list

enables cooldown logic to work accurately

* fix: fix linting error

* test(test_router_utils.py): add direct test for new util on router
Krish Dholakia · 2025-03-03 22:57:08 -08:00 · committed by GitHub
parent bd2231400f · commit ae6f91a56d
9 changed files with 273 additions and 36 deletions


@@ -0,0 +1,37 @@
"""
Utils for handling clientside credentials.

Supported clientside credentials:
- api_key
- api_base
- base_url

If any of these are passed in the request, generate a unique model_id for the
deployment, so that cooldowns are applied per credential rather than shared.
"""

clientside_credential_keys = ["api_key", "api_base", "base_url"]


def is_clientside_credential(request_kwargs: dict) -> bool:
    """
    Check whether the request kwargs contain a clientside credential.
    """
    return any(key in request_kwargs for key in clientside_credential_keys)


def get_dynamic_litellm_params(litellm_params: dict, request_kwargs: dict) -> dict:
    """
    Copy any clientside credentials from request_kwargs into litellm_params.

    Returns:
    - litellm_params: dict - the updated params, used downstream to generate
      a unique model_id for the deployment.
    """
    # update litellm_params with clientside credentials; note this updates the
    # dict in place, so callers pass a copy to avoid mutating the original
    # deployment's params
    for key in clientside_credential_keys:
        if key in request_kwargs:
            litellm_params[key] = request_kwargs[key]
    return litellm_params


@@ -112,12 +112,19 @@ def _should_run_cooldown_logic(
        deployment is None
        or litellm_router_instance.get_model_group(id=deployment) is None
    ):
        verbose_router_logger.debug(
            "Should Not Run Cooldown Logic: deployment id is none or model group can't be found."
        )
        return False

    if litellm_router_instance.disable_cooldowns:
        verbose_router_logger.debug(
            "Should Not Run Cooldown Logic: disable_cooldowns is True"
        )
        return False

    if deployment is None:
        verbose_router_logger.debug("Should Not Run Cooldown Logic: deployment is None")
        return False

    if not _is_cooldown_required(
@@ -126,9 +133,15 @@ def _should_run_cooldown_logic(
        exception_status=exception_status,
        exception_str=str(original_exception),
    ):
        verbose_router_logger.debug(
            "Should Not Run Cooldown Logic: _is_cooldown_required returned False"
        )
        return False

    if deployment in litellm_router_instance.provider_default_deployment_ids:
        verbose_router_logger.debug(
            "Should Not Run Cooldown Logic: deployment is in provider_default_deployment_ids"
        )
        return False

    return True
@@ -244,6 +257,8 @@ def _set_cooldown_deployments(
    - True if the deployment should be put in cooldown
    - False if the deployment should not be put in cooldown
    """

    verbose_router_logger.debug("checks 'should_run_cooldown_logic'")
    if (
        _should_run_cooldown_logic(
            litellm_router_instance, deployment, exception_status, original_exception
@@ -251,6 +266,7 @@ def _set_cooldown_deployments(
        is False
        or deployment is None
    ):
        verbose_router_logger.debug("should_run_cooldown_logic returned False")
        return False

    exception_status_int = cast_exception_status_to_int(exception_status)