fix(router.py): fix cooldown check

2025-04-26 03:04:13 +00:00 · 2024-08-28 16:38:05 -07:00 · 2024-08-28 16:38:05 -07:00 · f0fb8bdf45
commit f0fb8bdf45
parent 25d8cb69a7
3 changed files with 24 additions and 31 deletions
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@ -1,19 +1,9 @@
 model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
-      model: gpt-3.5-turbo
-      # model: sagemaker/jumpstart-dft-hf-textgeneration1-mp-20240815-185614
-      # sagemaker_base_url: https://exampleopenaiendpoint-production.up.railway.app/invocations/
-      # api_base: https://exampleopenaiendpoint-production.up.railway.app
-      input_cost_per_token: 10
-      output_cost_per_token: 10
-
-litellm_settings:
-  max_internal_user_budget: 0.00001
-  internal_user_budget_duration: "3s" # reset every 3seconds
-
-general_settings:
-  proxy_budget_rescheduler_min_time: 1
-  proxy_budget_rescheduler_max_time: 2
+      model: openai/my-fake-model
+      api_key: my-fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+


--- a/litellm/router.py
+++ b/litellm/router.py
@ -92,6 +92,7 @@ from litellm.types.router import (
    RouterErrors,
    RouterGeneralSettings,
    RouterRateLimitError,
+    RouterRateLimitErrorBasic,
    updateDeployment,
    updateLiteLLMParams,
 )
@ -4459,16 +4460,8 @@ class Router:
            """

            if _rate_limit_error is True:  # allow generic fallback logic to take place
-                model_ids = self.get_model_ids(model_name=model)
-                cooldown_time = self.cooldown_cache.get_min_cooldown(
-                    model_ids=model_ids
-                )
-                cooldown_list = self._get_cooldown_deployments()
-                raise RouterRateLimitError(
+                raise RouterRateLimitErrorBasic(
                    model=model,
-                    cooldown_time=cooldown_time,
-                    enable_pre_call_checks=True,
-                    cooldown_list=cooldown_list,
                )

            elif _context_window_error is True:
@ -4579,14 +4572,10 @@ class Router:
        litellm.print_verbose(f"initial list of deployments: {healthy_deployments}")

        if len(healthy_deployments) == 0:
-            model_ids = self.get_model_ids(model_name=model)
-            _cooldown_time = self.cooldown_cache.get_min_cooldown(model_ids=model_ids)
-            _cooldown_list = self._get_cooldown_deployments()
-            raise RouterRateLimitError(
-                model=model,
-                cooldown_time=_cooldown_time,
-                enable_pre_call_checks=self.enable_pre_call_checks,
-                cooldown_list=_cooldown_list,
+            raise ValueError(
+                "{}. You passed in model={}. There is no 'model_name' with this string ".format(
+                    RouterErrors.no_deployments_available.value, model
+                )
            )

        if litellm.model_alias_map and model in litellm.model_alias_map:
--- a/litellm/types/router.py
+++ b/litellm/types/router.py
@ -551,6 +551,20 @@ class RouterGeneralSettings(BaseModel):
    )  # if passed a model not llm_router model list, pass through the request to litellm.acompletion/embedding


+class RouterRateLimitErrorBasic(ValueError):
+    """
+    Basic ValueError raised inside helper functions; its message is only the no-deployments text.
+    """
+
+    def __init__(
+        self,
+        model: str,
+    ):
+        self.model = model  # kept on the instance for callers; not interpolated into the message
+        _message = f"{RouterErrors.no_deployments_available.value}."  # enum text plus a trailing period
+        super().__init__(_message)
+
+
 class RouterRateLimitError(ValueError):
    def __init__(
        self,