diff --git a/litellm/router.py b/litellm/router.py
index df4c2e046..b66c29533 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -2807,7 +2807,7 @@ class Router:
             if _rate_limit_error == True:  # allow generic fallback logic to take place
                 raise ValueError(
-                    f"No deployments available for selected model, passed model={model}"
+                    f"{RouterErrors.no_deployments_available.value}, passed model={model}"
                 )
             elif _context_window_error == True:
                 raise litellm.ContextWindowExceededError(
@@ -3000,7 +3000,7 @@ class Router:
                 f"get_available_deployment for model: {model}, No deployment available"
             )
             raise ValueError(
-                f"No deployments available for selected model, passed model={model}"
+                f"{RouterErrors.no_deployments_available.value}, passed model={model}"
             )
         verbose_router_logger.info(
             f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment)} for model: {model}"
@@ -3130,7 +3130,7 @@ class Router:
                 f"get_available_deployment for model: {model}, No deployment available"
             )
             raise ValueError(
-                f"No deployments available for selected model, passed model={model}"
+                f"{RouterErrors.no_deployments_available.value}, passed model={model}"
             )
         verbose_router_logger.info(
             f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment)} for model: {model}"
diff --git a/litellm/router_strategy/lowest_tpm_rpm_v2.py b/litellm/router_strategy/lowest_tpm_rpm_v2.py
index 39dbcd9d0..a11c6d872 100644
--- a/litellm/router_strategy/lowest_tpm_rpm_v2.py
+++ b/litellm/router_strategy/lowest_tpm_rpm_v2.py
@@ -394,6 +394,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
 
         dt = get_utc_datetime()
         current_minute = dt.strftime("%H-%M")
+
         tpm_keys = []
         rpm_keys = []
         for m in healthy_deployments:
@@ -416,7 +417,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
         tpm_values = combined_tpm_rpm_values[: len(tpm_keys)]
         rpm_values = combined_tpm_rpm_values[len(tpm_keys) :]
 
-        return self._common_checks_available_deployment(
+        deployment = self._common_checks_available_deployment(
             model_group=model_group,
             healthy_deployments=healthy_deployments,
             tpm_keys=tpm_keys,
@@ -427,6 +428,61 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
             rpm_keys=rpm_keys,
             tpm_values=tpm_values,
             rpm_values=rpm_values,
             messages=messages,
             input=input,
         )
+
+        try:
+            assert deployment is not None
+            return deployment
+        except Exception as e:
+            ### GET THE DICT OF TPM / RPM + LIMITS PER DEPLOYMENT ###
+            deployment_dict = {}
+            for index, _deployment in enumerate(healthy_deployments):
+                if isinstance(_deployment, dict):
+                    id = _deployment.get("model_info", {}).get("id")
+                    ### GET DEPLOYMENT TPM LIMIT ###
+                    _deployment_tpm = None
+                    if _deployment_tpm is None:
+                        _deployment_tpm = _deployment.get("tpm", None)
+                    if _deployment_tpm is None:
+                        _deployment_tpm = _deployment.get("litellm_params", {}).get(
+                            "tpm", None
+                        )
+                    if _deployment_tpm is None:
+                        _deployment_tpm = _deployment.get("model_info", {}).get(
+                            "tpm", None
+                        )
+                    if _deployment_tpm is None:
+                        _deployment_tpm = float("inf")
+
+                    ### GET CURRENT TPM ###
+                    current_tpm = tpm_values[index]
+
+                    ### GET DEPLOYMENT TPM LIMIT ###
+                    _deployment_rpm = None
+                    if _deployment_rpm is None:
+                        _deployment_rpm = _deployment.get("rpm", None)
+                    if _deployment_rpm is None:
+                        _deployment_rpm = _deployment.get("litellm_params", {}).get(
+                            "rpm", None
+                        )
+                    if _deployment_rpm is None:
+                        _deployment_rpm = _deployment.get("model_info", {}).get(
+                            "rpm", None
+                        )
+                    if _deployment_rpm is None:
+                        _deployment_rpm = float("inf")
+
+                    ### GET CURRENT RPM ###
+                    current_rpm = rpm_values[index]
+
+                    deployment_dict[id] = {
+                        "current_tpm": current_tpm,
+                        "tpm_limit": _deployment_tpm,
+                        "current_rpm": current_rpm,
+                        "rpm_limit": _deployment_rpm,
+                    }
+            raise ValueError(
+                f"{RouterErrors.no_deployments_available.value}. Passed model={model_group}. Deployments={deployment_dict}"
+            )
+
     def get_available_deployments(
         self,
         model_group: str,
@@ -464,7 +520,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
             keys=rpm_keys
         )  # [1, 2, None, ..]
 
-        return self._common_checks_available_deployment(
+        deployment = self._common_checks_available_deployment(
             model_group=model_group,
             healthy_deployments=healthy_deployments,
             tpm_keys=tpm_keys,
@@ -474,3 +530,58 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
             messages=messages,
             input=input,
         )
+
+        try:
+            assert deployment is not None
+            return deployment
+        except Exception as e:
+            ### GET THE DICT OF TPM / RPM + LIMITS PER DEPLOYMENT ###
+            deployment_dict = {}
+            for index, _deployment in enumerate(healthy_deployments):
+                if isinstance(_deployment, dict):
+                    id = _deployment.get("model_info", {}).get("id")
+                    ### GET DEPLOYMENT TPM LIMIT ###
+                    _deployment_tpm = None
+                    if _deployment_tpm is None:
+                        _deployment_tpm = _deployment.get("tpm", None)
+                    if _deployment_tpm is None:
+                        _deployment_tpm = _deployment.get("litellm_params", {}).get(
+                            "tpm", None
+                        )
+                    if _deployment_tpm is None:
+                        _deployment_tpm = _deployment.get("model_info", {}).get(
+                            "tpm", None
+                        )
+                    if _deployment_tpm is None:
+                        _deployment_tpm = float("inf")
+
+                    ### GET CURRENT TPM ###
+                    current_tpm = tpm_values[index]
+
+                    ### GET DEPLOYMENT TPM LIMIT ###
+                    _deployment_rpm = None
+                    if _deployment_rpm is None:
+                        _deployment_rpm = _deployment.get("rpm", None)
+                    if _deployment_rpm is None:
+                        _deployment_rpm = _deployment.get("litellm_params", {}).get(
+                            "rpm", None
+                        )
+                    if _deployment_rpm is None:
+                        _deployment_rpm = _deployment.get("model_info", {}).get(
+                            "rpm", None
+                        )
+                    if _deployment_rpm is None:
+                        _deployment_rpm = float("inf")
+
+                    ### GET CURRENT RPM ###
+                    current_rpm = rpm_values[index]
+
+                    deployment_dict[id] = {
+                        "current_tpm": current_tpm,
+                        "tpm_limit": _deployment_tpm,
+                        "current_rpm": current_rpm,
+                        "rpm_limit": _deployment_rpm,
+                    }
+            raise ValueError(
+                f"{RouterErrors.no_deployments_available.value}. Passed model={model_group}. Deployments={deployment_dict}"
+            )
diff --git a/litellm/types/router.py b/litellm/types/router.py
index 042d9f277..64b71b999 100644
--- a/litellm/types/router.py
+++ b/litellm/types/router.py
@@ -263,3 +263,4 @@ class RouterErrors(enum.Enum):
     """
 
     user_defined_ratelimit_error = "Deployment over user-defined ratelimit."
+    no_deployments_available = "No deployments available for selected model"