fix(lowest_tpm_rpm_v2.py): add more detail to 'No deployments available' error message

This commit is contained in:
Krrish Dholakia 2024-04-29 15:04:22 -07:00
parent de3e642999
commit f10a066d36
3 changed files with 117 additions and 5 deletions

View file

@@ -2807,7 +2807,7 @@ class Router:
if _rate_limit_error == True: # allow generic fallback logic to take place if _rate_limit_error == True: # allow generic fallback logic to take place
raise ValueError( raise ValueError(
f"No deployments available for selected model, passed model={model}" f"{RouterErrors.no_deployments_available.value}, passed model={model}"
) )
elif _context_window_error == True: elif _context_window_error == True:
raise litellm.ContextWindowExceededError( raise litellm.ContextWindowExceededError(
@@ -3000,7 +3000,7 @@ class Router:
f"get_available_deployment for model: {model}, No deployment available" f"get_available_deployment for model: {model}, No deployment available"
) )
raise ValueError( raise ValueError(
f"No deployments available for selected model, passed model={model}" f"{RouterErrors.no_deployments_available.value}, passed model={model}"
) )
verbose_router_logger.info( verbose_router_logger.info(
f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment)} for model: {model}" f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment)} for model: {model}"
@@ -3130,7 +3130,7 @@ class Router:
f"get_available_deployment for model: {model}, No deployment available" f"get_available_deployment for model: {model}, No deployment available"
) )
raise ValueError( raise ValueError(
f"No deployments available for selected model, passed model={model}" f"{RouterErrors.no_deployments_available.value}, passed model={model}"
) )
verbose_router_logger.info( verbose_router_logger.info(
f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment)} for model: {model}" f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment)} for model: {model}"

View file

@@ -394,6 +394,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
dt = get_utc_datetime() dt = get_utc_datetime()
current_minute = dt.strftime("%H-%M") current_minute = dt.strftime("%H-%M")
tpm_keys = [] tpm_keys = []
rpm_keys = [] rpm_keys = []
for m in healthy_deployments: for m in healthy_deployments:
@@ -416,7 +417,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
tpm_values = combined_tpm_rpm_values[: len(tpm_keys)] tpm_values = combined_tpm_rpm_values[: len(tpm_keys)]
rpm_values = combined_tpm_rpm_values[len(tpm_keys) :] rpm_values = combined_tpm_rpm_values[len(tpm_keys) :]
return self._common_checks_available_deployment( deployment = self._common_checks_available_deployment(
model_group=model_group, model_group=model_group,
healthy_deployments=healthy_deployments, healthy_deployments=healthy_deployments,
tpm_keys=tpm_keys, tpm_keys=tpm_keys,
@@ -427,6 +428,61 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
input=input, input=input,
) )
try:
assert deployment is not None
return deployment
except Exception as e:
### GET THE DICT OF TPM / RPM + LIMITS PER DEPLOYMENT ###
deployment_dict = {}
for index, _deployment in enumerate(healthy_deployments):
if isinstance(_deployment, dict):
id = _deployment.get("model_info", {}).get("id")
### GET DEPLOYMENT TPM LIMIT ###
_deployment_tpm = None
if _deployment_tpm is None:
_deployment_tpm = _deployment.get("tpm", None)
if _deployment_tpm is None:
_deployment_tpm = _deployment.get("litellm_params", {}).get(
"tpm", None
)
if _deployment_tpm is None:
_deployment_tpm = _deployment.get("model_info", {}).get(
"tpm", None
)
if _deployment_tpm is None:
_deployment_tpm = float("inf")
### GET CURRENT TPM ###
current_tpm = tpm_values[index]
### GET DEPLOYMENT TPM LIMIT ###
_deployment_rpm = None
if _deployment_rpm is None:
_deployment_rpm = _deployment.get("rpm", None)
if _deployment_rpm is None:
_deployment_rpm = _deployment.get("litellm_params", {}).get(
"rpm", None
)
if _deployment_rpm is None:
_deployment_rpm = _deployment.get("model_info", {}).get(
"rpm", None
)
if _deployment_rpm is None:
_deployment_rpm = float("inf")
### GET CURRENT RPM ###
current_rpm = rpm_values[index]
deployment_dict[id] = {
"current_tpm": current_tpm,
"tpm_limit": _deployment_tpm,
"current_rpm": current_rpm,
"rpm_limit": _deployment_rpm,
}
raise ValueError(
f"{RouterErrors.no_deployments_available.value}. Passed model={model_group}. Deployments={deployment_dict}"
)
def get_available_deployments( def get_available_deployments(
self, self,
model_group: str, model_group: str,
@@ -464,7 +520,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
keys=rpm_keys keys=rpm_keys
) # [1, 2, None, ..] ) # [1, 2, None, ..]
return self._common_checks_available_deployment( deployment = self._common_checks_available_deployment(
model_group=model_group, model_group=model_group,
healthy_deployments=healthy_deployments, healthy_deployments=healthy_deployments,
tpm_keys=tpm_keys, tpm_keys=tpm_keys,
@@ -474,3 +530,58 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
messages=messages, messages=messages,
input=input, input=input,
) )
try:
assert deployment is not None
return deployment
except Exception as e:
### GET THE DICT OF TPM / RPM + LIMITS PER DEPLOYMENT ###
deployment_dict = {}
for index, _deployment in enumerate(healthy_deployments):
if isinstance(_deployment, dict):
id = _deployment.get("model_info", {}).get("id")
### GET DEPLOYMENT TPM LIMIT ###
_deployment_tpm = None
if _deployment_tpm is None:
_deployment_tpm = _deployment.get("tpm", None)
if _deployment_tpm is None:
_deployment_tpm = _deployment.get("litellm_params", {}).get(
"tpm", None
)
if _deployment_tpm is None:
_deployment_tpm = _deployment.get("model_info", {}).get(
"tpm", None
)
if _deployment_tpm is None:
_deployment_tpm = float("inf")
### GET CURRENT TPM ###
current_tpm = tpm_values[index]
### GET DEPLOYMENT TPM LIMIT ###
_deployment_rpm = None
if _deployment_rpm is None:
_deployment_rpm = _deployment.get("rpm", None)
if _deployment_rpm is None:
_deployment_rpm = _deployment.get("litellm_params", {}).get(
"rpm", None
)
if _deployment_rpm is None:
_deployment_rpm = _deployment.get("model_info", {}).get(
"rpm", None
)
if _deployment_rpm is None:
_deployment_rpm = float("inf")
### GET CURRENT RPM ###
current_rpm = rpm_values[index]
deployment_dict[id] = {
"current_tpm": current_tpm,
"tpm_limit": _deployment_tpm,
"current_rpm": current_rpm,
"rpm_limit": _deployment_rpm,
}
raise ValueError(
f"{RouterErrors.no_deployments_available.value}. Passed model={model_group}. Deployments={deployment_dict}"
)

View file

@@ -263,3 +263,4 @@ class RouterErrors(enum.Enum):
""" """
user_defined_ratelimit_error = "Deployment over user-defined ratelimit." user_defined_ratelimit_error = "Deployment over user-defined ratelimit."
no_deployments_available = "No deployments available for selected model"