forked from phoenix/litellm-mirror
fix(lowest_tpm_rpm_v2.py): add more detail to 'No deployments available' error message
This commit is contained in:
parent
de3e642999
commit
f10a066d36
3 changed files with 117 additions and 5 deletions
|
@ -2807,7 +2807,7 @@ class Router:
|
||||||
|
|
||||||
if _rate_limit_error == True: # allow generic fallback logic to take place
|
if _rate_limit_error == True: # allow generic fallback logic to take place
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"No deployments available for selected model, passed model={model}"
|
f"{RouterErrors.no_deployments_available.value}, passed model={model}"
|
||||||
)
|
)
|
||||||
elif _context_window_error == True:
|
elif _context_window_error == True:
|
||||||
raise litellm.ContextWindowExceededError(
|
raise litellm.ContextWindowExceededError(
|
||||||
|
@ -3000,7 +3000,7 @@ class Router:
|
||||||
f"get_available_deployment for model: {model}, No deployment available"
|
f"get_available_deployment for model: {model}, No deployment available"
|
||||||
)
|
)
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"No deployments available for selected model, passed model={model}"
|
f"{RouterErrors.no_deployments_available.value}, passed model={model}"
|
||||||
)
|
)
|
||||||
verbose_router_logger.info(
|
verbose_router_logger.info(
|
||||||
f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment)} for model: {model}"
|
f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment)} for model: {model}"
|
||||||
|
@ -3130,7 +3130,7 @@ class Router:
|
||||||
f"get_available_deployment for model: {model}, No deployment available"
|
f"get_available_deployment for model: {model}, No deployment available"
|
||||||
)
|
)
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"No deployments available for selected model, passed model={model}"
|
f"{RouterErrors.no_deployments_available.value}, passed model={model}"
|
||||||
)
|
)
|
||||||
verbose_router_logger.info(
|
verbose_router_logger.info(
|
||||||
f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment)} for model: {model}"
|
f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment)} for model: {model}"
|
||||||
|
|
|
@ -394,6 +394,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
|
||||||
|
|
||||||
dt = get_utc_datetime()
|
dt = get_utc_datetime()
|
||||||
current_minute = dt.strftime("%H-%M")
|
current_minute = dt.strftime("%H-%M")
|
||||||
|
|
||||||
tpm_keys = []
|
tpm_keys = []
|
||||||
rpm_keys = []
|
rpm_keys = []
|
||||||
for m in healthy_deployments:
|
for m in healthy_deployments:
|
||||||
|
@ -416,7 +417,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
|
||||||
tpm_values = combined_tpm_rpm_values[: len(tpm_keys)]
|
tpm_values = combined_tpm_rpm_values[: len(tpm_keys)]
|
||||||
rpm_values = combined_tpm_rpm_values[len(tpm_keys) :]
|
rpm_values = combined_tpm_rpm_values[len(tpm_keys) :]
|
||||||
|
|
||||||
return self._common_checks_available_deployment(
|
deployment = self._common_checks_available_deployment(
|
||||||
model_group=model_group,
|
model_group=model_group,
|
||||||
healthy_deployments=healthy_deployments,
|
healthy_deployments=healthy_deployments,
|
||||||
tpm_keys=tpm_keys,
|
tpm_keys=tpm_keys,
|
||||||
|
@ -427,6 +428,61 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
|
||||||
input=input,
|
input=input,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
assert deployment is not None
|
||||||
|
return deployment
|
||||||
|
except Exception as e:
|
||||||
|
### GET THE DICT OF TPM / RPM + LIMITS PER DEPLOYMENT ###
|
||||||
|
deployment_dict = {}
|
||||||
|
for index, _deployment in enumerate(healthy_deployments):
|
||||||
|
if isinstance(_deployment, dict):
|
||||||
|
id = _deployment.get("model_info", {}).get("id")
|
||||||
|
### GET DEPLOYMENT TPM LIMIT ###
|
||||||
|
_deployment_tpm = None
|
||||||
|
if _deployment_tpm is None:
|
||||||
|
_deployment_tpm = _deployment.get("tpm", None)
|
||||||
|
if _deployment_tpm is None:
|
||||||
|
_deployment_tpm = _deployment.get("litellm_params", {}).get(
|
||||||
|
"tpm", None
|
||||||
|
)
|
||||||
|
if _deployment_tpm is None:
|
||||||
|
_deployment_tpm = _deployment.get("model_info", {}).get(
|
||||||
|
"tpm", None
|
||||||
|
)
|
||||||
|
if _deployment_tpm is None:
|
||||||
|
_deployment_tpm = float("inf")
|
||||||
|
|
||||||
|
### GET CURRENT TPM ###
|
||||||
|
current_tpm = tpm_values[index]
|
||||||
|
|
||||||
|
### GET DEPLOYMENT RPM LIMIT ###
|
||||||
|
_deployment_rpm = None
|
||||||
|
if _deployment_rpm is None:
|
||||||
|
_deployment_rpm = _deployment.get("rpm", None)
|
||||||
|
if _deployment_rpm is None:
|
||||||
|
_deployment_rpm = _deployment.get("litellm_params", {}).get(
|
||||||
|
"rpm", None
|
||||||
|
)
|
||||||
|
if _deployment_rpm is None:
|
||||||
|
_deployment_rpm = _deployment.get("model_info", {}).get(
|
||||||
|
"rpm", None
|
||||||
|
)
|
||||||
|
if _deployment_rpm is None:
|
||||||
|
_deployment_rpm = float("inf")
|
||||||
|
|
||||||
|
### GET CURRENT RPM ###
|
||||||
|
current_rpm = rpm_values[index]
|
||||||
|
|
||||||
|
deployment_dict[id] = {
|
||||||
|
"current_tpm": current_tpm,
|
||||||
|
"tpm_limit": _deployment_tpm,
|
||||||
|
"current_rpm": current_rpm,
|
||||||
|
"rpm_limit": _deployment_rpm,
|
||||||
|
}
|
||||||
|
raise ValueError(
|
||||||
|
f"{RouterErrors.no_deployments_available.value}. Passed model={model_group}. Deployments={deployment_dict}"
|
||||||
|
)
|
||||||
|
|
||||||
def get_available_deployments(
|
def get_available_deployments(
|
||||||
self,
|
self,
|
||||||
model_group: str,
|
model_group: str,
|
||||||
|
@ -464,7 +520,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
|
||||||
keys=rpm_keys
|
keys=rpm_keys
|
||||||
) # [1, 2, None, ..]
|
) # [1, 2, None, ..]
|
||||||
|
|
||||||
return self._common_checks_available_deployment(
|
deployment = self._common_checks_available_deployment(
|
||||||
model_group=model_group,
|
model_group=model_group,
|
||||||
healthy_deployments=healthy_deployments,
|
healthy_deployments=healthy_deployments,
|
||||||
tpm_keys=tpm_keys,
|
tpm_keys=tpm_keys,
|
||||||
|
@ -474,3 +530,58 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
|
||||||
messages=messages,
|
messages=messages,
|
||||||
input=input,
|
input=input,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
assert deployment is not None
|
||||||
|
return deployment
|
||||||
|
except Exception as e:
|
||||||
|
### GET THE DICT OF TPM / RPM + LIMITS PER DEPLOYMENT ###
|
||||||
|
deployment_dict = {}
|
||||||
|
for index, _deployment in enumerate(healthy_deployments):
|
||||||
|
if isinstance(_deployment, dict):
|
||||||
|
id = _deployment.get("model_info", {}).get("id")
|
||||||
|
### GET DEPLOYMENT TPM LIMIT ###
|
||||||
|
_deployment_tpm = None
|
||||||
|
if _deployment_tpm is None:
|
||||||
|
_deployment_tpm = _deployment.get("tpm", None)
|
||||||
|
if _deployment_tpm is None:
|
||||||
|
_deployment_tpm = _deployment.get("litellm_params", {}).get(
|
||||||
|
"tpm", None
|
||||||
|
)
|
||||||
|
if _deployment_tpm is None:
|
||||||
|
_deployment_tpm = _deployment.get("model_info", {}).get(
|
||||||
|
"tpm", None
|
||||||
|
)
|
||||||
|
if _deployment_tpm is None:
|
||||||
|
_deployment_tpm = float("inf")
|
||||||
|
|
||||||
|
### GET CURRENT TPM ###
|
||||||
|
current_tpm = tpm_values[index]
|
||||||
|
|
||||||
|
### GET DEPLOYMENT RPM LIMIT ###
|
||||||
|
_deployment_rpm = None
|
||||||
|
if _deployment_rpm is None:
|
||||||
|
_deployment_rpm = _deployment.get("rpm", None)
|
||||||
|
if _deployment_rpm is None:
|
||||||
|
_deployment_rpm = _deployment.get("litellm_params", {}).get(
|
||||||
|
"rpm", None
|
||||||
|
)
|
||||||
|
if _deployment_rpm is None:
|
||||||
|
_deployment_rpm = _deployment.get("model_info", {}).get(
|
||||||
|
"rpm", None
|
||||||
|
)
|
||||||
|
if _deployment_rpm is None:
|
||||||
|
_deployment_rpm = float("inf")
|
||||||
|
|
||||||
|
### GET CURRENT RPM ###
|
||||||
|
current_rpm = rpm_values[index]
|
||||||
|
|
||||||
|
deployment_dict[id] = {
|
||||||
|
"current_tpm": current_tpm,
|
||||||
|
"tpm_limit": _deployment_tpm,
|
||||||
|
"current_rpm": current_rpm,
|
||||||
|
"rpm_limit": _deployment_rpm,
|
||||||
|
}
|
||||||
|
raise ValueError(
|
||||||
|
f"{RouterErrors.no_deployments_available.value}. Passed model={model_group}. Deployments={deployment_dict}"
|
||||||
|
)
|
||||||
|
|
|
@ -263,3 +263,4 @@ class RouterErrors(enum.Enum):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
user_defined_ratelimit_error = "Deployment over user-defined ratelimit."
|
user_defined_ratelimit_error = "Deployment over user-defined ratelimit."
|
||||||
|
no_deployments_available = "No deployments available for selected model"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue