feat(router.py): add flag for mock testing load balancing for rate limit errors

This commit is contained in:
Krrish Dholakia 2024-08-03 12:34:11 -07:00
parent ed8b20fa18
commit 6b8806b45f
2 changed files with 36 additions and 11 deletions

View file

@ -1,7 +1,10 @@
model_list:
- model_name: "*"
- model_name: "gpt-4"
litellm_params:
model: "*"
# litellm_settings:
# failure_callback: ["langfuse"]
model: "gpt-4"
- model_name: "gpt-4"
litellm_params:
model: "gpt-4o"
- model_name: "gpt-4o-mini"
litellm_params:
model: "gpt-4o-mini"

View file

@ -2468,6 +2468,8 @@ class Router:
verbose_router_logger.info(
f"No fallback model group found for original model_group={model_group}. Fallbacks={fallbacks}"
)
if hasattr(original_exception, "message"):
original_exception.message += f"No fallback model group found for original model_group={model_group}. Fallbacks={fallbacks}"
raise original_exception
for mg in fallback_model_group:
"""
@ -2492,14 +2494,19 @@ class Router:
return response
except Exception as e:
raise e
except Exception as e:
verbose_router_logger.error(f"An exception occurred - {str(e)}")
verbose_router_logger.debug(traceback.format_exc())
except Exception as new_exception:
verbose_router_logger.error(
"litellm.router.py::async_function_with_fallbacks() - Error occurred while trying to do fallbacks - {}\n{}".format(
str(new_exception), traceback.format_exc()
)
)
if hasattr(original_exception, "message"):
# add the available fallbacks to the exception
original_exception.message += "\nReceived Model Group={}\nAvailable Model Group Fallbacks={}".format(
model_group, fallback_model_group
original_exception.message += "\nReceived Model Group={}\nAvailable Model Group Fallbacks={}\nCooldown Deployments={}".format(
model_group,
fallback_model_group,
await self._async_get_cooldown_deployments_with_debug_info(),
)
raise original_exception
@ -2508,6 +2515,9 @@ class Router:
f"Inside async function with retries: args - {args}; kwargs - {kwargs}"
)
original_function = kwargs.pop("original_function")
mock_testing_rate_limit_error = kwargs.pop(
"mock_testing_rate_limit_error", None
)
fallbacks = kwargs.pop("fallbacks", self.fallbacks)
context_window_fallbacks = kwargs.pop(
"context_window_fallbacks", self.context_window_fallbacks
@ -2515,13 +2525,25 @@ class Router:
content_policy_fallbacks = kwargs.pop(
"content_policy_fallbacks", self.content_policy_fallbacks
)
model_group = kwargs.get("model")
num_retries = kwargs.pop("num_retries")
verbose_router_logger.debug(
f"async function w/ retries: original_function - {original_function}, num_retries - {num_retries}"
)
try:
if (
mock_testing_rate_limit_error is not None
and mock_testing_rate_limit_error is True
):
verbose_router_logger.info(
"litellm.router.py::async_function_with_retries() - mock_testing_rate_limit_error=True. Raising litellm.RateLimitError."
)
raise litellm.RateLimitError(
model=model_group,
llm_provider="",
message=f"This is a mock exception for model={model_group}, to trigger a rate limit error.",
)
# if the function call is successful, no exception will be raised and we'll break out of the loop
response = await original_function(*args, **kwargs)
return response