mirror of https://github.com/BerriAI/litellm.git
feat(router.py): add flag for mock testing loadbalancing for rate limit errors
parent ed8b20fa18
commit 6b8806b45f

2 changed files with 36 additions and 11 deletions
@@ -1,7 +1,10 @@
 model_list:
-  - model_name: "*"
+  - model_name: "gpt-4"
     litellm_params:
-      model: "*"
-
-# litellm_settings:
-#   failure_callback: ["langfuse"]
+      model: "gpt-4"
+  - model_name: "gpt-4"
+    litellm_params:
+      model: "gpt-4o"
+  - model_name: "gpt-4o-mini"
+    litellm_params:
+      model: "gpt-4o-mini"
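The new config replaces the wildcard route with three deployments, two of which share the model_name "gpt-4" (backed by gpt-4 and gpt-4o respectively), so the router load-balances "gpt-4" requests across both. A minimal sketch of the equivalent programmatic setup using the real litellm Router API (the deployment params are illustrative and assume API keys are set in the environment):

from litellm import Router

# Two deployments registered under the same alias "gpt-4":
# the Router picks between them per request (load balancing).
router = Router(
    model_list=[
        {"model_name": "gpt-4", "litellm_params": {"model": "gpt-4"}},
        {"model_name": "gpt-4", "litellm_params": {"model": "gpt-4o"}},
        {"model_name": "gpt-4o-mini", "litellm_params": {"model": "gpt-4o-mini"}},
    ]
)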
@@ -2468,6 +2468,8 @@ class Router:
                         verbose_router_logger.info(
                             f"No fallback model group found for original model_group={model_group}. Fallbacks={fallbacks}"
                         )
+                        if hasattr(original_exception, "message"):
+                            original_exception.message += f"No fallback model group found for original model_group={model_group}. Fallbacks={fallbacks}"
                         raise original_exception
                     for mg in fallback_model_group:
                         """
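This hunk surfaces the missing-fallback diagnostics to the caller instead of only logging them. A hedged sketch of what a client might now observe (the appended text mirrors the diff; the call and alias are illustrative):

try:
    response = await router.acompletion(model="some-alias", messages=messages)
except Exception as e:
    # If no fallback group matched, e.message now ends with:
    # "No fallback model group found for original model_group=some-alias. Fallbacks=[...]"
    print(getattr(e, "message", str(e)))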
@@ -2492,14 +2494,19 @@ class Router:
                             return response
                         except Exception as e:
                             raise e
-        except Exception as e:
-            verbose_router_logger.error(f"An exception occurred - {str(e)}")
-            verbose_router_logger.debug(traceback.format_exc())
+        except Exception as new_exception:
+            verbose_router_logger.error(
+                "litellm.router.py::async_function_with_fallbacks() - Error occurred while trying to do fallbacks - {}\n{}".format(
+                    str(new_exception), traceback.format_exc()
+                )
+            )

             if hasattr(original_exception, "message"):
                 # add the available fallbacks to the exception
-                original_exception.message += "\nReceived Model Group={}\nAvailable Model Group Fallbacks={}".format(
-                    model_group, fallback_model_group
+                original_exception.message += "\nReceived Model Group={}\nAvailable Model Group Fallbacks={}\nCooldown Deployments={}".format(
+                    model_group,
+                    fallback_model_group,
+                    await self._async_get_cooldown_deployments_with_debug_info(),
                 )
             raise original_exception

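When the fallbacks themselves fail, the exception now also reports which deployments are in cooldown, which is typically why a fallback group had nothing healthy left to route to. Illustratively, the appended text could read (all values hypothetical):

Received Model Group=gpt-4
Available Model Group Fallbacks=['gpt-4o-mini']
Cooldown Deployments=[{'deployment_id': '...', 'reason': 'RateLimitError'}]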
@@ -2508,6 +2515,9 @@ class Router:
             f"Inside async function with retries: args - {args}; kwargs - {kwargs}"
         )
         original_function = kwargs.pop("original_function")
+        mock_testing_rate_limit_error = kwargs.pop(
+            "mock_testing_rate_limit_error", None
+        )
         fallbacks = kwargs.pop("fallbacks", self.fallbacks)
         context_window_fallbacks = kwargs.pop(
             "context_window_fallbacks", self.context_window_fallbacks
@@ -2515,13 +2525,25 @@ class Router:
         content_policy_fallbacks = kwargs.pop(
             "content_policy_fallbacks", self.content_policy_fallbacks
         )

         model_group = kwargs.get("model")
         num_retries = kwargs.pop("num_retries")

         verbose_router_logger.debug(
             f"async function w/ retries: original_function - {original_function}, num_retries - {num_retries}"
         )
         try:
+            if (
+                mock_testing_rate_limit_error is not None
+                and mock_testing_rate_limit_error is True
+            ):
+                verbose_router_logger.info(
+                    "litellm.router.py::async_function_with_retries() - mock_testing_rate_limit_error=True. Raising litellm.RateLimitError."
+                )
+                raise litellm.RateLimitError(
+                    model=model_group,
+                    llm_provider="",
+                    message=f"This is a mock exception for model={model_group}, to trigger a rate limit error.",
+                )
             # if the function call is successful, no exception will be raised and we'll break out of the loop
             response = await original_function(*args, **kwargs)
             return response
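Since async_function_with_retries pops mock_testing_rate_limit_error from kwargs before invoking the wrapped function, passing the flag through a normal router call should raise litellm.RateLimitError up front and let you exercise retry, fallback, and load-balancing behavior without consuming real quota. A minimal sketch, assuming the flag is forwarded through acompletion's kwargs (Router, acompletion, and RateLimitError are real litellm APIs; the deployments are illustrative):

import asyncio
import litellm
from litellm import Router

router = Router(
    model_list=[
        {"model_name": "gpt-4", "litellm_params": {"model": "gpt-4"}},
        {"model_name": "gpt-4", "litellm_params": {"model": "gpt-4o"}},
    ],
    num_retries=1,
)

async def main():
    try:
        # The mocked error is raised before any provider is called,
        # so no real request is made and no real rate limit is hit.
        await router.acompletion(
            model="gpt-4",
            messages=[{"role": "user", "content": "hi"}],
            mock_testing_rate_limit_error=True,
        )
    except litellm.RateLimitError as e:
        print("mock rate limit error:", e)

asyncio.run(main())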