forked from phoenix/litellm-mirror
router - use retry policy
This commit is contained in:
parent
6d1981fbaa
commit
5d17c814a3
1 changed files with 50 additions and 0 deletions
|
@ -42,6 +42,7 @@ from litellm.types.router import (
|
||||||
RouterErrors,
|
RouterErrors,
|
||||||
updateDeployment,
|
updateDeployment,
|
||||||
updateLiteLLMParams,
|
updateLiteLLMParams,
|
||||||
|
RetryPolicy,
|
||||||
)
|
)
|
||||||
from litellm.integrations.custom_logger import CustomLogger
|
from litellm.integrations.custom_logger import CustomLogger
|
||||||
|
|
||||||
|
@ -82,6 +83,9 @@ class Router:
|
||||||
model_group_alias: Optional[dict] = {},
|
model_group_alias: Optional[dict] = {},
|
||||||
enable_pre_call_checks: bool = False,
|
enable_pre_call_checks: bool = False,
|
||||||
retry_after: int = 0, # min time to wait before retrying a failed request
|
retry_after: int = 0, # min time to wait before retrying a failed request
|
||||||
|
retry_policy: Optional[
|
||||||
|
RetryPolicy
|
||||||
|
] = None, # set custom retries for different exceptions
|
||||||
allowed_fails: Optional[
|
allowed_fails: Optional[
|
||||||
int
|
int
|
||||||
] = None, # Number of times a deployment can failbefore being added to cooldown
|
] = None, # Number of times a deployment can failbefore being added to cooldown
|
||||||
|
@ -303,6 +307,7 @@ class Router:
|
||||||
f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}\n\nRouter Redis Caching={self.cache.redis_cache}"
|
f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}\n\nRouter Redis Caching={self.cache.redis_cache}"
|
||||||
) # noqa
|
) # noqa
|
||||||
self.routing_strategy_args = routing_strategy_args
|
self.routing_strategy_args = routing_strategy_args
|
||||||
|
self.retry_policy: Optional[RetryPolicy] = retry_policy
|
||||||
|
|
||||||
def routing_strategy_init(self, routing_strategy: str, routing_strategy_args: dict):
|
def routing_strategy_init(self, routing_strategy: str, routing_strategy_args: dict):
|
||||||
if routing_strategy == "least-busy":
|
if routing_strategy == "least-busy":
|
||||||
|
@ -1504,6 +1509,14 @@ class Router:
|
||||||
)
|
)
|
||||||
await asyncio.sleep(_timeout)
|
await asyncio.sleep(_timeout)
|
||||||
## LOGGING
|
## LOGGING
|
||||||
|
if self.retry_policy is not None:
|
||||||
|
# get num_retries from retry policy
|
||||||
|
_retry_policy_retries = self.get_num_retries_from_retry_policy(
|
||||||
|
exception=original_exception
|
||||||
|
)
|
||||||
|
if _retry_policy_retries is not None:
|
||||||
|
num_retries = _retry_policy_retries
|
||||||
|
|
||||||
if num_retries > 0:
|
if num_retries > 0:
|
||||||
kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
|
kwargs = self.log_retry(kwargs=kwargs, e=original_exception)
|
||||||
|
|
||||||
|
@ -3254,6 +3267,43 @@ class Router:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
verbose_router_logger.error(f"Error in _track_deployment_metrics: {str(e)}")
|
verbose_router_logger.error(f"Error in _track_deployment_metrics: {str(e)}")
|
||||||
|
|
||||||
|
def get_num_retries_from_retry_policy(self, exception: Exception):
|
||||||
|
"""
|
||||||
|
BadRequestErrorRetries: Optional[int] = None
|
||||||
|
AuthenticationErrorRetries: Optional[int] = None
|
||||||
|
TimeoutErrorRetries: Optional[int] = None
|
||||||
|
RateLimitErrorRetries: Optional[int] = None
|
||||||
|
ContentPolicyViolationErrorRetries: Optional[int] = None
|
||||||
|
"""
|
||||||
|
# if we can find the exception then in the retry policy -> return the number of retries
|
||||||
|
if self.retry_policy is None:
|
||||||
|
return None
|
||||||
|
if (
|
||||||
|
isinstance(exception, litellm.BadRequestError)
|
||||||
|
and self.retry_policy.BadRequestErrorRetries is not None
|
||||||
|
):
|
||||||
|
return self.retry_policy.BadRequestErrorRetries
|
||||||
|
if (
|
||||||
|
isinstance(exception, litellm.AuthenticationError)
|
||||||
|
and self.retry_policy.AuthenticationErrorRetries is not None
|
||||||
|
):
|
||||||
|
return self.retry_policy.AuthenticationErrorRetries
|
||||||
|
if (
|
||||||
|
isinstance(exception, litellm.Timeout)
|
||||||
|
and self.retry_policy.TimeoutErrorRetries is not None
|
||||||
|
):
|
||||||
|
return self.retry_policy.TimeoutErrorRetries
|
||||||
|
if (
|
||||||
|
isinstance(exception, litellm.RateLimitError)
|
||||||
|
and self.retry_policy.RateLimitErrorRetries is not None
|
||||||
|
):
|
||||||
|
return self.retry_policy.RateLimitErrorRetries
|
||||||
|
if (
|
||||||
|
isinstance(exception, litellm.ContentPolicyViolationError)
|
||||||
|
and self.retry_policy.ContentPolicyViolationErrorRetries is not None
|
||||||
|
):
|
||||||
|
return self.retry_policy.ContentPolicyViolationErrorRetries
|
||||||
|
|
||||||
def flush_cache(self):
|
def flush_cache(self):
|
||||||
litellm.cache = None
|
litellm.cache = None
|
||||||
self.cache.flush_cache()
|
self.cache.flush_cache()
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue