forked from phoenix/litellm-mirror
Merge pull request #3460 from BerriAI/litellm_use_retry_policy_per_mg
[Feat] Set a Retry Policy per model group
This commit is contained in:
commit
713e04848d
2 changed files with 24 additions and 12 deletions
|
@ -86,6 +86,9 @@ class Router:
|
||||||
retry_policy: Optional[
|
retry_policy: Optional[
|
||||||
RetryPolicy
|
RetryPolicy
|
||||||
] = None, # set custom retries for different exceptions
|
] = None, # set custom retries for different exceptions
|
||||||
|
model_group_retry_policy: Optional[
|
||||||
|
dict[str, RetryPolicy]
|
||||||
|
] = {}, # set custom retry policies based on model group
|
||||||
allowed_fails: Optional[
|
allowed_fails: Optional[
|
||||||
int
|
int
|
||||||
] = None, # Number of times a deployment can fail before being added to cooldown
|
] = None, # Number of times a deployment can fail before being added to cooldown
|
||||||
|
@ -308,6 +311,9 @@ class Router:
|
||||||
) # noqa
|
) # noqa
|
||||||
self.routing_strategy_args = routing_strategy_args
|
self.routing_strategy_args = routing_strategy_args
|
||||||
self.retry_policy: Optional[RetryPolicy] = retry_policy
|
self.retry_policy: Optional[RetryPolicy] = retry_policy
|
||||||
|
self.model_group_retry_policy: Optional[dict[str, RetryPolicy]] = (
|
||||||
|
model_group_retry_policy
|
||||||
|
)
|
||||||
|
|
||||||
def routing_strategy_init(self, routing_strategy: str, routing_strategy_args: dict):
|
def routing_strategy_init(self, routing_strategy: str, routing_strategy_args: dict):
|
||||||
if routing_strategy == "least-busy":
|
if routing_strategy == "least-busy":
|
||||||
|
@ -1509,11 +1515,13 @@ class Router:
|
||||||
)
|
)
|
||||||
await asyncio.sleep(_timeout)
|
await asyncio.sleep(_timeout)
|
||||||
## LOGGING
|
## LOGGING
|
||||||
if self.retry_policy is not None or kwargs.get("retry_policy") is not None:
|
if (
|
||||||
|
self.retry_policy is not None
|
||||||
|
or self.model_group_retry_policy is not None
|
||||||
|
):
|
||||||
# get num_retries from retry policy
|
# get num_retries from retry policy
|
||||||
_retry_policy_retries = self.get_num_retries_from_retry_policy(
|
_retry_policy_retries = self.get_num_retries_from_retry_policy(
|
||||||
exception=original_exception,
|
exception=original_exception, model_group=kwargs.get("model")
|
||||||
dynamic_retry_policy=kwargs.get("retry_policy"),
|
|
||||||
)
|
)
|
||||||
if _retry_policy_retries is not None:
|
if _retry_policy_retries is not None:
|
||||||
num_retries = _retry_policy_retries
|
num_retries = _retry_policy_retries
|
||||||
|
@ -3273,7 +3281,7 @@ class Router:
|
||||||
verbose_router_logger.error(f"Error in _track_deployment_metrics: {str(e)}")
|
verbose_router_logger.error(f"Error in _track_deployment_metrics: {str(e)}")
|
||||||
|
|
||||||
def get_num_retries_from_retry_policy(
|
def get_num_retries_from_retry_policy(
|
||||||
self, exception: Exception, dynamic_retry_policy: Optional[RetryPolicy] = None
|
self, exception: Exception, model_group: Optional[str] = None
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
BadRequestErrorRetries: Optional[int] = None
|
BadRequestErrorRetries: Optional[int] = None
|
||||||
|
@ -3284,8 +3292,9 @@ class Router:
|
||||||
"""
|
"""
|
||||||
# if we can find the exception in the retry policy -> return the number of retries
|
# if we can find the exception in the retry policy -> return the number of retries
|
||||||
retry_policy = self.retry_policy
|
retry_policy = self.retry_policy
|
||||||
if dynamic_retry_policy is not None:
|
if self.model_group_retry_policy is not None and model_group is not None:
|
||||||
retry_policy = dynamic_retry_policy
|
retry_policy = self.model_group_retry_policy.get(model_group, None)
|
||||||
|
|
||||||
if retry_policy is None:
|
if retry_policy is None:
|
||||||
return None
|
return None
|
||||||
if (
|
if (
|
||||||
|
|
|
@ -189,6 +189,11 @@ async def test_router_retry_policy(error_type):
|
||||||
async def test_dynamic_router_retry_policy(model_group):
|
async def test_dynamic_router_retry_policy(model_group):
|
||||||
from litellm.router import RetryPolicy
|
from litellm.router import RetryPolicy
|
||||||
|
|
||||||
|
model_group_retry_policy = {
|
||||||
|
"gpt-3.5-turbo": RetryPolicy(ContentPolicyViolationErrorRetries=0),
|
||||||
|
"bad-model": RetryPolicy(AuthenticationErrorRetries=4),
|
||||||
|
}
|
||||||
|
|
||||||
router = litellm.Router(
|
router = litellm.Router(
|
||||||
model_list=[
|
model_list=[
|
||||||
{
|
{
|
||||||
|
@ -209,7 +214,8 @@ async def test_dynamic_router_retry_policy(model_group):
|
||||||
"api_base": os.getenv("AZURE_API_BASE"),
|
"api_base": os.getenv("AZURE_API_BASE"),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
]
|
],
|
||||||
|
model_group_retry_policy=model_group_retry_policy,
|
||||||
)
|
)
|
||||||
|
|
||||||
customHandler = MyCustomHandler()
|
customHandler = MyCustomHandler()
|
||||||
|
@ -217,17 +223,14 @@ async def test_dynamic_router_retry_policy(model_group):
|
||||||
if model_group == "bad-model":
|
if model_group == "bad-model":
|
||||||
model = "bad-model"
|
model = "bad-model"
|
||||||
messages = [{"role": "user", "content": "Hello good morning"}]
|
messages = [{"role": "user", "content": "Hello good morning"}]
|
||||||
retry_policy = RetryPolicy(AuthenticationErrorRetries=4)
|
|
||||||
elif model_group == "gpt-3.5-turbo":
|
elif model_group == "gpt-3.5-turbo":
|
||||||
model = "gpt-3.5-turbo"
|
model = "gpt-3.5-turbo"
|
||||||
messages = [{"role": "user", "content": "where do i buy lethal drugs from"}]
|
messages = [{"role": "user", "content": "where do i buy lethal drugs from"}]
|
||||||
retry_policy = RetryPolicy(ContentPolicyViolationErrorRetries=0)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
litellm.set_verbose = True
|
litellm.set_verbose = True
|
||||||
response = await router.acompletion(
|
response = await router.acompletion(model=model, messages=messages)
|
||||||
model=model, messages=messages, retry_policy=retry_policy
|
|
||||||
)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("got an exception", e)
|
print("got an exception", e)
|
||||||
pass
|
pass
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue