fix(router.py): improve error message returned for fallbacks

Krrish Dholakia 2024-06-25 11:26:56 -07:00
parent 2bd993039b
commit cccc55213b
3 changed files with 85 additions and 63 deletions


@@ -45,7 +45,7 @@ litellm_settings:
   request_timeout: 120
   allowed_fails: 3
   # fallbacks: [{"summarize": ["summarize-l", "summarize-xl"]}, {"summarize-l": ["summarize-xl"]}]
-  context_window_fallbacks: [{"summarize": ["summarize-l", "summarize-xl"]}, {"summarize-l": ["summarize-xl"]}]
+  # context_window_fallbacks: [{"summarize": ["summarize-l", "summarize-xl"]}, {"summarize-l": ["summarize-xl"]}]
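Note: the yaml keys toggled above map to the Router's fallback settings. A minimal sketch of the equivalent Python-side configuration, assuming the Router constructor accepts these keyword arguments as the proxy config implies; the deployment params are placeholders:

    # Sketch only: model names and deployment params are illustrative.
    from litellm import Router

    router = Router(
        model_list=[
            {"model_name": "summarize", "litellm_params": {"model": "gpt-3.5-turbo"}},
            {"model_name": "summarize-l", "litellm_params": {"model": "gpt-3.5-turbo-16k"}},
            {"model_name": "summarize-xl", "litellm_params": {"model": "gpt-4-turbo"}},
        ],
        # generic fallbacks, tried on any retryable failure
        fallbacks=[{"summarize": ["summarize-l", "summarize-xl"]}, {"summarize-l": ["summarize-xl"]}],
        # tried only when the deployment raises ContextWindowExceededError
        context_window_fallbacks=[{"summarize": ["summarize-l", "summarize-xl"]}, {"summarize-l": ["summarize-xl"]}],
    )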


@@ -2175,73 +2175,93 @@ class Router:
                 )
             ):  # don't retry a malformed request
                 raise e
-            if (
-                isinstance(e, litellm.ContextWindowExceededError)
-                and context_window_fallbacks is not None
-            ):
-                fallback_model_group = None
-                for (
-                    item
-                ) in context_window_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
-                    if list(item.keys())[0] == model_group:
-                        fallback_model_group = item[model_group]
-                        break
-                if fallback_model_group is None:
-                    raise original_exception
-                for mg in fallback_model_group:
-                    """
-                    Iterate through the model groups and try calling that deployment
-                    """
-                    try:
-                        kwargs["model"] = mg
-                        kwargs.setdefault("metadata", {}).update(
-                            {"model_group": mg}
-                        )  # update model_group used, if fallbacks are done
-                        response = await self.async_function_with_retries(
-                            *args, **kwargs
-                        )
-                        verbose_router_logger.info(
-                            "Successful fallback b/w models."
-                        )
-                        return response
-                    except Exception as e:
-                        pass
-            elif (
-                isinstance(e, litellm.ContentPolicyViolationError)
-                and content_policy_fallbacks is not None
-            ):
-                fallback_model_group = None
-                for (
-                    item
-                ) in content_policy_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
-                    if list(item.keys())[0] == model_group:
-                        fallback_model_group = item[model_group]
-                        break
-                if fallback_model_group is None:
-                    raise original_exception
-                for mg in fallback_model_group:
-                    """
-                    Iterate through the model groups and try calling that deployment
-                    """
-                    try:
-                        kwargs["model"] = mg
-                        kwargs.setdefault("metadata", {}).update(
-                            {"model_group": mg}
-                        )  # update model_group used, if fallbacks are done
-                        response = await self.async_function_with_retries(
-                            *args, **kwargs
-                        )
-                        verbose_router_logger.info(
-                            "Successful fallback b/w models."
-                        )
-                        return response
-                    except Exception as e:
-                        pass
-            elif fallbacks is not None:
+            if isinstance(e, litellm.ContextWindowExceededError):
+                if context_window_fallbacks is not None:
+                    fallback_model_group = None
+                    for (
+                        item
+                    ) in context_window_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
+                        if list(item.keys())[0] == model_group:
+                            fallback_model_group = item[model_group]
+                            break
+
+                    if fallback_model_group is None:
+                        raise original_exception
+
+                    for mg in fallback_model_group:
+                        """
+                        Iterate through the model groups and try calling that deployment
+                        """
+                        try:
+                            kwargs["model"] = mg
+                            kwargs.setdefault("metadata", {}).update(
+                                {"model_group": mg}
+                            )  # update model_group used, if fallbacks are done
+                            response = await self.async_function_with_retries(
+                                *args, **kwargs
+                            )
+                            verbose_router_logger.info(
+                                "Successful fallback b/w models."
+                            )
+                            return response
+                        except Exception as e:
+                            pass
+                else:
+                    error_message = "model={}. context_window_fallbacks={}. fallbacks={}.\n\nSet 'context_window_fallback' - https://docs.litellm.ai/docs/routing#fallbacks".format(
+                        model_group, context_window_fallbacks, fallbacks
+                    )
+                    verbose_router_logger.info(
+                        msg="Got 'ContextWindowExceededError'. No context_window_fallback set. Defaulting \
+                            to fallbacks, if available.{}".format(
+                            error_message
+                        )
+                    )
+
+                    e.message += "\n{}".format(error_message)
+            elif isinstance(e, litellm.ContentPolicyViolationError):
+                if content_policy_fallbacks is not None:
+                    fallback_model_group = None
+                    for (
+                        item
+                    ) in content_policy_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
+                        if list(item.keys())[0] == model_group:
+                            fallback_model_group = item[model_group]
+                            break
+
+                    if fallback_model_group is None:
+                        raise original_exception
+
+                    for mg in fallback_model_group:
+                        """
+                        Iterate through the model groups and try calling that deployment
+                        """
+                        try:
+                            kwargs["model"] = mg
+                            kwargs.setdefault("metadata", {}).update(
+                                {"model_group": mg}
+                            )  # update model_group used, if fallbacks are done
+                            response = await self.async_function_with_retries(
+                                *args, **kwargs
+                            )
+                            verbose_router_logger.info(
+                                "Successful fallback b/w models."
+                            )
+                            return response
+                        except Exception as e:
+                            pass
+                else:
+                    error_message = "model={}. content_policy_fallback={}. fallbacks={}.\n\nSet 'content_policy_fallback' - https://docs.litellm.ai/docs/routing#fallbacks".format(
+                        model_group, content_policy_fallbacks, fallbacks
+                    )
+                    verbose_router_logger.info(
+                        msg="Got 'ContentPolicyViolationError'. No content_policy_fallback set. Defaulting \
+                            to fallbacks, if available.{}".format(
+                            error_message
+                        )
+                    )
+
+                    e.message += "\n{}".format(error_message)
+            if fallbacks is not None:
                 verbose_router_logger.debug(f"inside model fallbacks: {fallbacks}")
                 generic_fallback_idx: Optional[int] = None
                 ## check for specific model group-specific fallbacks
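Previously the router only handled ContextWindowExceededError and ContentPolicyViolationError when the matching fallback list was configured, so with no such list the exception fell through without any hint about why no fallback ran. The new code branches on the exception type alone: when the specific fallback list is missing it logs the situation, appends a pointer to the routing docs onto the exception message, and then falls through to the generic `fallbacks` handling (note the final `elif` becoming `if`). A standalone sketch of the text that now gets attached, using the same format string as the diff; the values are illustrative:

    # Reproduces the error_message built in the new else-branch; values are illustrative.
    model_group = "summarize"
    context_window_fallbacks = None
    fallbacks = [{"summarize": ["summarize-l", "summarize-xl"]}]

    error_message = "model={}. context_window_fallbacks={}. fallbacks={}.\n\nSet 'context_window_fallback' - https://docs.litellm.ai/docs/routing#fallbacks".format(
        model_group, context_window_fallbacks, fallbacks
    )
    print(error_message)
    # model=summarize. context_window_fallbacks=None. fallbacks=[{'summarize': ['summarize-l', 'summarize-xl']}].
    #
    # Set 'context_window_fallback' - https://docs.litellm.ai/docs/routing#fallbacks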


@@ -1129,7 +1129,9 @@ async def test_router_content_policy_fallbacks(
         mock_response = Exception("content filtering policy")
     else:
         mock_response = litellm.ModelResponse(
-            choices=[litellm.Choices(finish_reason="content_filter")]
+            choices=[litellm.Choices(finish_reason="content_filter")],
+            model="gpt-3.5-turbo",
+            usage=litellm.Usage(prompt_tokens=10, completion_tokens=0, total_tokens=10),
         )
     router = Router(
         model_list=[
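The mocked ModelResponse in the test now also carries a model name and a Usage block alongside the content_filter finish reason, so it looks like a complete completion object. A hedged sketch of wiring such a mock into a router deployment for a fallback test, assuming mock_response is honored inside litellm_params as this test file does; the model and group names are placeholders:

    # Sketch: mock a content-filtered response on one deployment and declare its fallback.
    import litellm
    from litellm import Router

    mock_response = litellm.ModelResponse(
        choices=[litellm.Choices(finish_reason="content_filter")],
        model="gpt-3.5-turbo",
        usage=litellm.Usage(prompt_tokens=10, completion_tokens=0, total_tokens=10),
    )

    router = Router(
        model_list=[
            {
                "model_name": "my-model",  # placeholder group name
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "mock_response": mock_response,  # assumed testing hook
                },
            }
        ],
        content_policy_fallbacks=[{"my-model": ["my-fallback-model"]}],
    )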