forked from phoenix/litellm-mirror
fix(router.py): improve error message returned for fallbacks
parent 2bd993039b
commit cccc55213b

3 changed files with 85 additions and 63 deletions
@@ -45,7 +45,7 @@ litellm_settings:
   request_timeout: 120
   allowed_fails: 3
   # fallbacks: [{"summarize": ["summarize-l", "summarize-xl"]}, {"summarize-l": ["summarize-xl"]}]
-  context_window_fallbacks: [{"summarize": ["summarize-l", "summarize-xl"]}, {"summarize-l": ["summarize-xl"]}]
+  # context_window_fallbacks: [{"summarize": ["summarize-l", "summarize-xl"]}, {"summarize-l": ["summarize-xl"]}]



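For reference, the commented-out YAML keys above map onto the Router constructor. A minimal sketch of the same fallback wiring in Python; the model names, deployments, and API keys are placeholders and not part of this commit:

# Hypothetical sketch mirroring the proxy YAML above; values are placeholders.
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "summarize",  # smaller-context model group
            "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "sk-placeholder"},
        },
        {
            "model_name": "summarize-xl",  # larger-context fallback group
            "litellm_params": {"model": "gpt-4-32k", "api_key": "sk-placeholder"},
        },
    ],
    # same shape as the YAML: [{"<model_group>": ["<fallback_group>", ...]}]
    context_window_fallbacks=[{"summarize": ["summarize-xl"]}],
)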
@@ -2175,73 +2175,93 @@ class Router:
                     )
                 ):  # don't retry a malformed request
                     raise e
-                if (
-                    isinstance(e, litellm.ContextWindowExceededError)
-                    and context_window_fallbacks is not None
-                ):
-                    fallback_model_group = None
-                    for (
-                        item
-                    ) in context_window_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
-                        if list(item.keys())[0] == model_group:
-                            fallback_model_group = item[model_group]
-                            break
+                if isinstance(e, litellm.ContextWindowExceededError):
+                    if context_window_fallbacks is not None:
+                        fallback_model_group = None
+                        for (
+                            item
+                        ) in context_window_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
+                            if list(item.keys())[0] == model_group:
+                                fallback_model_group = item[model_group]
+                                break

-                    if fallback_model_group is None:
-                        raise original_exception
+                        if fallback_model_group is None:
+                            raise original_exception

-                    for mg in fallback_model_group:
-                        """
-                        Iterate through the model groups and try calling that deployment
-                        """
-                        try:
-                            kwargs["model"] = mg
-                            kwargs.setdefault("metadata", {}).update(
-                                {"model_group": mg}
-                            )  # update model_group used, if fallbacks are done
-                            response = await self.async_function_with_retries(
-                                *args, **kwargs
-                            )
-                            verbose_router_logger.info(
-                                "Successful fallback b/w models."
-                            )
-                            return response
-                        except Exception as e:
-                            pass
-                elif (
-                    isinstance(e, litellm.ContentPolicyViolationError)
-                    and content_policy_fallbacks is not None
-                ):
-                    fallback_model_group = None
-                    for (
-                        item
-                    ) in content_policy_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
-                        if list(item.keys())[0] == model_group:
-                            fallback_model_group = item[model_group]
-                            break
+                        for mg in fallback_model_group:
+                            """
+                            Iterate through the model groups and try calling that deployment
+                            """
+                            try:
+                                kwargs["model"] = mg
+                                kwargs.setdefault("metadata", {}).update(
+                                    {"model_group": mg}
+                                )  # update model_group used, if fallbacks are done
+                                response = await self.async_function_with_retries(
+                                    *args, **kwargs
+                                )
+                                verbose_router_logger.info(
+                                    "Successful fallback b/w models."
+                                )
+                                return response
+                            except Exception as e:
+                                pass
+                    else:
+                        error_message = "model={}. context_window_fallbacks={}. fallbacks={}.\n\nSet 'context_window_fallback' - https://docs.litellm.ai/docs/routing#fallbacks".format(
+                            model_group, context_window_fallbacks, fallbacks
+                        )
+                        verbose_router_logger.info(
+                            msg="Got 'ContextWindowExceededError'. No context_window_fallback set. Defaulting \
+                            to fallbacks, if available.{}".format(
+                                error_message
+                            )
+                        )
+                        e.message += "\n{}".format(error_message)
+                elif isinstance(e, litellm.ContentPolicyViolationError):
+                    if content_policy_fallbacks is not None:
+                        fallback_model_group = None
+                        for (
+                            item
+                        ) in content_policy_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
+                            if list(item.keys())[0] == model_group:
+                                fallback_model_group = item[model_group]
+                                break

-                    if fallback_model_group is None:
-                        raise original_exception
+                        if fallback_model_group is None:
+                            raise original_exception

-                    for mg in fallback_model_group:
-                        """
-                        Iterate through the model groups and try calling that deployment
-                        """
-                        try:
-                            kwargs["model"] = mg
-                            kwargs.setdefault("metadata", {}).update(
-                                {"model_group": mg}
-                            )  # update model_group used, if fallbacks are done
-                            response = await self.async_function_with_retries(
-                                *args, **kwargs
-                            )
-                            verbose_router_logger.info(
-                                "Successful fallback b/w models."
-                            )
-                            return response
-                        except Exception as e:
-                            pass
-                elif fallbacks is not None:
+                        for mg in fallback_model_group:
+                            """
+                            Iterate through the model groups and try calling that deployment
+                            """
+                            try:
+                                kwargs["model"] = mg
+                                kwargs.setdefault("metadata", {}).update(
+                                    {"model_group": mg}
+                                )  # update model_group used, if fallbacks are done
+                                response = await self.async_function_with_retries(
+                                    *args, **kwargs
+                                )
+                                verbose_router_logger.info(
+                                    "Successful fallback b/w models."
+                                )
+                                return response
+                            except Exception as e:
+                                pass
+                    else:
+                        error_message = "model={}. content_policy_fallback={}. fallbacks={}.\n\nSet 'content_policy_fallback' - https://docs.litellm.ai/docs/routing#fallbacks".format(
+                            model_group, content_policy_fallbacks, fallbacks
+                        )
+                        verbose_router_logger.info(
+                            msg="Got 'ContentPolicyViolationError'. No content_policy_fallback set. Defaulting \
+                            to fallbacks, if available.{}".format(
+                                error_message
+                            )
+                        )
+                        e.message += "\n{}".format(error_message)
+                if fallbacks is not None:
                     verbose_router_logger.debug(f"inside model fallbacks: {fallbacks}")
                     generic_fallback_idx: Optional[int] = None
                     ## check for specific model group-specific fallbacks
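The net effect of the new `else` branches: if a ContextWindowExceededError (or ContentPolicyViolationError) is caught but the matching fallback list is not configured, the router now logs the situation and appends a docs pointer to the exception message, then falls through to the generic `fallbacks` path (the change from `elif fallbacks is not None` to `if fallbacks is not None` is what permits the fall-through). A rough sketch of what a caller might now observe; the deployment, key, and oversized prompt are placeholders, while the message format comes from the diff above:

# Hypothetical repro sketch (not part of the commit): no
# context_window_fallbacks are configured, so the enriched error
# message introduced here should surface on the raised exception.
import asyncio

import litellm
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "summarize",
            "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "sk-placeholder"},
        }
    ],
    # context_window_fallbacks deliberately left unset
)


async def main() -> None:
    try:
        await router.acompletion(
            model="summarize",
            # deliberately oversized prompt to trigger the error
            messages=[{"role": "user", "content": "word " * 100_000}],
        )
    except litellm.ContextWindowExceededError as e:
        # Expected to now end with something like:
        #   model=summarize. context_window_fallbacks=None. fallbacks=None.
        #   Set 'context_window_fallback' - https://docs.litellm.ai/docs/routing#fallbacks
        print(e)


asyncio.run(main())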
@@ -1129,7 +1129,9 @@ async def test_router_content_policy_fallbacks(
         mock_response = Exception("content filtering policy")
     else:
         mock_response = litellm.ModelResponse(
-            choices=[litellm.Choices(finish_reason="content_filter")]
+            choices=[litellm.Choices(finish_reason="content_filter")],
+            model="gpt-3.5-turbo",
+            usage=litellm.Usage(prompt_tokens=10, completion_tokens=0, total_tokens=10),
         )
     router = Router(
         model_list=[
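The test's mock ModelResponse now carries `model` and `usage` alongside the content_filter finish reason, presumably so that post-call bookkeeping which reads those fields also works against a mock. A standalone sketch of the mock object; the assertions are illustrative and not part of the test:

# Sketch of the mock response built by the test; assertions are illustrative.
import litellm

mock_response = litellm.ModelResponse(
    choices=[litellm.Choices(finish_reason="content_filter")],
    model="gpt-3.5-turbo",
    usage=litellm.Usage(prompt_tokens=10, completion_tokens=0, total_tokens=10),
)

assert mock_response.choices[0].finish_reason == "content_filter"
assert mock_response.usage.total_tokens == 10  # assumes usage is exposed on the response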