diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 75545bb60..938e74b5e 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -45,7 +45,7 @@ litellm_settings:
   request_timeout: 120
   allowed_fails: 3
   # fallbacks: [{"summarize": ["summarize-l", "summarize-xl"]}, {"summarize-l": ["summarize-xl"]}]
-  context_window_fallbacks: [{"summarize": ["summarize-l", "summarize-xl"]}, {"summarize-l": ["summarize-xl"]}]
+  # context_window_fallbacks: [{"summarize": ["summarize-l", "summarize-xl"]}, {"summarize-l": ["summarize-xl"]}]
diff --git a/litellm/router.py b/litellm/router.py
index 8256a6752..840df5b54 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -2175,73 +2175,93 @@ class Router:
                     )
                 ):  # don't retry a malformed request
                     raise e
-                if (
-                    isinstance(e, litellm.ContextWindowExceededError)
-                    and context_window_fallbacks is not None
-                ):
-                    fallback_model_group = None
-                    for (
-                        item
-                    ) in context_window_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
-                        if list(item.keys())[0] == model_group:
-                            fallback_model_group = item[model_group]
-                            break
+                if isinstance(e, litellm.ContextWindowExceededError):
+                    if context_window_fallbacks is not None:
+                        fallback_model_group = None
+                        for (
+                            item
+                        ) in context_window_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
+                            if list(item.keys())[0] == model_group:
+                                fallback_model_group = item[model_group]
+                                break
 
-                    if fallback_model_group is None:
-                        raise original_exception
+                        if fallback_model_group is None:
+                            raise original_exception
 
-                    for mg in fallback_model_group:
-                        """
-                        Iterate through the model groups and try calling that deployment
-                        """
-                        try:
-                            kwargs["model"] = mg
-                            kwargs.setdefault("metadata", {}).update(
-                                {"model_group": mg}
-                            )  # update model_group used, if fallbacks are done
-                            response = await self.async_function_with_retries(
-                                *args, **kwargs
-                            )
-                            verbose_router_logger.info(
-                                "Successful fallback b/w models."
-                            )
-                            return response
-                        except Exception as e:
-                            pass
-                elif (
-                    isinstance(e, litellm.ContentPolicyViolationError)
-                    and content_policy_fallbacks is not None
-                ):
-                    fallback_model_group = None
-                    for (
-                        item
-                    ) in content_policy_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
-                        if list(item.keys())[0] == model_group:
-                            fallback_model_group = item[model_group]
-                            break
+                        for mg in fallback_model_group:
+                            """
+                            Iterate through the model groups and try calling that deployment
+                            """
+                            try:
+                                kwargs["model"] = mg
+                                kwargs.setdefault("metadata", {}).update(
+                                    {"model_group": mg}
+                                )  # update model_group used, if fallbacks are done
+                                response = await self.async_function_with_retries(
+                                    *args, **kwargs
+                                )
+                                verbose_router_logger.info(
+                                    "Successful fallback b/w models."
+                                )
+                                return response
+                            except Exception as e:
+                                pass
+                    else:
+                        error_message = "model={}. context_window_fallbacks={}. fallbacks={}.\n\nSet 'context_window_fallback' - https://docs.litellm.ai/docs/routing#fallbacks".format(
+                            model_group, context_window_fallbacks, fallbacks
+                        )
+                        verbose_router_logger.info(
+                            msg="Got 'ContextWindowExceededError'. No context_window_fallback set. Defaulting \
+                            to fallbacks, if available.{}".format(
+                                error_message
+                            )
+                        )
+
+                        e.message += "\n{}".format(error_message)
+                elif isinstance(e, litellm.ContentPolicyViolationError):
+                    if content_policy_fallbacks is not None:
+                        fallback_model_group = None
+                        for (
+                            item
+                        ) in content_policy_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
+                            if list(item.keys())[0] == model_group:
+                                fallback_model_group = item[model_group]
+                                break
 
-                    if fallback_model_group is None:
-                        raise original_exception
+                        if fallback_model_group is None:
+                            raise original_exception
 
-                    for mg in fallback_model_group:
-                        """
-                        Iterate through the model groups and try calling that deployment
-                        """
-                        try:
-                            kwargs["model"] = mg
-                            kwargs.setdefault("metadata", {}).update(
-                                {"model_group": mg}
-                            )  # update model_group used, if fallbacks are done
-                            response = await self.async_function_with_retries(
-                                *args, **kwargs
-                            )
-                            verbose_router_logger.info(
-                                "Successful fallback b/w models."
-                            )
-                            return response
-                        except Exception as e:
-                            pass
-                elif fallbacks is not None:
+                        for mg in fallback_model_group:
+                            """
+                            Iterate through the model groups and try calling that deployment
+                            """
+                            try:
+                                kwargs["model"] = mg
+                                kwargs.setdefault("metadata", {}).update(
+                                    {"model_group": mg}
+                                )  # update model_group used, if fallbacks are done
+                                response = await self.async_function_with_retries(
+                                    *args, **kwargs
+                                )
+                                verbose_router_logger.info(
+                                    "Successful fallback b/w models."
+                                )
+                                return response
+                            except Exception as e:
+                                pass
+                    else:
+                        error_message = "model={}. content_policy_fallback={}. fallbacks={}.\n\nSet 'content_policy_fallback' - https://docs.litellm.ai/docs/routing#fallbacks".format(
+                            model_group, content_policy_fallbacks, fallbacks
+                        )
+                        verbose_router_logger.info(
+                            msg="Got 'ContentPolicyViolationError'. No content_policy_fallback set. Defaulting \
+                            to fallbacks, if available.{}".format(
+                                error_message
+                            )
+                        )
+
+                        e.message += "\n{}".format(error_message)
+                if fallbacks is not None:
                     verbose_router_logger.debug(f"inside model fallbacks: {fallbacks}")
                     generic_fallback_idx: Optional[int] = None
                     ## check for specific model group-specific fallbacks
diff --git a/litellm/tests/test_router_fallbacks.py b/litellm/tests/test_router_fallbacks.py
index 99d2a600c..2c552a64b 100644
--- a/litellm/tests/test_router_fallbacks.py
+++ b/litellm/tests/test_router_fallbacks.py
@@ -1129,7 +1129,9 @@ async def test_router_content_policy_fallbacks(
         mock_response = Exception("content filtering policy")
     else:
         mock_response = litellm.ModelResponse(
-            choices=[litellm.Choices(finish_reason="content_filter")]
+            choices=[litellm.Choices(finish_reason="content_filter")],
+            model="gpt-3.5-turbo",
+            usage=litellm.Usage(prompt_tokens=10, completion_tokens=0, total_tokens=10),
        )
     router = Router(
         model_list=[
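
The code path this diff changes is easiest to see from the caller's side. Below is a minimal sketch (not part of the diff) of a Router configured with generic fallbacks but with context_window_fallbacks deliberately unset; the model group names and the api_key placeholder are illustrative assumptions. With this setup, a ContextWindowExceededError now takes the new else branch: the router logs the hint and appends the "Set 'context_window_fallback' - https://docs.litellm.ai/docs/routing#fallbacks" message to e.message before trying the generic fallbacks, instead of skipping the branch silently.

    import litellm
    from litellm import Router

    # Illustrative sketch only: model group names and keys are hypothetical.
    router = Router(
        model_list=[
            {
                "model_name": "summarize",
                "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "sk-..."},
            },
            {
                "model_name": "summarize-xl",
                "litellm_params": {"model": "gpt-4", "api_key": "sk-..."},
            },
        ],
        # Generic fallbacks are set, but context_window_fallbacks is not, so a
        # ContextWindowExceededError now carries the docs hint in its message
        # while the router falls back to "summarize-xl".
        fallbacks=[{"summarize": ["summarize-xl"]}],
    )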