diff --git a/litellm/router.py b/litellm/router.py index 7542108024..2f72b81428 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2337,7 +2337,7 @@ class Router: original_exception = e fallback_model_group = None try: - verbose_router_logger.debug(f"Trying to fallback b/w models") + verbose_router_logger.debug("Trying to fallback b/w models") if ( hasattr(e, "status_code") and e.status_code == 400 # type: ignore @@ -2346,6 +2346,9 @@ class Router: or isinstance(e, litellm.ContentPolicyViolationError) ) ): # don't retry a malformed request + verbose_router_logger.debug( + "Not retrying request as it's malformed. Status code=400." + ) raise e if isinstance(e, litellm.ContextWindowExceededError): if context_window_fallbacks is not None: @@ -2484,6 +2487,12 @@ class Router: except Exception as e: verbose_router_logger.error(f"An exception occurred - {str(e)}") verbose_router_logger.debug(traceback.format_exc()) + + if hasattr(original_exception, "message"): + # add the available fallbacks to the exception + original_exception.message += "\nReceived Model Group={}\nAvailable Model Group Fallbacks={}".format( + model_group, fallback_model_group + ) raise original_exception async def async_function_with_retries(self, *args, **kwargs): diff --git a/litellm/utils.py b/litellm/utils.py index 3265f15864..c31c053e79 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -4319,6 +4319,7 @@ def get_formatted_prompt( prompt = data["prompt"] return prompt + def get_response_string(response_obj: ModelResponse) -> str: _choices: List[Union[Choices, StreamingChoices]] = response_obj.choices @@ -7720,11 +7721,6 @@ def exception_type( llm_provider="azure", model=model, litellm_debug_info=extra_information, - response=httpx.Response( - status_code=400, - content=str(original_exception), - request=httpx.Request(method="completion", url="https://github.com/BerriAI/litellm"), # type: ignore - ), ) elif "This model's maximum context length is" in error_str: exception_mapping_worked = True @@ -7733,7 +7729,6 @@ def exception_type( llm_provider="azure", model=model, litellm_debug_info=extra_information, - response=original_exception.response, ) elif "DeploymentNotFound" in error_str: exception_mapping_worked = True @@ -7742,7 +7737,6 @@ def exception_type( llm_provider="azure", model=model, litellm_debug_info=extra_information, - response=original_exception.response, ) elif ( ( @@ -7762,7 +7756,6 @@ def exception_type( llm_provider="azure", model=model, litellm_debug_info=extra_information, - response=getattr(original_exception, "response", None), ) elif "invalid_request_error" in error_str: exception_mapping_worked = True @@ -7771,7 +7764,6 @@ def exception_type( llm_provider="azure", model=model, litellm_debug_info=extra_information, - response=getattr(original_exception, "response", None), ) elif ( "The api_key client option must be set either by passing api_key to the client or by setting" @@ -7783,7 +7775,6 @@ def exception_type( llm_provider=custom_llm_provider, model=model, litellm_debug_info=extra_information, - response=original_exception.response, ) elif hasattr(original_exception, "status_code"): exception_mapping_worked = True @@ -7794,7 +7785,6 @@ def exception_type( llm_provider="azure", model=model, litellm_debug_info=extra_information, - response=original_exception.response, ) elif original_exception.status_code == 401: exception_mapping_worked = True @@ -7803,7 +7793,6 @@ def exception_type( llm_provider="azure", model=model, litellm_debug_info=extra_information, - response=original_exception.response, ) elif original_exception.status_code == 408: exception_mapping_worked = True @@ -7820,7 +7809,6 @@ def exception_type( model=model, llm_provider="azure", litellm_debug_info=extra_information, - response=original_exception.response, ) elif original_exception.status_code == 429: exception_mapping_worked = True @@ -7829,7 +7817,6 @@ def exception_type( model=model, llm_provider="azure", litellm_debug_info=extra_information, - response=original_exception.response, ) elif original_exception.status_code == 503: exception_mapping_worked = True @@ -7838,7 +7825,6 @@ def exception_type( model=model, llm_provider="azure", litellm_debug_info=extra_information, - response=original_exception.response, ) elif original_exception.status_code == 504: # gateway timeout error exception_mapping_worked = True