From f6d97c25f2fc4f669d8a4170c9f028a6af4c174a Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 20 Aug 2024 12:55:36 -0700 Subject: [PATCH] fix run sync fallbacks --- litellm/router.py | 71 +++++++------------ .../router_utils/fallback_event_handlers.py | 31 ++++++++ 2 files changed, 57 insertions(+), 45 deletions(-) diff --git a/litellm/router.py b/litellm/router.py index 0f3477520..27ac14b9f 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -63,6 +63,7 @@ from litellm.router_utils.fallback_event_handlers import ( log_failure_fallback_event, log_success_fallback_event, run_async_fallback, + run_sync_fallback, ) from litellm.router_utils.handle_error import send_llm_exception_alert from litellm.scheduler import FlowItem, Scheduler @@ -2700,6 +2701,7 @@ class Router: return response except Exception as e: original_exception = e + original_model_group = kwargs.get("model") verbose_router_logger.debug(f"An exception occurs {original_exception}") try: verbose_router_logger.debug( @@ -2721,21 +2723,14 @@ class Router: if fallback_model_group is None: raise original_exception - for mg in fallback_model_group: - """ - Iterate through the model groups and try calling that deployment - """ - try: - ## LOGGING - kwargs = self.log_retry(kwargs=kwargs, e=original_exception) - kwargs["model"] = mg - kwargs.setdefault("metadata", {}).update( - {"model_group": mg} - ) # update model_group used, if fallbacks are done - response = self.function_with_fallbacks(*args, **kwargs) - return response - except Exception as e: - pass + return run_sync_fallback( + *args, + litellm_router=self, + fallback_model_group=fallback_model_group, + original_model_group=original_model_group, + original_exception=original_exception, + **kwargs, + ) elif ( isinstance(e, litellm.ContentPolicyViolationError) and content_policy_fallbacks is not None @@ -2752,21 +2747,14 @@ class Router: if fallback_model_group is None: raise original_exception - for mg in fallback_model_group: - """ - Iterate 
def run_sync_fallback(
    *args: Any,
    litellm_router: "LitellmRouter",
    fallback_model_group: List[str],
    original_model_group: str,
    original_exception: Exception,
    **kwargs: Any,
) -> Any:
    """
    Synchronously iterate through ``fallback_model_group`` and return the
    first successful response.

    ``litellm_router`` is keyword-only on purpose: every call site invokes
    this as ``run_sync_fallback(*args, litellm_router=self, ...)``, so a
    leading positional ``litellm_router`` parameter would also capture the
    first element of ``*args`` and raise
    ``TypeError: got multiple values for argument 'litellm_router'``
    whenever positional args are forwarded.

    Args:
        *args: Positional arguments forwarded to the router call.
        litellm_router: Router whose ``function_with_fallbacks`` is retried
            per candidate model group.
        fallback_model_group: Candidate model groups to try, in order.
        original_model_group: The model group that just failed; it is
            skipped so we never retry the failing group.
        original_exception: The exception that triggered the fallback.
        **kwargs: Keyword arguments forwarded to the router call; ``model``
            and ``metadata["model_group"]`` are rewritten per candidate.

    Returns:
        The response from the first fallback model group that succeeds.

    Raises:
        Exception: The last error seen — ``original_exception`` if every
            candidate was skipped or the last fallback's error otherwise.
    """
    error_from_fallbacks: Exception = original_exception
    for candidate in fallback_model_group:
        # Never retry the model group that already failed.
        if candidate == original_model_group:
            continue
        try:
            # LOGGING
            kwargs = litellm_router.log_retry(kwargs=kwargs, e=original_exception)
            verbose_router_logger.info(f"Falling back to model_group = {candidate}")
            kwargs["model"] = candidate
            # Record which model group actually served the request, so
            # downstream logging/billing reflects the fallback target.
            kwargs.setdefault("metadata", {}).update({"model_group": candidate})
            response = litellm_router.function_with_fallbacks(*args, **kwargs)
            verbose_router_logger.info("Successful fallback b/w models.")
            return response
        except Exception as e:
            # Remember the most recent failure; re-raised if all candidates fail.
            error_from_fallbacks = e
    raise error_from_fallbacks