diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py
index bf089c3646..98b0da25c5 100644
--- a/litellm/integrations/custom_logger.py
+++ b/litellm/integrations/custom_logger.py
@@ -58,6 +58,13 @@ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callbac
     def pre_call_check(self, deployment: dict) -> Optional[dict]:
         pass
 
+    #### Fallback Events - router/proxy only ####
+    async def log_success_fallback_event(self, original_model_group: str, kwargs: dict):
+        pass
+
+    async def log_failure_fallback_event(self, original_model_group: str, kwargs: dict):
+        pass
+
     #### ADAPTERS #### Allow calling 100+ LLMs in custom format - https://github.com/BerriAI/litellm/pulls
 
     def translate_completion_input_params(
diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py
index 1425b101c0..8e31bca137 100644
--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@@ -170,6 +170,17 @@
                 labelnames=_logged_llm_labels,
             )
 
+            self.llm_deployment_successful_fallbacks = Counter(
+                "llm_deployment_successful_fallbacks",
+                "LLM Deployment Analytics - Number of successful fallback workloads",
+                ["primary_model", "fallback_model"],
+            )
+            self.llm_deployment_failed_fallbacks = Counter(
+                "llm_deployment_failed_fallbacks",
+                "LLM Deployment Analytics - Number of failed fallback workloads",
+                ["primary_model", "fallback_model"],
+            )
+
         except Exception as e:
             print_verbose(f"Got exception on init prometheus client {str(e)}")
             raise e
@@ -479,6 +490,28 @@
             )
         return
 
+    async def log_success_fallback_event(self, original_model_group: str, kwargs: dict):
+        verbose_logger.debug(
+            "Prometheus: log_success_fallback_event, original_model_group: %s, kwargs: %s",
+            original_model_group,
+            kwargs,
+        )
+        _new_model = kwargs.get("model")
+        self.llm_deployment_successful_fallbacks.labels(
+            primary_model=original_model_group, fallback_model=_new_model
+        ).inc()
+
+    async def log_failure_fallback_event(self, original_model_group: str, kwargs: dict):
+        verbose_logger.debug(
+            "Prometheus: log_failure_fallback_event, original_model_group: %s, kwargs: %s",
+            original_model_group,
+            kwargs,
+        )
+        _new_model = kwargs.get("model")
+        self.llm_deployment_failed_fallbacks.labels(
+            primary_model=original_model_group, fallback_model=_new_model
+        ).inc()
+
     def set_deployment_state(
         self,
         state: int,
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index c16d09e72c..8d499c1c0f 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -37,5 +37,6 @@ general_settings:
   master_key: sk-1234
 
 litellm_settings:
+  fallbacks: [{"gemini-1.5-pro-001": ["gpt-4o"]}]
  success_callback: ["langfuse", "prometheus"]
  failure_callback: ["prometheus"]
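
The two CustomLogger hooks above default to no-ops, so existing callbacks are unaffected; a callback opts in by overriding them. A minimal consumer sketch follows (the FallbackAuditLogger class and its print-based handling are illustrative, not part of this diff):

import litellm
from litellm.integrations.custom_logger import CustomLogger


class FallbackAuditLogger(CustomLogger):
    # Hypothetical callback that records router fallback outcomes.

    async def log_success_fallback_event(self, original_model_group: str, kwargs: dict):
        # original_model_group: the model group the request was first routed to;
        # kwargs["model"]: the deployment the router successfully fell back to.
        print(f"fallback ok: {original_model_group} -> {kwargs.get('model')}")

    async def log_failure_fallback_event(self, original_model_group: str, kwargs: dict):
        print(f"fallback failed: {original_model_group} -> {kwargs.get('model')}")


litellm.callbacks = [FallbackAuditLogger()]  # iterated by the fallback_event_handlers added below
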
verbose_router_logger.debug(f"Traceback{traceback.format_exc()}") original_exception = e fallback_model_group = None + original_model_group = kwargs.get("model") fallback_failure_exception_str = "" try: verbose_router_logger.debug("Trying to fallback b/w models") @@ -2392,8 +2397,18 @@ class Router: verbose_router_logger.info( "Successful fallback b/w models." ) + # callback for successfull_fallback_event(): + await log_success_fallback_event( + original_model_group=original_model_group, + kwargs=kwargs, + ) + return response except Exception as e: + await log_failure_fallback_event( + original_model_group=original_model_group, + kwargs=kwargs, + ) pass else: error_message = "model={}. context_window_fallbacks={}. fallbacks={}.\n\nSet 'context_window_fallback' - https://docs.litellm.ai/docs/routing#fallbacks".format( @@ -2435,8 +2450,17 @@ class Router: verbose_router_logger.info( "Successful fallback b/w models." ) + # callback for successfull_fallback_event(): + await log_success_fallback_event( + original_model_group=original_model_group, + kwargs=kwargs, + ) return response except Exception as e: + await log_failure_fallback_event( + original_model_group=original_model_group, + kwargs=kwargs, + ) pass else: error_message = "model={}. content_policy_fallback={}. fallbacks={}.\n\nSet 'content_policy_fallback' - https://docs.litellm.ai/docs/routing#fallbacks".format( @@ -2497,8 +2521,18 @@ class Router: verbose_router_logger.info( "Successful fallback b/w models." ) + # callback for successfull_fallback_event(): + await log_success_fallback_event( + original_model_group=original_model_group, + kwargs=kwargs, + ) + return response except Exception as e: + await log_failure_fallback_event( + original_model_group=original_model_group, + kwargs=kwargs, + ) raise e except Exception as new_exception: verbose_router_logger.error( diff --git a/litellm/router_utils/fallback_event_handlers.py b/litellm/router_utils/fallback_event_handlers.py new file mode 100644 index 0000000000..98d9cd92de --- /dev/null +++ b/litellm/router_utils/fallback_event_handlers.py @@ -0,0 +1,33 @@ +from typing import TYPE_CHECKING, Any + +import litellm +from litellm._logging import verbose_router_logger +from litellm.integrations.custom_logger import CustomLogger + + +async def log_success_fallback_event(original_model_group: str, kwargs: dict): + for _callback in litellm.callbacks: + if isinstance(_callback, CustomLogger): + try: + await _callback.log_success_fallback_event( + original_model_group=original_model_group, kwargs=kwargs + ) + except Exception as e: + verbose_router_logger.error( + f"Error in log_success_fallback_event: {(str(e))}" + ) + pass + + +async def log_failure_fallback_event(original_model_group: str, kwargs: dict): + for _callback in litellm.callbacks: + if isinstance(_callback, CustomLogger): + try: + await _callback.log_failure_fallback_event( + original_model_group=original_model_group, kwargs=kwargs + ) + except Exception as e: + verbose_router_logger.error( + f"Error in log_failure_fallback_event: {(str(e))}" + ) + pass