diff --git a/litellm/integrations/slack_alerting.py b/litellm/integrations/slack_alerting.py
index 21415fb6d6..f37f5070f4 100644
--- a/litellm/integrations/slack_alerting.py
+++ b/litellm/integrations/slack_alerting.py
@@ -1453,7 +1453,7 @@ Model Info:
             pass
         else:
             verbose_proxy_logger.debug(
-                "Error sending slack alert. Error=", response.text
+                "Error sending slack alert. Error={}".format(response.text)
             )

     async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
diff --git a/litellm/router.py b/litellm/router.py
index 4d7a36a386..491a34d1f1 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -66,6 +66,7 @@ from litellm.types.llms.openai import (
 )
 from litellm.scheduler import Scheduler, FlowItem
 from typing import Iterable
+from litellm.router_utils.handle_error import send_llm_exception_alert


 class Router:
@@ -576,6 +577,14 @@ class Router:

             return response
         except Exception as e:
+            asyncio.create_task(
+                send_llm_exception_alert(
+                    litellm_router_instance=self,
+                    request_kwargs=kwargs,
+                    error_traceback_str=traceback.format_exc(),
+                    original_exception=e,
+                )
+            )
             raise e

     async def _acompletion(
@@ -1097,6 +1106,14 @@ class Router:

             return response
         except Exception as e:
+            asyncio.create_task(
+                send_llm_exception_alert(
+                    litellm_router_instance=self,
+                    request_kwargs=kwargs,
+                    error_traceback_str=traceback.format_exc(),
+                    original_exception=e,
+                )
+            )
             raise e

     async def _aimage_generation(self, prompt: str, model: str, **kwargs):
@@ -1221,6 +1238,14 @@ class Router:

             return response
         except Exception as e:
+            asyncio.create_task(
+                send_llm_exception_alert(
+                    litellm_router_instance=self,
+                    request_kwargs=kwargs,
+                    error_traceback_str=traceback.format_exc(),
+                    original_exception=e,
+                )
+            )
             raise e

     async def _atranscription(self, file: BinaryIO, model: str, **kwargs):
@@ -1387,6 +1412,14 @@ class Router:

             return response
         except Exception as e:
+            asyncio.create_task(
+                send_llm_exception_alert(
+                    litellm_router_instance=self,
+                    request_kwargs=kwargs,
+                    error_traceback_str=traceback.format_exc(),
+                    original_exception=e,
+                )
+            )
             raise e

     async def amoderation(self, model: str, input: str, **kwargs):
@@ -1402,6 +1435,14 @@ class Router:

             return response
         except Exception as e:
+            asyncio.create_task(
+                send_llm_exception_alert(
+                    litellm_router_instance=self,
+                    request_kwargs=kwargs,
+                    error_traceback_str=traceback.format_exc(),
+                    original_exception=e,
+                )
+            )
             raise e

     async def _amoderation(self, model: str, input: str, **kwargs):
@@ -1546,6 +1587,14 @@ class Router:

             return response
         except Exception as e:
+            asyncio.create_task(
+                send_llm_exception_alert(
+                    litellm_router_instance=self,
+                    request_kwargs=kwargs,
+                    error_traceback_str=traceback.format_exc(),
+                    original_exception=e,
+                )
+            )
             raise e

     async def _atext_completion(self, model: str, prompt: str, **kwargs):
@@ -1741,6 +1790,14 @@ class Router:
             response = await self.async_function_with_fallbacks(**kwargs)
             return response
         except Exception as e:
+            asyncio.create_task(
+                send_llm_exception_alert(
+                    litellm_router_instance=self,
+                    request_kwargs=kwargs,
+                    error_traceback_str=traceback.format_exc(),
+                    original_exception=e,
+                )
+            )
             raise e

     async def _aembedding(self, input: Union[str, List], model: str, **kwargs):
@@ -4570,6 +4627,8 @@ class Router:
                 default_webhook_url=router_alerting_config.webhook_url,
             )

+            self.slack_alerting_logger = _slack_alerting_logger
+
             litellm.callbacks.append(_slack_alerting_logger)
             litellm.success_callback.append(
                 _slack_alerting_logger.response_taking_too_long_callback
diff --git a/litellm/router_utils/handle_error.py b/litellm/router_utils/handle_error.py
new file mode 100644
index 0000000000..d848fd82b1
--- /dev/null
+++ b/litellm/router_utils/handle_error.py
@@ -0,0 +1,53 @@
+import asyncio
+import traceback
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from litellm.router import Router as _Router
+
+    LitellmRouter = _Router
+else:
+    LitellmRouter = Any
+
+
+async def send_llm_exception_alert(
+    litellm_router_instance: LitellmRouter,
+    request_kwargs: dict,
+    error_traceback_str: str,
+    original_exception,
+):
+    """
+    Sends a Slack / MS Teams alert for the LLM API call failure.
+
+    Parameters:
+        litellm_router_instance (_Router): The LitellmRouter instance.
+        original_exception (Any): The original exception that occurred.
+
+    Returns:
+        None
+    """
+    if litellm_router_instance is None:
+        return
+
+    if not hasattr(litellm_router_instance, "slack_alerting_logger"):
+        return
+
+    if litellm_router_instance.slack_alerting_logger is None:
+        return
+
+    if "proxy_server_request" in request_kwargs:
+        # Do not send any alert if it's a request from litellm proxy server request
+        # the proxy is already instrumented to send LLM API call failures
+        return
+
+    litellm_debug_info = getattr(original_exception, "litellm_debug_info", None)
+    exception_str = str(original_exception)
+    if litellm_debug_info is not None:
+        exception_str += litellm_debug_info
+    exception_str += f"\n\n{error_traceback_str[:2000]}"
+
+    await litellm_router_instance.slack_alerting_logger.send_alert(
+        message=f"LLM API call failed: `{exception_str}`",
+        level="High",
+        alert_type="llm_exceptions",
+    )
diff --git a/litellm/tests/test_alerting.py b/litellm/tests/test_alerting.py
index 9dfec3dcfa..0cbf6cb6d3 100644
--- a/litellm/tests/test_alerting.py
+++ b/litellm/tests/test_alerting.py
@@ -25,6 +25,9 @@ import pytest
 from litellm.router import AlertingConfig, Router
 from litellm.proxy._types import CallInfo
 from openai import APIError
+from litellm.router import AlertingConfig
+import litellm
+import os


 @pytest.mark.parametrize(
@@ -743,3 +746,37 @@ async def test_region_outage_alerting_called(
         mock_send_alert.assert_called_once()
     else:
         mock_send_alert.assert_not_called()
+
+
+@pytest.mark.asyncio
+@pytest.mark.skip(reason="test only needs to run locally ")
+async def test_alerting():
+    router = litellm.Router(
+        model_list=[
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "gpt-3.5-turbo",
+                    "api_key": "bad_key",
+                },
+            }
+        ],
+        debug_level="DEBUG",
+        set_verbose=True,
+        alerting_config=AlertingConfig(
+            alerting_threshold=10,  # threshold for slow / hanging llm responses (in seconds). Defaults to 300 seconds
+            webhook_url=os.getenv(
+                "SLACK_WEBHOOK_URL"
+            ),  # webhook you want to send alerts to
+        ),
+    )
+    try:
+        await router.acompletion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "Hey, how's it going?"}],
+        )
+
+    except:
+        pass
+    finally:
+        await asyncio.sleep(3)
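
Reviewer note (not part of the patch): the included test needs a live `SLACK_WEBHOOK_URL`, so to exercise the new helper in isolation a sketch along these lines should work. The `SimpleNamespace` router stand-in and the mocked `slack_alerting_logger` are illustrative assumptions; only `send_llm_exception_alert` and its keyword arguments come from this PR.

```python
# Hypothetical smoke test for send_llm_exception_alert -- illustrative only,
# not part of this PR. Assumes litellm with this patch applied is importable.
import asyncio
import traceback
from types import SimpleNamespace
from unittest.mock import AsyncMock

from litellm.router_utils.handle_error import send_llm_exception_alert


async def main():
    # Stand-in router: the helper only reads the `slack_alerting_logger` attribute.
    fake_router = SimpleNamespace(slack_alerting_logger=AsyncMock())

    try:
        raise ValueError("simulated LLM API failure")
    except ValueError as e:
        await send_llm_exception_alert(
            litellm_router_instance=fake_router,
            request_kwargs={},  # no "proxy_server_request" key, so the alert fires
            error_traceback_str=traceback.format_exc(),
            original_exception=e,
        )

    # Exactly one "High" severity llm_exceptions alert should have been forwarded.
    fake_router.slack_alerting_logger.send_alert.assert_awaited_once()
    kwargs = fake_router.slack_alerting_logger.send_alert.await_args.kwargs
    assert kwargs["level"] == "High" and kwargs["alert_type"] == "llm_exceptions"


asyncio.run(main())
```

The early-return guards mean the helper is a no-op for routers without a configured `slack_alerting_logger` and for proxy-originated requests (which carry `proxy_server_request` in their kwargs and are already instrumented on the proxy side), so the `asyncio.create_task` calls added in `router.py` cost nothing in those paths.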