fix: send alert on router-level exceptions

This commit is contained in:
Ishaan Jaff 2024-06-14 08:41:12 -07:00
parent 8c7aa24959
commit bd341c69b5
3 changed files with 59 additions and 1 deletions

View file

@ -1453,7 +1453,7 @@ Model Info:
pass
else:
verbose_proxy_logger.debug(
"Error sending slack alert. Error=", response.text
"Error sending slack alert. Error={}".format(response.text)
)
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):

View file

@ -66,6 +66,7 @@ from litellm.types.llms.openai import (
)
from litellm.scheduler import Scheduler, FlowItem
from typing import Iterable
from litellm.router_utils.handle_error import send_llm_exception_alert
class Router:
@ -576,6 +577,14 @@ class Router:
return response
except Exception as e:
asyncio.create_task(
send_llm_exception_alert(
litellm_router_instance=self,
request_kwargs=kwargs,
error_traceback_str=traceback.format_exc(),
original_exception=e,
)
)
raise e
async def _acompletion(
@ -4570,6 +4579,8 @@ class Router:
default_webhook_url=router_alerting_config.webhook_url,
)
self.slack_alerting_logger = _slack_alerting_logger
litellm.callbacks.append(_slack_alerting_logger)
litellm.success_callback.append(
_slack_alerting_logger.response_taking_too_long_callback

View file

@ -0,0 +1,47 @@
import asyncio
import traceback
from typing import TYPE_CHECKING, Any
# Router is only needed for annotations. It is imported solely for static
# type checkers (presumably to avoid a circular import with litellm.router);
# at runtime the alias degrades to ``Any``.
if not TYPE_CHECKING:
    LitellmRouter = Any
else:
    from litellm.router import Router as _Router

    LitellmRouter = _Router
async def send_llm_exception_alert(
    litellm_router_instance: LitellmRouter,
    request_kwargs: dict,
    error_traceback_str: str,
    original_exception,
):
    """
    Sends a Slack / MS Teams alert for the LLM API call failure.

    No alert is sent when the router has no ``slack_alerting_logger``
    configured, or when ``request_kwargs`` contains the
    ``"proxy_server_request"`` key.

    Parameters:
        litellm_router_instance (_Router): The LitellmRouter instance.
        request_kwargs (dict): Keyword arguments of the failed request; only
            the presence of the "proxy_server_request" key is inspected.
        error_traceback_str (str): Formatted traceback of the failure. Only
            its first 2000 characters are included in the alert.
        original_exception (Any): The original exception that occurred.

    Returns:
        None
    """
    # getattr keeps alerting best-effort: a router object without the
    # attribute is treated the same as one with alerting disabled.
    alerting_logger = getattr(litellm_router_instance, "slack_alerting_logger", None)
    if alerting_logger is None:
        return

    if "proxy_server_request" in request_kwargs:
        # Do not send any alert if it's a request from litellm proxy server request
        # the proxy is already instrumented to send LLM API call failures
        return

    litellm_debug_info = getattr(original_exception, "litellm_debug_info", None)
    exception_str = str(original_exception)
    if litellm_debug_info is not None:
        # Coerce to str so a non-string debug payload cannot raise TypeError
        # and prevent the alert from being sent.
        exception_str += str(litellm_debug_info)
    # Cap the traceback so the alert message stays a manageable size.
    exception_str += f"\n\n{error_traceback_str[:2000]}"

    await alerting_logger.send_alert(
        message=f"LLM API call failed: `{exception_str}`",
        level="High",
        alert_type="llm_exceptions",
    )