mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 03:04:13 +00:00
fix - send alert on router level exceptions
This commit is contained in:
parent
056913fd70
commit
a0ecc6f414
3 changed files with 59 additions and 1 deletions
|
@ -1453,7 +1453,7 @@ Model Info:
|
|||
pass
|
||||
else:
|
||||
verbose_proxy_logger.debug(
|
||||
"Error sending slack alert. Error=", response.text
|
||||
"Error sending slack alert. Error={}".format(response.text)
|
||||
)
|
||||
|
||||
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
|
||||
|
|
|
@ -66,6 +66,7 @@ from litellm.types.llms.openai import (
|
|||
)
|
||||
from litellm.scheduler import Scheduler, FlowItem
|
||||
from typing import Iterable
|
||||
from litellm.router_utils.handle_error import send_llm_exception_alert
|
||||
|
||||
|
||||
class Router:
|
||||
|
@ -576,6 +577,14 @@ class Router:
|
|||
|
||||
return response
|
||||
except Exception as e:
|
||||
asyncio.create_task(
|
||||
send_llm_exception_alert(
|
||||
litellm_router_instance=self,
|
||||
request_kwargs=kwargs,
|
||||
error_traceback_str=traceback.format_exc(),
|
||||
original_exception=e,
|
||||
)
|
||||
)
|
||||
raise e
|
||||
|
||||
async def _acompletion(
|
||||
|
@ -4570,6 +4579,8 @@ class Router:
|
|||
default_webhook_url=router_alerting_config.webhook_url,
|
||||
)
|
||||
|
||||
self.slack_alerting_logger = _slack_alerting_logger
|
||||
|
||||
litellm.callbacks.append(_slack_alerting_logger)
|
||||
litellm.success_callback.append(
|
||||
_slack_alerting_logger.response_taking_too_long_callback
|
||||
|
|
47
litellm/router_utils/handle_error.py
Normal file
47
litellm/router_utils/handle_error.py
Normal file
|
@ -0,0 +1,47 @@
|
|||
import asyncio
import traceback
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    from litellm.router import Router as _Router

    LitellmRouter = _Router
else:
    # The router class is only needed for static type checking; at runtime the
    # annotation falls back to ``Any`` so this module never imports the router.
    LitellmRouter = Any

# Upper bound on how many traceback characters are appended to an alert message.
_MAX_TRACEBACK_CHARS = 2000


async def send_llm_exception_alert(
    litellm_router_instance: LitellmRouter,
    request_kwargs: dict,
    error_traceback_str: str,
    original_exception,
) -> None:
    """
    Sends a Slack / MS Teams alert for the LLM API call failure.

    Parameters:
        litellm_router_instance (_Router): The LitellmRouter instance. Its
            ``slack_alerting_logger`` attribute is used to send the alert.
        request_kwargs (dict): The kwargs of the failed request. If it contains
            the key ``"proxy_server_request"`` no alert is sent.
        error_traceback_str (str): The formatted traceback of the failure. Only
            the first ``_MAX_TRACEBACK_CHARS`` characters are included.
        original_exception (Any): The original exception that occurred.

    Returns:
        None
    """
    # A missing attribute is treated the same as an unset (None) logger.
    slack_alerting_logger = getattr(
        litellm_router_instance, "slack_alerting_logger", None
    )
    if slack_alerting_logger is None:
        return

    if "proxy_server_request" in request_kwargs:
        # Do not send any alert if it's a request from litellm proxy server request
        # the proxy is already instrumented to send LLM API call failures
        return

    litellm_debug_info = getattr(original_exception, "litellm_debug_info", None)
    exception_str = str(original_exception)
    if litellm_debug_info is not None:
        # Coerce to str so non-string debug info cannot raise a TypeError
        # while the alert message is being built.
        exception_str += str(litellm_debug_info)
    exception_str += f"\n\n{error_traceback_str[:_MAX_TRACEBACK_CHARS]}"

    await slack_alerting_logger.send_alert(
        message=f"LLM API call failed: `{exception_str}`",
        level="High",
        alert_type="llm_exceptions",
    )
|
Loading…
Add table
Add a link
Reference in a new issue