Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-26 19:24:27 +00:00
Merge pull request #4197 from BerriAI/litellm_fix_router_level_alerting

[Fix + Refactor] Router alerting for LLM exceptions + use a separate util for sending alerts

Commit 14f7ca899a
4 changed files with 150 additions and 1 deletion
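The core of the refactor is a fire-and-forget alerting pattern: each async Router entrypoint wraps its failure path in asyncio.create_task(send_llm_exception_alert(...)) and then re-raises, so sending the alert never blocks the caller or swallows the original exception. Below is a minimal, self-contained sketch of that pattern; flaky_llm_call and the print-based send_alert are illustrative stand-ins, not part of this commit.

import asyncio
import traceback


async def send_alert(message: str) -> None:
    # Stand-in for SlackAlerting.send_alert: in litellm this posts to a webhook.
    await asyncio.sleep(0)
    print(f"ALERT: {message.splitlines()[0]}")


async def flaky_llm_call() -> str:
    raise RuntimeError("AuthenticationError: bad_key")


async def acompletion_with_alerting() -> str:
    try:
        return await flaky_llm_call()
    except Exception as e:
        # Fire-and-forget: schedule the alert on the running event loop,
        # then re-raise so the caller still sees the original exception.
        asyncio.create_task(
            send_alert(f"LLM API call failed: `{e}`\n\n{traceback.format_exc()[:2000]}")
        )
        raise e


async def main() -> None:
    try:
        await acompletion_with_alerting()
    except RuntimeError:
        pass
    await asyncio.sleep(0.1)  # give the background alert task a chance to run


if __name__ == "__main__":
    asyncio.run(main())

The same eight-line except block is applied to the completion, image generation, transcription, moderation, text completion, and embedding paths in the diff below.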
@@ -1453,7 +1453,7 @@ Model Info:
             pass
         else:
             verbose_proxy_logger.debug(
-                "Error sending slack alert. Error=", response.text
+                "Error sending slack alert. Error={}".format(response.text)
             )
 
     async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
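The one-line change above fixes a real logging bug: logger.debug(msg, arg) treats arg as a lazy %-style formatting argument, and "Error=" contains no placeholder, so the old call never actually logged response.text. A small stdlib demonstration of the difference, assuming verbose_proxy_logger wraps a standard logging.Logger:

import logging

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("demo")

# Old style: "Error=" has no %s placeholder, so %-interpolation fails with
# "TypeError: not all arguments converted during string formatting" and the
# logging module prints "--- Logging error ---" to stderr instead of the message.
logger.debug("Error sending slack alert. Error=", "upstream said 400")

# Fixed style: the text is interpolated eagerly, so the response body
# always appears in the rendered log line.
logger.debug("Error sending slack alert. Error={}".format("upstream said 400"))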
@@ -66,6 +66,7 @@ from litellm.types.llms.openai import (
 )
 from litellm.scheduler import Scheduler, FlowItem
 from typing import Iterable
+from litellm.router_utils.handle_error import send_llm_exception_alert
 
 
 class Router:
@@ -576,6 +577,14 @@ class Router:
 
             return response
         except Exception as e:
+            asyncio.create_task(
+                send_llm_exception_alert(
+                    litellm_router_instance=self,
+                    request_kwargs=kwargs,
+                    error_traceback_str=traceback.format_exc(),
+                    original_exception=e,
+                )
+            )
             raise e
 
     async def _acompletion(
@@ -1097,6 +1106,14 @@ class Router:
 
             return response
         except Exception as e:
+            asyncio.create_task(
+                send_llm_exception_alert(
+                    litellm_router_instance=self,
+                    request_kwargs=kwargs,
+                    error_traceback_str=traceback.format_exc(),
+                    original_exception=e,
+                )
+            )
             raise e
 
     async def _aimage_generation(self, prompt: str, model: str, **kwargs):
@@ -1221,6 +1238,14 @@ class Router:
 
             return response
         except Exception as e:
+            asyncio.create_task(
+                send_llm_exception_alert(
+                    litellm_router_instance=self,
+                    request_kwargs=kwargs,
+                    error_traceback_str=traceback.format_exc(),
+                    original_exception=e,
+                )
+            )
             raise e
 
     async def _atranscription(self, file: BinaryIO, model: str, **kwargs):
@@ -1387,6 +1412,14 @@ class Router:
 
             return response
         except Exception as e:
+            asyncio.create_task(
+                send_llm_exception_alert(
+                    litellm_router_instance=self,
+                    request_kwargs=kwargs,
+                    error_traceback_str=traceback.format_exc(),
+                    original_exception=e,
+                )
+            )
             raise e
 
     async def amoderation(self, model: str, input: str, **kwargs):
@@ -1402,6 +1435,14 @@ class Router:
 
             return response
         except Exception as e:
+            asyncio.create_task(
+                send_llm_exception_alert(
+                    litellm_router_instance=self,
+                    request_kwargs=kwargs,
+                    error_traceback_str=traceback.format_exc(),
+                    original_exception=e,
+                )
+            )
             raise e
 
     async def _amoderation(self, model: str, input: str, **kwargs):
@@ -1546,6 +1587,14 @@ class Router:
 
             return response
         except Exception as e:
+            asyncio.create_task(
+                send_llm_exception_alert(
+                    litellm_router_instance=self,
+                    request_kwargs=kwargs,
+                    error_traceback_str=traceback.format_exc(),
+                    original_exception=e,
+                )
+            )
             raise e
 
     async def _atext_completion(self, model: str, prompt: str, **kwargs):
@@ -1741,6 +1790,14 @@ class Router:
             response = await self.async_function_with_fallbacks(**kwargs)
             return response
         except Exception as e:
+            asyncio.create_task(
+                send_llm_exception_alert(
+                    litellm_router_instance=self,
+                    request_kwargs=kwargs,
+                    error_traceback_str=traceback.format_exc(),
+                    original_exception=e,
+                )
+            )
             raise e
 
     async def _aembedding(self, input: Union[str, List], model: str, **kwargs):
@@ -4570,6 +4627,8 @@ class Router:
                 default_webhook_url=router_alerting_config.webhook_url,
             )
 
+            self.slack_alerting_logger = _slack_alerting_logger
+
             litellm.callbacks.append(_slack_alerting_logger)
             litellm.success_callback.append(
                 _slack_alerting_logger.response_taking_too_long_callback
litellm/router_utils/handle_error.py (new file, 53 lines)
@@ -0,0 +1,53 @@
+import asyncio
+import traceback
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from litellm.router import Router as _Router
+
+    LitellmRouter = _Router
+else:
+    LitellmRouter = Any
+
+
+async def send_llm_exception_alert(
+    litellm_router_instance: LitellmRouter,
+    request_kwargs: dict,
+    error_traceback_str: str,
+    original_exception,
+):
+    """
+    Sends a Slack / MS Teams alert for the LLM API call failure.
+
+    Parameters:
+        litellm_router_instance (_Router): The LitellmRouter instance.
+        original_exception (Any): The original exception that occurred.
+
+    Returns:
+        None
+    """
+    if litellm_router_instance is None:
+        return
+
+    if not hasattr(litellm_router_instance, "slack_alerting_logger"):
+        return
+
+    if litellm_router_instance.slack_alerting_logger is None:
+        return
+
+    if "proxy_server_request" in request_kwargs:
+        # Do not send any alert if it's a request from litellm proxy server request
+        # the proxy is already instrumented to send LLM API call failures
+        return
+
+    litellm_debug_info = getattr(original_exception, "litellm_debug_info", None)
+    exception_str = str(original_exception)
+    if litellm_debug_info is not None:
+        exception_str += litellm_debug_info
+    exception_str += f"\n\n{error_traceback_str[:2000]}"
+
+    await litellm_router_instance.slack_alerting_logger.send_alert(
+        message=f"LLM API call failed: `{exception_str}`",
+        level="High",
+        alert_type="llm_exceptions",
+    )
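For a sense of the new helper's guard clauses in isolation, here is a hedged sketch that drives send_llm_exception_alert directly. _FakeAlertLogger and _FakeRouter are illustrative stand-ins for the router's SlackAlerting instance (not part of this commit), and the import assumes a litellm build that includes this change.

import asyncio
import traceback

from litellm.router_utils.handle_error import send_llm_exception_alert


class _FakeAlertLogger:
    # Stand-in for the SlackAlerting logger the Router normally attaches.
    async def send_alert(self, message: str, level: str, alert_type: str) -> None:
        print(f"[{level}][{alert_type}] {message[:200]}")


class _FakeRouter:
    # Minimal object carrying the one attribute the helper checks for.
    slack_alerting_logger = _FakeAlertLogger()


async def main() -> None:
    try:
        raise ValueError("simulated LLM failure")
    except ValueError as e:
        await send_llm_exception_alert(
            litellm_router_instance=_FakeRouter(),
            request_kwargs={},  # no "proxy_server_request" key, so the alert is sent
            error_traceback_str=traceback.format_exc(),
            original_exception=e,
        )


if __name__ == "__main__":
    asyncio.run(main())

Note the early return when request_kwargs contains "proxy_server_request": the proxy is already instrumented to alert on LLM API failures, so the router stays silent there to avoid duplicate alerts.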
@@ -25,6 +25,9 @@ import pytest
 from litellm.router import AlertingConfig, Router
 from litellm.proxy._types import CallInfo
 from openai import APIError
+from litellm.router import AlertingConfig
+import litellm
+import os
 
 
 @pytest.mark.parametrize(
@@ -743,3 +746,37 @@ async def test_region_outage_alerting_called(
         mock_send_alert.assert_called_once()
     else:
         mock_send_alert.assert_not_called()
+
+
+@pytest.mark.asyncio
+@pytest.mark.skip(reason="test only needs to run locally")
+async def test_alerting():
+    router = litellm.Router(
+        model_list=[
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "gpt-3.5-turbo",
+                    "api_key": "bad_key",
+                },
+            }
+        ],
+        debug_level="DEBUG",
+        set_verbose=True,
+        alerting_config=AlertingConfig(
+            alerting_threshold=10,  # threshold for slow / hanging llm responses (in seconds). Defaults to 300 seconds
+            webhook_url=os.getenv(
+                "SLACK_WEBHOOK_URL"
+            ),  # webhook you want to send alerts to
+        ),
+    )
+    try:
+        await router.acompletion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "Hey, how's it going?"}],
+        )
+
+    except:
+        pass
+    finally:
+        await asyncio.sleep(3)