From f8350b9461f1d15b534a2e8a7997a3c9535c8b08 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Fri, 24 May 2024 16:59:16 -0700 Subject: [PATCH 1/4] fix(slack_alerting.py): support region based outage alerting --- litellm/integrations/slack_alerting.py | 209 +++++++++++++++++++++---- litellm/main.py | 6 +- litellm/proxy/utils.py | 33 +--- litellm/tests/test_alerting.py | 97 +++++++++++- litellm/utils.py | 25 ++- 5 files changed, 309 insertions(+), 61 deletions(-) diff --git a/litellm/integrations/slack_alerting.py b/litellm/integrations/slack_alerting.py index a4ff0620a..0cea0fee2 100644 --- a/litellm/integrations/slack_alerting.py +++ b/litellm/integrations/slack_alerting.py @@ -1,6 +1,6 @@ #### What this does #### # Class for sending Slack Alerts # -import dotenv, os +import dotenv, os, traceback from litellm.proxy._types import UserAPIKeyAuth, CallInfo from litellm._logging import verbose_logger, verbose_proxy_logger import litellm, threading @@ -15,6 +15,35 @@ from enum import Enum from datetime import datetime as dt, timedelta, timezone from litellm.integrations.custom_logger import CustomLogger import random +from typing import TypedDict +from openai import APIError + +import litellm.types +import litellm.types.router + + +class OutageModel(TypedDict): + provider: str + region_name: str + alerts: List[str] + deployment_ids: List[str] + minor_alert_sent: bool + major_alert_sent: bool + last_updated_at: float + + +AlertType = Literal[ + "llm_exceptions", + "llm_too_slow", + "llm_requests_hanging", + "budget_alerts", + "db_exceptions", + "daily_reports", + "spend_reports", + "cooldown_deployment", + "new_model_added", + "outage_alerts", +] class LiteLLMBase(BaseModel): @@ -37,6 +66,10 @@ class SlackAlertingArgs(LiteLLMBase): ) report_check_interval: int = 5 * 60 # 5 minutes budget_alert_ttl: int = 24 * 60 * 60 # 24 hours + outage_alert_ttl: int = 1 * 60 * 60 # 1 hour + minor_outage_alert_threshold: int = 3 + major_outage_alert_threshold: int = 10 + max_outage_alert_list_size: int = 10 # prevent memory leak class WebhookEvent(CallInfo): @@ -86,19 +119,7 @@ class SlackAlerting(CustomLogger): internal_usage_cache: Optional[DualCache] = None, alerting_threshold: float = 300, # threshold for slow / hanging llm responses (in seconds) alerting: Optional[List] = [], - alert_types: List[ - Literal[ - "llm_exceptions", - "llm_too_slow", - "llm_requests_hanging", - "budget_alerts", - "db_exceptions", - "daily_reports", - "spend_reports", - "cooldown_deployment", - "new_model_added", - ] - ] = [ + alert_types: List[AlertType] = [ "llm_exceptions", "llm_too_slow", "llm_requests_hanging", @@ -108,6 +129,7 @@ class SlackAlerting(CustomLogger): "spend_reports", "cooldown_deployment", "new_model_added", + "outage_alerts", ], alert_to_webhook_url: Optional[ Dict @@ -696,6 +718,99 @@ class SlackAlerting(CustomLogger): return return + async def outage_alerts( + self, + provider: str, + region_name: str, + exception: APIError, + deployment_id: str, + ) -> None: + """ + Send slack alert if provider region (e.g. azure east-us-1) is having an outage (408 or >500 errors). + + key = (provider + region) + + value = { + - provider + - region + - threshold + - alerts [] + } + + ttl = 1hr + max_alerts_size = 10 + """ + _id = provider + region_name + + outage_value: Optional[OutageModel] = await self.internal_usage_cache.async_get_cache(key=_id) # type: ignore + + if ( + getattr(exception, "status_code", None) is not None + and exception.status_code != 408 # type: ignore + and exception.status_code < 500 # type: ignore + ): + return + + if outage_value is None: + outage_value = OutageModel( + provider=provider, + region_name=region_name, + alerts=[exception.message], + deployment_ids=[deployment_id], + minor_alert_sent=False, + major_alert_sent=False, + last_updated_at=time.time(), + ) + + ## add to cache ## + await self.internal_usage_cache.async_set_cache( + key=_id, value=outage_value, ttl=self.alerting_args.outage_alert_ttl + ) + return + + outage_value["alerts"].append(exception.message) + outage_value["deployment_ids"].append(deployment_id) + outage_value["last_updated_at"] = time.time() + + ## MINOR OUTAGE ALERT SENT ## + if ( + outage_value["minor_alert_sent"] == False + and len(outage_value["alerts"]) + > self.alerting_args.minor_outage_alert_threshold + ): + msg = "{} {} is having a **Minor Service Outage**.\n\n**Errors**\n{}\n\nLast Check:{}".format( + provider, + region_name, + outage_value["alerts"], + outage_value["last_updated_at"], + ) + # send minor alert + _result_val = self.send_alert( + message=msg, level="Medium", alert_type="outage_alerts" + ) + if _result_val is not None: + await _result_val + # set to true + outage_value["minor_alert_sent"] = True + elif ( + outage_value["major_alert_sent"] == False + and len(outage_value["alerts"]) + > self.alerting_args.major_outage_alert_threshold + ): + msg = "{} {} is having a **Major Service Outage**.\n\n**Errors**\n{}\n\nLast Check:{}".format( + provider, + region_name, + outage_value["alerts"], + outage_value["last_updated_at"], + ) + # send minor alert + await self.send_alert(message=msg, level="High", alert_type="outage_alerts") + # set to true + outage_value["major_alert_sent"] = True + + ## update cache ## + await self.internal_usage_cache.async_set_cache(key=_id, value=outage_value) + async def model_added_alert( self, model_name: str, litellm_model_name: str, passed_model_info: Any ): @@ -745,10 +860,12 @@ Model Info: ``` """ - await self.send_alert( + alert_val = self.send_alert( message=message, level="Low", alert_type="new_model_added" ) - pass + + if alert_val is not None and asyncio.iscoroutine(alert_val): + await alert_val async def model_removed_alert(self, model_name: str): pass @@ -795,6 +912,7 @@ Model Info: "spend_reports", "new_model_added", "cooldown_deployment", + "outage_alerts", ], user_info: Optional[WebhookEvent] = None, **kwargs, @@ -910,18 +1028,55 @@ Model Info: async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): """Log failure + deployment latency""" - if "daily_reports" in self.alert_types: - model_id = ( - kwargs.get("litellm_params", {}).get("model_info", {}).get("id", "") - ) - await self.async_update_daily_reports( - DeploymentMetrics( - id=model_id, - failed_request=True, - latency_per_output_token=None, - updated_at=litellm.utils.get_utc_datetime(), + _litellm_params = kwargs.get("litellm_params", {}) + _model_info = _litellm_params.get("model_info", {}) or {} + model_id = _model_info.get("id", "") + try: + if "daily_reports" in self.alert_types: + try: + await self.async_update_daily_reports( + DeploymentMetrics( + id=model_id, + failed_request=True, + latency_per_output_token=None, + updated_at=litellm.utils.get_utc_datetime(), + ) + ) + except Exception as e: + verbose_logger.debug(f"Exception raises -{str(e)}") + + if "outage_alerts" in self.alert_types and isinstance( + kwargs.get("exception", ""), APIError + ): + _litellm_params = litellm.types.router.LiteLLM_Params( + model=kwargs.get("model", ""), + **kwargs.get("litellm_params", {}), + **kwargs.get("optional_params", {}), ) - ) + _region_name = litellm.utils._get_model_region( + custom_llm_provider=kwargs.get("custom_llm_provider", ""), + litellm_params=_litellm_params, + ) + # if region name not known, default to api base # + if _region_name is None: + _region_name = litellm.get_api_base( + model=kwargs.get("model", ""), + optional_params={ + **kwargs.get("litellm_params", {}), + **kwargs.get("optional_params", {}), + }, + ) + if _region_name is None: + _region_name = "" + + await self.outage_alerts( + provider=kwargs.get("custom_llm_provider", "") or "", + region_name=_region_name, + exception=kwargs["exception"], + deployment_id=model_id, + ) + except Exception as e: + pass async def _run_scheduler_helper(self, llm_router) -> bool: """ diff --git a/litellm/main.py b/litellm/main.py index dc4cf0001..9ff474af1 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -420,6 +420,8 @@ def mock_completion( api_key="mock-key", ) if isinstance(mock_response, Exception): + if isinstance(mock_response, openai.APIError): + raise mock_response raise litellm.APIError( status_code=500, # type: ignore message=str(mock_response), @@ -463,7 +465,9 @@ def mock_completion( return model_response - except: + except Exception as e: + if isinstance(e, openai.APIError): + raise e traceback.print_exc() raise Exception("Mock completion response failed") diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index b22134dab..33e2114c2 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -42,7 +42,7 @@ import smtplib, re from email.mime.text import MIMEText from email.mime.multipart import MIMEMultipart from datetime import datetime, timedelta -from litellm.integrations.slack_alerting import SlackAlerting +from litellm.integrations.slack_alerting import SlackAlerting, AlertType from typing_extensions import overload @@ -78,19 +78,7 @@ class ProxyLogging: self.cache_control_check = _PROXY_CacheControlCheck() self.alerting: Optional[List] = None self.alerting_threshold: float = 300 # default to 5 min. threshold - self.alert_types: List[ - Literal[ - "llm_exceptions", - "llm_too_slow", - "llm_requests_hanging", - "budget_alerts", - "db_exceptions", - "daily_reports", - "spend_reports", - "cooldown_deployment", - "new_model_added", - ] - ] = [ + self.alert_types: List[AlertType] = [ "llm_exceptions", "llm_too_slow", "llm_requests_hanging", @@ -100,6 +88,7 @@ class ProxyLogging: "spend_reports", "cooldown_deployment", "new_model_added", + "outage_alerts", ] self.slack_alerting_instance = SlackAlerting( alerting_threshold=self.alerting_threshold, @@ -113,21 +102,7 @@ class ProxyLogging: alerting: Optional[List], alerting_threshold: Optional[float], redis_cache: Optional[RedisCache], - alert_types: Optional[ - List[ - Literal[ - "llm_exceptions", - "llm_too_slow", - "llm_requests_hanging", - "budget_alerts", - "db_exceptions", - "daily_reports", - "spend_reports", - "cooldown_deployment", - "new_model_added", - ] - ] - ] = None, + alert_types: Optional[List[AlertType]] = None, alerting_args: Optional[dict] = None, ): self.alerting = alerting diff --git a/litellm/tests/test_alerting.py b/litellm/tests/test_alerting.py index 2f8d7f3ef..7a4214bbd 100644 --- a/litellm/tests/test_alerting.py +++ b/litellm/tests/test_alerting.py @@ -1,10 +1,11 @@ # What is this? ## Tests slack alerting on proxy logging object -import sys, json, uuid, random +import sys, json, uuid, random, httpx import os import io, asyncio from datetime import datetime, timedelta +from typing import Optional # import logging # logging.basicConfig(level=logging.DEBUG) @@ -23,6 +24,7 @@ from unittest.mock import AsyncMock import pytest from litellm.router import AlertingConfig, Router from litellm.proxy._types import CallInfo +from openai import APIError @pytest.mark.parametrize( @@ -495,3 +497,96 @@ async def test_webhook_alerting(alerting_type): user_info=user_info, ) mock_send_alert.assert_awaited_once() + + +@pytest.mark.parametrize( + "model, api_base, llm_provider, vertex_project, vertex_location", + [ + ("gpt-3.5-turbo", None, "openai", None, None), + ( + "azure/gpt-3.5-turbo", + "https://openai-gpt-4-test-v-1.openai.azure.com", + "azure", + None, + None, + ), + ("gemini-pro", None, "vertex_ai", "hardy-device-38811", "us-central1"), + ], +) +@pytest.mark.parametrize("error_code", [500, 408, 400]) +@pytest.mark.asyncio +async def test_outage_alerting_called( + model, api_base, llm_provider, vertex_project, vertex_location, error_code +): + """ + If call fails, outage alert is called + + If multiple calls fail, outage alert is sent + """ + slack_alerting = SlackAlerting(alerting=["webhook"]) + + litellm.callbacks = [slack_alerting] + + error_to_raise: Optional[APIError] = None + + if error_code == 400: + print("RAISING 400 ERROR CODE") + error_to_raise = litellm.BadRequestError( + message="this is a bad request", + model=model, + llm_provider=llm_provider, + ) + elif error_code == 408: + print("RAISING 408 ERROR CODE") + error_to_raise = litellm.Timeout( + message="A timeout occurred", model=model, llm_provider=llm_provider + ) + elif error_code == 500: + print("RAISING 500 ERROR CODE") + error_to_raise = litellm.ServiceUnavailableError( + message="API is unavailable", + model=model, + llm_provider=llm_provider, + response=httpx.Response( + status_code=503, + request=httpx.Request( + method="completion", + url="https://github.com/BerriAI/litellm", + ), + ), + ) + with patch.object( + slack_alerting, "outage_alerts", new=AsyncMock() + ) as mock_send_alert: + try: + await litellm.acompletion( + model=model, + messages=[{"role": "user", "content": "Hey!"}], + api_base=api_base, + vertex_location=vertex_location, + vertex_project=vertex_project, + mock_response=error_to_raise, + ) + except Exception as e: + pass + + mock_send_alert.assert_called_once() + + with patch.object(slack_alerting, "send_alert", new=AsyncMock()) as mock_send_alert: + for _ in range(3): + try: + await litellm.acompletion( + model=model, + messages=[{"role": "user", "content": "Hey!"}], + api_base=api_base, + vertex_location=vertex_location, + vertex_project=vertex_project, + mock_response=error_to_raise, + ) + except Exception as e: + pass + + if error_code == 500 or error_code == 408: + mock_send_alert.assert_called_once() + else: + mock_send_alert.assert_not_called() diff --git a/litellm/utils.py b/litellm/utils.py index 0f2a46f68..1ed70a942 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -8632,7 +8632,16 @@ def exception_type( ) elif hasattr(original_exception, "status_code"): exception_mapping_worked = True - if original_exception.status_code == 401: + if original_exception.status_code == 400: + exception_mapping_worked = True + raise BadRequestError( + message=f"{exception_provider} - {message}", + llm_provider=custom_llm_provider, + model=model, + response=original_exception.response, + litellm_debug_info=extra_information, + ) + elif original_exception.status_code == 401: exception_mapping_worked = True raise AuthenticationError( message=f"{exception_provider} - {message}", @@ -9145,6 +9154,7 @@ def exception_type( ), ), ) + if hasattr(original_exception, "status_code"): if original_exception.status_code == 400: exception_mapping_worked = True @@ -9825,7 +9835,16 @@ def exception_type( ) elif hasattr(original_exception, "status_code"): exception_mapping_worked = True - if original_exception.status_code == 401: + if original_exception.status_code == 400: + exception_mapping_worked = True + raise BadRequestError( + message=f"AzureException - {original_exception.message}", + llm_provider="azure", + model=model, + litellm_debug_info=extra_information, + response=original_exception.response, + ) + elif original_exception.status_code == 401: exception_mapping_worked = True raise AuthenticationError( message=f"AzureException - {original_exception.message}", @@ -9842,7 +9861,7 @@ def exception_type( litellm_debug_info=extra_information, llm_provider="azure", ) - if original_exception.status_code == 422: + elif original_exception.status_code == 422: exception_mapping_worked = True raise BadRequestError( message=f"AzureException - {original_exception.message}", From 2cdb0584d1c71f3c62dcb6d2cd70bf3753978398 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Fri, 24 May 2024 17:17:17 -0700 Subject: [PATCH 2/4] fix(slack_alerting.py): fixes for outage alerting --- litellm/integrations/slack_alerting.py | 131 +++++++++++++------------ litellm/tests/test_alerting.py | 29 ++++-- 2 files changed, 88 insertions(+), 72 deletions(-) diff --git a/litellm/integrations/slack_alerting.py b/litellm/integrations/slack_alerting.py index 0cea0fee2..2a2ec4ab7 100644 --- a/litellm/integrations/slack_alerting.py +++ b/litellm/integrations/slack_alerting.py @@ -740,76 +740,81 @@ class SlackAlerting(CustomLogger): ttl = 1hr max_alerts_size = 10 """ - _id = provider + region_name + try: - outage_value: Optional[OutageModel] = await self.internal_usage_cache.async_get_cache(key=_id) # type: ignore + _id = provider + region_name - if ( - getattr(exception, "status_code", None) is not None - and exception.status_code != 408 # type: ignore - and exception.status_code < 500 # type: ignore - ): - return + outage_value: Optional[OutageModel] = await self.internal_usage_cache.async_get_cache(key=_id) # type: ignore - if outage_value is None: - outage_value = OutageModel( - provider=provider, - region_name=region_name, - alerts=[exception.message], - deployment_ids=[deployment_id], - minor_alert_sent=False, - major_alert_sent=False, - last_updated_at=time.time(), - ) + if ( + getattr(exception, "status_code", None) is not None + and exception.status_code != 408 # type: ignore + and exception.status_code < 500 # type: ignore + ): + return - ## add to cache ## - await self.internal_usage_cache.async_set_cache( - key=_id, value=outage_value, ttl=self.alerting_args.outage_alert_ttl - ) - return + if outage_value is None: + outage_value = OutageModel( + provider=provider, + region_name=region_name, + alerts=[exception.message], + deployment_ids=[deployment_id], + minor_alert_sent=False, + major_alert_sent=False, + last_updated_at=time.time(), + ) - outage_value["alerts"].append(exception.message) - outage_value["deployment_ids"].append(deployment_id) - outage_value["last_updated_at"] = time.time() + ## add to cache ## + await self.internal_usage_cache.async_set_cache( + key=_id, value=outage_value, ttl=self.alerting_args.outage_alert_ttl + ) + return - ## MINOR OUTAGE ALERT SENT ## - if ( - outage_value["minor_alert_sent"] == False - and len(outage_value["alerts"]) - > self.alerting_args.minor_outage_alert_threshold - ): - msg = "{} {} is having a **Minor Service Outage**.\n\n**Errors**\n{}\n\nLast Check:{}".format( - provider, - region_name, - outage_value["alerts"], - outage_value["last_updated_at"], - ) - # send minor alert - _result_val = self.send_alert( - message=msg, level="Medium", alert_type="outage_alerts" - ) - if _result_val is not None: - await _result_val - # set to true - outage_value["minor_alert_sent"] = True - elif ( - outage_value["major_alert_sent"] == False - and len(outage_value["alerts"]) - > self.alerting_args.major_outage_alert_threshold - ): - msg = "{} {} is having a **Major Service Outage**.\n\n**Errors**\n{}\n\nLast Check:{}".format( - provider, - region_name, - outage_value["alerts"], - outage_value["last_updated_at"], - ) - # send minor alert - await self.send_alert(message=msg, level="High", alert_type="outage_alerts") - # set to true - outage_value["major_alert_sent"] = True + outage_value["alerts"].append(exception.message) + outage_value["deployment_ids"].append(deployment_id) + outage_value["last_updated_at"] = time.time() + ## MINOR OUTAGE ALERT SENT ## + if ( + outage_value["minor_alert_sent"] == False + and len(outage_value["alerts"]) + >= self.alerting_args.minor_outage_alert_threshold + ): + msg = "{} {} is having a **Minor Service Outage**.\n\n**Errors**\n{}\n\nLast Check:{}".format( + provider, + region_name, + outage_value["alerts"], + outage_value["last_updated_at"], + ) + # send minor alert + _result_val = self.send_alert( + message=msg, level="Medium", alert_type="outage_alerts" + ) + if _result_val is not None: + await _result_val + # set to true + outage_value["minor_alert_sent"] = True + elif ( + outage_value["major_alert_sent"] == False + and len(outage_value["alerts"]) + >= self.alerting_args.major_outage_alert_threshold + ): + msg = "{} {} is having a **Major Service Outage**.\n\n**Errors**\n{}\n\nLast Check:{}".format( + provider, + region_name, + outage_value["alerts"], + outage_value["last_updated_at"], + ) + # send minor alert + await self.send_alert( + message=msg, level="High", alert_type="outage_alerts" + ) + # set to true + outage_value["major_alert_sent"] = True - ## update cache ## - await self.internal_usage_cache.async_set_cache(key=_id, value=outage_value) + ## update cache ## + await self.internal_usage_cache.async_set_cache(key=_id, value=outage_value) + except Exception as e: + pass async def model_added_alert( self, model_name: str, litellm_model_name: str, passed_model_info: Any diff --git a/litellm/tests/test_alerting.py b/litellm/tests/test_alerting.py index 7a4214bbd..703d26137 100644 --- a/litellm/tests/test_alerting.py +++ b/litellm/tests/test_alerting.py @@ -555,16 +555,30 @@ async def test_outage_alerting_called( ), ), ) + + router = Router( + model_list=[ + { + "model_name": model, + "litellm_params": { + "model": model, + "api_key": os.getenv("AZURE_API_KEY"), + "api_base": api_base, + "vertex_location": vertex_location, + "vertex_project": vertex_project, + }, + } + ], + num_retries=0, + allowed_fails=100, + ) with patch.object( slack_alerting, "outage_alerts", new=AsyncMock() ) as mock_send_alert: try: - await litellm.acompletion( + await router.acompletion( model=model, messages=[{"role": "user", "content": "Hey!"}], - api_base=api_base, - vertex_location=vertex_location, - vertex_project=vertex_project, mock_response=error_to_raise, ) except Exception as e: @@ -575,17 +589,14 @@ async def test_outage_alerting_called( with patch.object(slack_alerting, "send_alert", new=AsyncMock()) as mock_send_alert: for _ in range(3): try: - await litellm.acompletion( + await router.acompletion( model=model, messages=[{"role": "user", "content": "Hey!"}], - api_base=api_base, - vertex_location=vertex_location, - vertex_project=vertex_project, mock_response=error_to_raise, ) except Exception as e: pass - + await asyncio.sleep(3) if error_code == 500 or error_code == 408: mock_send_alert.assert_called_once() else: From 8dec87425e97866cac24959067d1de304439f5c5 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Fri, 24 May 2024 19:10:33 -0700 Subject: [PATCH 3/4] feat(slack_alerting.py): refactor region outage alerting to do model based alerting instead Unable to extract azure region from api base, makes sense to start with model alerting and then move to region --- litellm/integrations/slack_alerting.py | 129 +++++++++++++++++------- litellm/main.py | 1 + litellm/proxy/_super_secret_config.yaml | 6 +- litellm/proxy/proxy_server.py | 8 +- litellm/router.py | 14 +-- litellm/tests/test_alerting.py | 2 + litellm/utils.py | 10 +- 7 files changed, 119 insertions(+), 51 deletions(-) diff --git a/litellm/integrations/slack_alerting.py b/litellm/integrations/slack_alerting.py index 2a2ec4ab7..1a8eef24d 100644 --- a/litellm/integrations/slack_alerting.py +++ b/litellm/integrations/slack_alerting.py @@ -23,9 +23,8 @@ import litellm.types.router class OutageModel(TypedDict): - provider: str - region_name: str - alerts: List[str] + model_id: str + alerts: List[int] deployment_ids: List[str] minor_alert_sent: bool major_alert_sent: bool @@ -146,6 +145,7 @@ class SlackAlerting(CustomLogger): self.is_running = False self.alerting_args = SlackAlertingArgs(**alerting_args) self.default_webhook_url = default_webhook_url + self.llm_router: Optional[litellm.Router] = None def update_values( self, @@ -154,6 +154,7 @@ class SlackAlerting(CustomLogger): alert_types: Optional[List] = None, alert_to_webhook_url: Optional[Dict] = None, alerting_args: Optional[Dict] = None, + llm_router: Optional[litellm.Router] = None, ): if alerting is not None: self.alerting = alerting @@ -169,6 +170,8 @@ class SlackAlerting(CustomLogger): self.alert_to_webhook_url = alert_to_webhook_url else: self.alert_to_webhook_url.update(alert_to_webhook_url) + if llm_router is not None: + self.llm_router = llm_router async def deployment_in_cooldown(self): pass @@ -718,21 +721,42 @@ class SlackAlerting(CustomLogger): return return + def _count_outage_alerts(self, alerts: List[int]) -> str: + """ + Parameters: + - alerts: List[int] -> list of error codes (either 408 or 500+) + + Returns: + - str -> formatted string. This is an alert message, giving a human-friendly description of the errors. + """ + error_breakdown = {"Timeout Errors": 0, "API Errors": 0, "Unknown Errors": 0} + for alert in alerts: + if alert == 408: + error_breakdown["Timeout Errors"] += 1 + elif alert >= 500: + error_breakdown["API Errors"] += 1 + else: + error_breakdown["Unknown Errors"] += 1 + + error_msg = "" + for key, value in error_breakdown.items(): + if value > 0: + error_msg += "\n{}: {}\n".format(key, value) + + return error_msg + async def outage_alerts( self, - provider: str, - region_name: str, exception: APIError, deployment_id: str, ) -> None: """ - Send slack alert if provider region (e.g. azure east-us-1) is having an outage (408 or >500 errors). + Send slack alert if model is badly configured / having an outage (408, 401, 429, >=500). - key = (provider + region) + key = model_id value = { - - provider - - region + - model_id - threshold - alerts [] } @@ -741,23 +765,37 @@ class SlackAlerting(CustomLogger): max_alerts_size = 10 """ try: - - _id = provider + region_name - - outage_value: Optional[OutageModel] = await self.internal_usage_cache.async_get_cache(key=_id) # type: ignore - + outage_value: Optional[OutageModel] = await self.internal_usage_cache.async_get_cache(key=deployment_id) # type: ignore if ( - getattr(exception, "status_code", None) is not None - and exception.status_code != 408 # type: ignore - and exception.status_code < 500 # type: ignore + getattr(exception, "status_code", None) is None + or ( + exception.status_code != 408 # type: ignore + and exception.status_code < 500 # type: ignore + ) + or self.llm_router is None ): return + ### EXTRACT MODEL DETAILS ### + deployment = self.llm_router.get_deployment(model_id=deployment_id) + if deployment is None: + return + + model = deployment.litellm_params.model + provider = deployment.litellm_params.custom_llm_provider + if provider is None: + try: + model, provider, _, _ = litellm.get_llm_provider(model=model) + except Exception as e: + provider = "" + api_base = litellm.get_api_base( + model=model, optional_params=deployment.litellm_params + ) + if outage_value is None: outage_value = OutageModel( - provider=provider, - region_name=region_name, - alerts=[exception.message], + model_id=deployment_id, + alerts=[exception.status_code], # type: ignore deployment_ids=[deployment_id], minor_alert_sent=False, major_alert_sent=False, @@ -766,25 +804,35 @@ class SlackAlerting(CustomLogger): ## add to cache ## await self.internal_usage_cache.async_set_cache( - key=_id, value=outage_value, ttl=self.alerting_args.outage_alert_ttl + key=deployment_id, + value=outage_value, + ttl=self.alerting_args.outage_alert_ttl, ) return - outage_value["alerts"].append(exception.message) + outage_value["alerts"].append(exception.status_code) # type: ignore outage_value["deployment_ids"].append(deployment_id) outage_value["last_updated_at"] = time.time() + ## MINOR OUTAGE ALERT SENT ## if ( outage_value["minor_alert_sent"] == False and len(outage_value["alerts"]) >= self.alerting_args.minor_outage_alert_threshold ): - msg = "{} {} is having a **Minor Service Outage**.\n\n**Errors**\n{}\n\nLast Check:{}".format( - provider, - region_name, - outage_value["alerts"], - outage_value["last_updated_at"], - ) + msg = f"""\n\n +*⚠️ Minor Service Outage* + +*Model Name:* `{model}` +*Provider:* `{provider}` +*API Base:* `{api_base}` + +*Errors:* +{self._count_outage_alerts(alerts=outage_value["alerts"])} + + +*Last Check:* `{round(time.time() - outage_value["last_updated_at"], 4)}s ago`\n\n +""" # send minor alert _result_val = self.send_alert( message=msg, level="Medium", alert_type="outage_alerts" @@ -798,12 +846,19 @@ class SlackAlerting(CustomLogger): and len(outage_value["alerts"]) >= self.alerting_args.major_outage_alert_threshold ): - msg = "{} {} is having a **Major Service Outage**.\n\n**Errors**\n{}\n\nLast Check:{}".format( - provider, - region_name, - outage_value["alerts"], - outage_value["last_updated_at"], - ) + msg = f"""\n\n +*⚠️ Major Service Outage* + +*Model Name:* `{model}` +*Provider:* `{provider}` +*API Base:* `{api_base}` + +*Errors:* +{self._count_outage_alerts(alerts=outage_value["alerts"])} + + +*Last Check:* `{round(time.time() - outage_value["last_updated_at"], 4)}s ago`\n\n +""" # send minor alert await self.send_alert( message=msg, level="High", alert_type="outage_alerts" @@ -812,7 +867,9 @@ class SlackAlerting(CustomLogger): outage_value["major_alert_sent"] = True ## update cache ## - await self.internal_usage_cache.async_set_cache(key=_id, value=outage_value) + await self.internal_usage_cache.async_set_cache( + key=deployment_id, value=outage_value + ) except Exception as e: pass @@ -1075,8 +1132,6 @@ Model Info: _region_name = "" await self.outage_alerts( - provider=kwargs.get("custom_llm_provider", "") or "", - region_name=_region_name, exception=kwargs["exception"], deployment_id=model_id, ) diff --git a/litellm/main.py b/litellm/main.py index 9ff474af1..7e6919f0e 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -868,6 +868,7 @@ def completion( user=user, optional_params=optional_params, litellm_params=litellm_params, + custom_llm_provider=custom_llm_provider, ) if mock_response: return mock_completion( diff --git a/litellm/proxy/_super_secret_config.yaml b/litellm/proxy/_super_secret_config.yaml index 105e00e18..290f2fd70 100644 --- a/litellm/proxy/_super_secret_config.yaml +++ b/litellm/proxy/_super_secret_config.yaml @@ -24,7 +24,7 @@ litellm_settings: general_settings: alerting: ["slack"] - alerting_args: - report_check_interval: 10 - enable_jwt_auth: True + # alerting_args: + # report_check_interval: 10 + # enable_jwt_auth: True diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index d9c85623a..9a0fe07fe 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -3007,7 +3007,7 @@ class ProxyConfig: general_settings["alert_types"] = _general_settings["alert_types"] proxy_logging_obj.alert_types = general_settings["alert_types"] proxy_logging_obj.slack_alerting_instance.update_values( - alert_types=general_settings["alert_types"] + alert_types=general_settings["alert_types"], llm_router=llm_router ) if "alert_to_webhook_url" in _general_settings: @@ -3015,7 +3015,8 @@ class ProxyConfig: "alert_to_webhook_url" ] proxy_logging_obj.slack_alerting_instance.update_values( - alert_to_webhook_url=general_settings["alert_to_webhook_url"] + alert_to_webhook_url=general_settings["alert_to_webhook_url"], + llm_router=llm_router, ) async def _update_general_settings(self, db_general_settings: Optional[Json]): @@ -3583,6 +3584,9 @@ async def startup_event(): ## Error Tracking ## error_tracking() + ## UPDATE SLACK ALERTING ## + proxy_logging_obj.slack_alerting_instance.update_values(llm_router=llm_router) + db_writer_client = HTTPHandler() proxy_logging_obj._init_litellm_callbacks() # INITIALIZE LITELLM CALLBACKS ON SERVER STARTUP <- do this to catch any logging errors on startup, not when calls are being made diff --git a/litellm/router.py b/litellm/router.py index 4c3361051..d9563877c 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -3876,13 +3876,13 @@ class Router: _api_base = litellm.get_api_base( model=_model_name, optional_params=temp_litellm_params ) - asyncio.create_task( - proxy_logging_obj.slack_alerting_instance.send_alert( - message=f"Router: Cooling down Deployment:\nModel Name: `{_model_name}`\nAPI Base: `{_api_base}`\nCooldown Time: `{cooldown_time} seconds`\nException Status Code: `{str(exception_status)}`\n\nChange 'cooldown_time' + 'allowed_fails' under 'Router Settings' on proxy UI, or via config - https://docs.litellm.ai/docs/proxy/reliability#fallbacks--retries--timeouts--cooldowns", - alert_type="cooldown_deployment", - level="Low", - ) - ) + # asyncio.create_task( + # proxy_logging_obj.slack_alerting_instance.send_alert( + # message=f"Router: Cooling down Deployment:\nModel Name: `{_model_name}`\nAPI Base: `{_api_base}`\nCooldown Time: `{cooldown_time} seconds`\nException Status Code: `{str(exception_status)}`\n\nChange 'cooldown_time' + 'allowed_fails' under 'Router Settings' on proxy UI, or via config - https://docs.litellm.ai/docs/proxy/reliability#fallbacks--retries--timeouts--cooldowns", + # alert_type="cooldown_deployment", + # level="Low", + # ) + # ) except Exception as e: pass diff --git a/litellm/tests/test_alerting.py b/litellm/tests/test_alerting.py index 703d26137..b3292904a 100644 --- a/litellm/tests/test_alerting.py +++ b/litellm/tests/test_alerting.py @@ -572,6 +572,8 @@ async def test_outage_alerting_called( num_retries=0, allowed_fails=100, ) + + slack_alerting.update_values(llm_router=router) with patch.object( slack_alerting, "outage_alerts", new=AsyncMock() ) as mock_send_alert: diff --git a/litellm/utils.py b/litellm/utils.py index 1ed70a942..8ef527642 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -6286,7 +6286,9 @@ def get_model_region( return None -def get_api_base(model: str, optional_params: dict) -> Optional[str]: +def get_api_base( + model: str, optional_params: Union[dict, LiteLLM_Params] +) -> Optional[str]: """ Returns the api base used for calling the model. @@ -6306,7 +6308,9 @@ def get_api_base(model: str, optional_params: dict) -> Optional[str]: """ try: - if "model" in optional_params: + if isinstance(optional_params, LiteLLM_Params): + _optional_params = optional_params + elif "model" in optional_params: _optional_params = LiteLLM_Params(**optional_params) else: # prevent needing to copy and pop the dict _optional_params = LiteLLM_Params( @@ -6699,6 +6703,8 @@ def get_llm_provider( Returns the provider for a given model name - e.g. 'azure/chatgpt-v-2' -> 'azure' For router -> Can also give the whole litellm param dict -> this function will extract the relevant details + + Raises Error - if unable to map model to a provider """ try: ## IF LITELLM PARAMS GIVEN ## From e8df9c4041662e01b158ac51b50db10ba2023bea Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Fri, 24 May 2024 19:12:09 -0700 Subject: [PATCH 4/4] fix(factory.py): fix linting error --- litellm/llms/prompt_templates/factory.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py index d86c19c5c..41ecb486c 100644 --- a/litellm/llms/prompt_templates/factory.py +++ b/litellm/llms/prompt_templates/factory.py @@ -126,7 +126,7 @@ def convert_to_ollama_image(openai_image_url: str): else: base64_data = openai_image_url - return base64_data; + return base64_data except Exception as e: if "Error: Unable to fetch image from URL" in str(e): raise e @@ -134,6 +134,7 @@ def convert_to_ollama_image(openai_image_url: str): """Image url not in expected format. Example Expected input - "image_url": "data:image/jpeg;base64,{base64_image}". """ ) + def ollama_pt( model, messages ): # https://github.com/ollama/ollama/blob/af4cf55884ac54b9e637cd71dadfe9b7a5685877/docs/modelfile.md#template @@ -166,7 +167,9 @@ def ollama_pt( if element["type"] == "text": prompt += element["text"] elif element["type"] == "image_url": - base64_image = convert_to_ollama_image(element["image_url"]["url"]) + base64_image = convert_to_ollama_image( + element["image_url"]["url"] + ) images.append(base64_image) return {"prompt": prompt, "images": images} else: @@ -1528,11 +1531,12 @@ def _gemini_vision_convert_messages(messages: list): raise Exception( "gemini image conversion failed please run `pip install Pillow`" ) - + if "base64" in img: # Case 2: Base64 image data import base64 import io + # Extract the base64 image data base64_data = img.split("base64,")[1]