diff --git a/docs/my-website/docs/proxy/config_settings.md b/docs/my-website/docs/proxy/config_settings.md
index ea5d104a71..4c54aebc66 100644
--- a/docs/my-website/docs/proxy/config_settings.md
+++ b/docs/my-website/docs/proxy/config_settings.md
@@ -452,6 +452,7 @@ router_settings:
| OTEL_HEADERS | Headers for OpenTelemetry requests
| OTEL_SERVICE_NAME | Service name identifier for OpenTelemetry
| OTEL_TRACER_NAME | Tracer name for OpenTelemetry tracing
+| PAGERDUTY_API_KEY | API key for PagerDuty Alerting
| POD_NAME | Pod name for the server, this will be [emitted to `datadog` logs](https://docs.litellm.ai/docs/proxy/logging#datadog) as `POD_NAME`
| PREDIBASE_API_BASE | Base URL for Predibase API
| PRESIDIO_ANALYZER_API_BASE | Base URL for Presidio Analyzer service
diff --git a/docs/my-website/docs/proxy/pagerduty.md b/docs/my-website/docs/proxy/pagerduty.md
new file mode 100644
index 0000000000..70686deebd
--- /dev/null
+++ b/docs/my-website/docs/proxy/pagerduty.md
@@ -0,0 +1,106 @@
+import Image from '@theme/IdealImage';
+
+# PagerDuty Alerting
+
+:::info
+
+✨ PagerDuty Alerting is on LiteLLM Enterprise
+
+[Enterprise Pricing](https://www.litellm.ai/#pricing)
+
+[Get free 7-day trial key](https://www.litellm.ai/#trial)
+
+:::
+
+Handles two types of alerts:
+- High LLM API Failure Rate: configure X failures in Y seconds to trigger an alert.
+- High Number of Hanging LLM Requests: configure X hanging requests in Y seconds to trigger an alert.
+
+
+## Quick Start
+
+1. Set `PAGERDUTY_API_KEY` in your environment variables. This is your PagerDuty Events API v2 integration key, used as the `routing_key` when LiteLLM sends events.
+
+```shell
+PAGERDUTY_API_KEY="d8bxxxxx"
+```
+
+2. Enable PagerDuty alerting in your config file.
+
+```yaml
+model_list:
+ - model_name: "openai/*"
+ litellm_params:
+ model: "openai/*"
+ api_key: os.environ/OPENAI_API_KEY
+
+general_settings:
+ alerting: ["pagerduty"]
+ alerting_args:
+ failure_threshold: 1 # Number of requests failing in a window
+ failure_threshold_window_seconds: 10 # Window in seconds
+
+ # Requests hanging threshold
+ hanging_threshold_seconds: 0.0000001 # Number of seconds of waiting for a response before a request is considered hanging
+ hanging_threshold_window_seconds: 10 # Window in seconds
+```
+
+
+3. Test it
+
+
+Start LiteLLM Proxy
+
+```shell
+litellm --config config.yaml
+```
+
+### LLM API Failure Alert
+Try sending a bad request to the proxy:
+
+```shell
+curl -i --location 'http://0.0.0.0:4000/chat/completions' \
+--header 'Content-Type: application/json' \
+--header 'Authorization: Bearer sk-1234' \
+--data ' {
+ "model": "gpt-4o",
+ "user": "hi",
+ "messages": [
+ {
+ "role": "user",
+ "bad_param": "i like coffee"
+ }
+ ]
+ }
+'
+```
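+
+You should see an alert triggered in PagerDuty:
+
+<Image img={require('../../img/pagerduty_fail.png')} />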
+
+
+
+### LLM Hanging Alert
+
+Send a normal request to the proxy.
+
+Since the hanging threshold is set to 0.0000001 seconds, the request will be flagged as hanging and you should see an alert.
+
+```shell
+curl -i --location 'http://0.0.0.0:4000/chat/completions' \
+--header 'Content-Type: application/json' \
+--header 'Authorization: Bearer sk-1234' \
+--data ' {
+ "model": "gpt-4o",
+ "user": "hi",
+ "messages": [
+ {
+ "role": "user",
+ "content": "i like coffee"
+ }
+ ]
+ }
+'
+```
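+
+You should see an alert triggered in PagerDuty:
+
+<Image img={require('../../img/pagerduty_hanging.png')} />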
+
+
+
+
+
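+## Using the callback directly (Python)
+
+For reference, the same callback can be constructed directly in Python, mirroring the test suite. This is a minimal sketch, assuming `PAGERDUTY_API_KEY` is set in the environment and that the enterprise (premium user) check enforced by the class constructor passes.
+
+```python
+import litellm
+from litellm.integrations.pagerduty.pagerduty import AlertingConfig, PagerDutyAlerting
+
+# Alert if 1 request fails within any 10-second window
+pagerduty = PagerDutyAlerting(
+    alerting_args=AlertingConfig(
+        failure_threshold=1,
+        failure_threshold_window_seconds=10,
+    )
+)
+
+# Register the callback so failed requests are tracked and alerts are sent
+litellm.callbacks = [pagerduty]
+```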
diff --git a/docs/my-website/img/pagerduty_fail.png b/docs/my-website/img/pagerduty_fail.png
new file mode 100644
index 0000000000..0889557ce2
Binary files /dev/null and b/docs/my-website/img/pagerduty_fail.png differ
diff --git a/docs/my-website/img/pagerduty_hanging.png b/docs/my-website/img/pagerduty_hanging.png
new file mode 100644
index 0000000000..ea5c75dcd8
Binary files /dev/null and b/docs/my-website/img/pagerduty_hanging.png differ
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index 0b1ee925ab..a3d1c602ed 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -118,7 +118,13 @@ const sidebars = {
{
type: "category",
label: "Logging, Alerting, Metrics",
- items: ["proxy/logging", "proxy/logging_spec", "proxy/team_logging","proxy/alerting", "proxy/prometheus"],
+ items: [
+ "proxy/logging",
+ "proxy/logging_spec",
+ "proxy/team_logging",
+ "proxy/prometheus",
+ "proxy/alerting",
+ "proxy/pagerduty"],
},
{
type: "category",
diff --git a/litellm/__init__.py b/litellm/__init__.py
index 59af448855..1e2c1c4e75 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -74,6 +74,7 @@ _custom_logger_compatible_callbacks_literal = Literal[
"argilla",
"mlflow",
"langfuse",
+ "pagerduty",
"humanloop",
]
logged_real_time_event_types: Optional[Union[List[str], Literal["*"]]] = None
diff --git a/litellm/integrations/SlackAlerting/slack_alerting.py b/litellm/integrations/SlackAlerting/slack_alerting.py
index 3c71332de7..526610f0af 100644
--- a/litellm/integrations/SlackAlerting/slack_alerting.py
+++ b/litellm/integrations/SlackAlerting/slack_alerting.py
@@ -6,7 +6,7 @@ import os
import random
import time
from datetime import timedelta
-from typing import Any, Dict, List, Literal, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Union
from openai import APIError
@@ -25,13 +25,19 @@ from litellm.llms.custom_httpx.http_handler import (
httpxSpecialProvider,
)
from litellm.proxy._types import AlertType, CallInfo, VirtualKeyEvent, WebhookEvent
-from litellm.router import Router
from litellm.types.integrations.slack_alerting import *
from ..email_templates.templates import *
from .batching_handler import send_to_webhook, squash_payloads
from .utils import _add_langfuse_trace_id_to_alert, process_slack_alerting_variables
+if TYPE_CHECKING:
+ from litellm.router import Router as _Router
+
+ Router = _Router
+else:
+ Router = Any
+
class SlackAlerting(CustomBatchLogger):
"""
@@ -465,18 +471,10 @@ class SlackAlerting(CustomBatchLogger):
self.alerting_threshold
) # Set it to 5 minutes - i'd imagine this might be different for streaming, non-streaming, non-completion (embedding + img) requests
alerting_metadata: dict = {}
- if (
- request_data is not None
- and request_data.get("litellm_status", "") != "success"
- and request_data.get("litellm_status", "") != "fail"
- ):
- ## CHECK IF CACHE IS UPDATED
- litellm_call_id = request_data.get("litellm_call_id", "")
- status: Optional[str] = await self.internal_usage_cache.async_get_cache(
- key="request_status:{}".format(litellm_call_id), local_only=True
- )
- if status is not None and (status == "success" or status == "fail"):
- return
+ if await self._request_is_completed(request_data=request_data) is True:
+ return
+
+ if request_data is not None:
if request_data.get("deployment", None) is not None and isinstance(
request_data["deployment"], dict
):
@@ -1753,3 +1751,23 @@ Model Info:
)
return
+
+ async def _request_is_completed(self, request_data: Optional[dict]) -> bool:
+ """
+ Returns True if the request is completed - either as a success or failure
+ """
+ if request_data is None:
+ return False
+
+ if (
+ request_data.get("litellm_status", "") != "success"
+ and request_data.get("litellm_status", "") != "fail"
+ ):
+ ## CHECK IF CACHE IS UPDATED
+ litellm_call_id = request_data.get("litellm_call_id", "")
+ status: Optional[str] = await self.internal_usage_cache.async_get_cache(
+ key="request_status:{}".format(litellm_call_id), local_only=True
+ )
+ if status is not None and (status == "success" or status == "fail"):
+ return True
+ return False
diff --git a/litellm/integrations/SlackAlerting/utils.py b/litellm/integrations/SlackAlerting/utils.py
index 87e78afa90..0dc8bae5a6 100644
--- a/litellm/integrations/SlackAlerting/utils.py
+++ b/litellm/integrations/SlackAlerting/utils.py
@@ -3,12 +3,18 @@ Utils used for slack alerting
"""
import asyncio
-from typing import Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
-from litellm.litellm_core_utils.litellm_logging import Logging
from litellm.proxy._types import AlertType
from litellm.secret_managers.main import get_secret
+if TYPE_CHECKING:
+ from litellm.litellm_core_utils.litellm_logging import Logging as _Logging
+
+ Logging = _Logging
+else:
+ Logging = Any
+
def process_slack_alerting_variables(
alert_to_webhook_url: Optional[Dict[AlertType, Union[List[str], str]]]
diff --git a/litellm/integrations/pagerduty/pagerduty.py b/litellm/integrations/pagerduty/pagerduty.py
new file mode 100644
index 0000000000..2eeb318c9d
--- /dev/null
+++ b/litellm/integrations/pagerduty/pagerduty.py
@@ -0,0 +1,303 @@
+"""
+PagerDuty Alerting Integration
+
+Handles two types of alerts:
+- High LLM API Failure Rate. Configure X fails in Y seconds to trigger an alert.
+- High Number of Hanging LLM Requests. Configure X hangs in Y seconds to trigger an alert.
+"""
+
+import asyncio
+import os
+from datetime import datetime, timedelta, timezone
+from typing import List, Literal, Optional, Union
+
+from litellm._logging import verbose_logger
+from litellm.caching import DualCache
+from litellm.integrations.SlackAlerting.slack_alerting import SlackAlerting
+from litellm.llms.custom_httpx.http_handler import (
+ AsyncHTTPHandler,
+ get_async_httpx_client,
+ httpxSpecialProvider,
+)
+from litellm.proxy._types import UserAPIKeyAuth
+from litellm.types.integrations.pagerduty import (
+ AlertingConfig,
+ PagerDutyInternalEvent,
+ PagerDutyPayload,
+ PagerDutyRequestBody,
+)
+from litellm.types.utils import (
+ StandardLoggingPayload,
+ StandardLoggingPayloadErrorInformation,
+)
+
+PAGERDUTY_DEFAULT_FAILURE_THRESHOLD = 60
+PAGERDUTY_DEFAULT_FAILURE_THRESHOLD_WINDOW_SECONDS = 60
+PAGERDUTY_DEFAULT_HANGING_THRESHOLD_SECONDS = 60
+PAGERDUTY_DEFAULT_HANGING_THRESHOLD_WINDOW_SECONDS = 600
+
+
+class PagerDutyAlerting(SlackAlerting):
+ """
+ Tracks failed requests and hanging requests separately.
+ If threshold is crossed for either type, triggers a PagerDuty alert.
+ """
+
+ def __init__(
+ self, alerting_args: Optional[Union[AlertingConfig, dict]] = None, **kwargs
+ ):
+ from litellm.proxy.proxy_server import CommonProxyErrors, premium_user
+
+ super().__init__()
+ _api_key = os.getenv("PAGERDUTY_API_KEY")
+ if not _api_key:
+ raise ValueError("PAGERDUTY_API_KEY is not set")
+
+ self.api_key: str = _api_key
+ alerting_args = alerting_args or {}
+ self.alerting_args: AlertingConfig = AlertingConfig(
+ failure_threshold=alerting_args.get(
+ "failure_threshold", PAGERDUTY_DEFAULT_FAILURE_THRESHOLD
+ ),
+ failure_threshold_window_seconds=alerting_args.get(
+ "failure_threshold_window_seconds",
+ PAGERDUTY_DEFAULT_FAILURE_THRESHOLD_WINDOW_SECONDS,
+ ),
+ hanging_threshold_seconds=alerting_args.get(
+ "hanging_threshold_seconds", PAGERDUTY_DEFAULT_HANGING_THRESHOLD_SECONDS
+ ),
+ hanging_threshold_window_seconds=alerting_args.get(
+ "hanging_threshold_window_seconds",
+ PAGERDUTY_DEFAULT_HANGING_THRESHOLD_WINDOW_SECONDS,
+ ),
+ )
+
+ # Separate storage for failures vs. hangs
+ self._failure_events: List[PagerDutyInternalEvent] = []
+ self._hanging_events: List[PagerDutyInternalEvent] = []
+
+ # premium user check
+ if premium_user is not True:
+ raise ValueError(
+ f"PagerDutyAlerting is only available for LiteLLM Enterprise users. {CommonProxyErrors.not_premium_user.value}"
+ )
+
+ # ------------------ MAIN LOGIC ------------------ #
+
+ async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
+ """
+ Record a failure event. Only send an alert to PagerDuty if the
+ configured *failure* threshold is exceeded in the specified window.
+ """
+ now = datetime.now(timezone.utc)
+ standard_logging_payload: Optional[StandardLoggingPayload] = kwargs.get(
+ "standard_logging_object"
+ )
+ if not standard_logging_payload:
+ raise ValueError(
+ "standard_logging_object is required for PagerDutyAlerting"
+ )
+
+ # Extract error details
+ error_info: Optional[StandardLoggingPayloadErrorInformation] = (
+ standard_logging_payload.get("error_information") or {}
+ )
+ _meta = standard_logging_payload.get("metadata") or {}
+
+ self._failure_events.append(
+ PagerDutyInternalEvent(
+ failure_event_type="failed_response",
+ timestamp=now,
+ error_class=error_info.get("error_class"),
+ error_code=error_info.get("error_code"),
+ error_llm_provider=error_info.get("llm_provider"),
+ user_api_key_hash=_meta.get("user_api_key_hash"),
+ user_api_key_alias=_meta.get("user_api_key_alias"),
+ user_api_key_org_id=_meta.get("user_api_key_org_id"),
+ user_api_key_team_id=_meta.get("user_api_key_team_id"),
+ user_api_key_user_id=_meta.get("user_api_key_user_id"),
+ user_api_key_team_alias=_meta.get("user_api_key_team_alias"),
+ user_api_key_end_user_id=_meta.get("user_api_key_end_user_id"),
+ )
+ )
+
+ # Prune + Possibly alert
+ window_seconds = self.alerting_args.get("failure_threshold_window_seconds", 60)
+ threshold = self.alerting_args.get("failure_threshold", 1)
+
+ # If threshold is crossed, send PD alert for failures
+ await self._send_alert_if_thresholds_crossed(
+ events=self._failure_events,
+ window_seconds=window_seconds,
+ threshold=threshold,
+ alert_prefix="High LLM API Failure Rate",
+ )
+
+ async def async_pre_call_hook(
+ self,
+ user_api_key_dict: UserAPIKeyAuth,
+ cache: DualCache,
+ data: dict,
+ call_type: Literal[
+ "completion",
+ "text_completion",
+ "embeddings",
+ "image_generation",
+ "moderation",
+ "audio_transcription",
+ "pass_through_endpoint",
+ "rerank",
+ ],
+ ) -> Optional[Union[Exception, str, dict]]:
+ """
+        Detect hanging requests by waiting for the configured threshold.
+        If the request has not finished by then, it is treated as 'hanging'.
+        """
+        verbose_logger.info("Inside PagerDuty Alerting Pre-call hook!")
+ asyncio.create_task(
+ self.hanging_response_handler(
+ request_data=data, user_api_key_dict=user_api_key_dict
+ )
+ )
+ return None
+
+ async def hanging_response_handler(
+ self, request_data: Optional[dict], user_api_key_dict: UserAPIKeyAuth
+ ):
+ """
+ Checks if request completed by the time 'hanging_threshold_seconds' elapses.
+ If not, we classify it as a hanging request.
+ """
+ verbose_logger.debug(
+ f"Inside Hanging Response Handler!..sleeping for {self.alerting_args.get('hanging_threshold_seconds', PAGERDUTY_DEFAULT_HANGING_THRESHOLD_SECONDS)} seconds"
+ )
+ await asyncio.sleep(
+ self.alerting_args.get(
+ "hanging_threshold_seconds", PAGERDUTY_DEFAULT_HANGING_THRESHOLD_SECONDS
+ )
+ )
+
+ if await self._request_is_completed(request_data=request_data):
+ return # It's not hanging if completed
+
+ # Otherwise, record it as hanging
+ self._hanging_events.append(
+ PagerDutyInternalEvent(
+ failure_event_type="hanging_response",
+ timestamp=datetime.now(timezone.utc),
+ error_class="HangingRequest",
+ error_code="HangingRequest",
+ error_llm_provider="HangingRequest",
+ user_api_key_hash=user_api_key_dict.api_key,
+ user_api_key_alias=user_api_key_dict.key_alias,
+ user_api_key_org_id=user_api_key_dict.org_id,
+ user_api_key_team_id=user_api_key_dict.team_id,
+ user_api_key_user_id=user_api_key_dict.user_id,
+ user_api_key_team_alias=user_api_key_dict.team_alias,
+ user_api_key_end_user_id=user_api_key_dict.end_user_id,
+ )
+ )
+
+ # Prune + Possibly alert
+ window_seconds = self.alerting_args.get(
+ "hanging_threshold_window_seconds",
+ PAGERDUTY_DEFAULT_HANGING_THRESHOLD_WINDOW_SECONDS,
+ )
+        # Max number of hanging requests allowed in the window before alerting
+        # (defaults to 60 when `hanging_threshold_fails` is not configured)
+        threshold: int = self.alerting_args.get(
+            "hanging_threshold_fails", PAGERDUTY_DEFAULT_HANGING_THRESHOLD_SECONDS
+        )
+
+ # If threshold is crossed, send PD alert for hangs
+ await self._send_alert_if_thresholds_crossed(
+ events=self._hanging_events,
+ window_seconds=window_seconds,
+ threshold=threshold,
+ alert_prefix="High Number of Hanging LLM Requests",
+ )
+
+ # ------------------ HELPERS ------------------ #
+
+ async def _send_alert_if_thresholds_crossed(
+ self,
+ events: List[PagerDutyInternalEvent],
+ window_seconds: int,
+ threshold: int,
+ alert_prefix: str,
+ ):
+ """
+ 1. Prune old events
+ 2. If threshold is reached, build alert, send to PagerDuty
+ 3. Clear those events
+ """
+ cutoff = datetime.now(timezone.utc) - timedelta(seconds=window_seconds)
+        # Use a tz-aware default so a missing timestamp never causes a naive/aware comparison error
+        aware_min = datetime.min.replace(tzinfo=timezone.utc)
+        pruned = [e for e in events if e.get("timestamp", aware_min) > cutoff]
+
+ # Update the reference list
+ events.clear()
+ events.extend(pruned)
+
+ # Check threshold
+ verbose_logger.debug(
+ f"Have {len(events)} events in the last {window_seconds} seconds. Threshold is {threshold}"
+ )
+ if len(events) >= threshold:
+ # Build short summary of last N events
+ error_summaries = self._build_error_summaries(events, max_errors=5)
+ alert_message = (
+ f"{alert_prefix}: {len(events)} in the last {window_seconds} seconds."
+ )
+ custom_details = {"recent_errors": error_summaries}
+
+ await self.send_alert_to_pagerduty(
+ alert_message=alert_message,
+ custom_details=custom_details,
+ )
+
+ # Clear them after sending an alert, so we don't spam
+ events.clear()
+
+ def _build_error_summaries(
+ self, events: List[PagerDutyInternalEvent], max_errors: int = 5
+ ) -> List[PagerDutyInternalEvent]:
+ """
+        Return the most recent `max_errors` events (timestamps stripped) for
+        inclusion in the alert's `custom_details`.
+ """
+ recent = events[-max_errors:]
+ summaries = []
+ for fe in recent:
+            # Drop the timestamp (a datetime, not JSON-serializable) from the summary
+            fe.pop("timestamp", None)
+ summaries.append(fe)
+ return summaries
+
+ async def send_alert_to_pagerduty(self, alert_message: str, custom_details: dict):
+ """
+ Send [critical] Alert to PagerDuty
+
+ https://developer.pagerduty.com/api-reference/YXBpOjI3NDgyNjU-pager-duty-v2-events-api
+ """
+ try:
+ verbose_logger.debug(f"Sending alert to PagerDuty: {alert_message}")
+ async_client: AsyncHTTPHandler = get_async_httpx_client(
+ llm_provider=httpxSpecialProvider.LoggingCallback
+ )
+ payload: PagerDutyRequestBody = PagerDutyRequestBody(
+ payload=PagerDutyPayload(
+ summary=alert_message,
+ severity="critical",
+ source="LiteLLM Alert",
+ component="LiteLLM",
+ custom_details=custom_details,
+ ),
+ routing_key=self.api_key,
+ event_action="trigger",
+ )
+
+ return await async_client.post(
+ url="https://events.pagerduty.com/v2/enqueue",
+ json=dict(payload),
+ headers={"Content-Type": "application/json"},
+ )
+ except Exception as e:
+ verbose_logger.exception(f"Error sending alert to PagerDuty: {e}")
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index 9753078fed..97b8799284 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -30,6 +30,7 @@ from litellm.cost_calculator import _select_model_name_for_cost_calc
from litellm.integrations.custom_guardrail import CustomGuardrail
from litellm.integrations.custom_logger import CustomLogger
from litellm.integrations.mlflow import MlflowLogger
+from litellm.integrations.pagerduty.pagerduty import PagerDutyAlerting
from litellm.litellm_core_utils.redact_messages import (
redact_message_input_output_from_custom_logger,
redact_message_input_output_from_logging,
@@ -1992,7 +1993,7 @@ class Logging(LiteLLMLoggingBaseClass):
)
except Exception as e:
verbose_logger.exception(
- "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success \
+ "LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure \
logging {}\nCallback={}".format(
str(e), callback
)
@@ -2163,7 +2164,12 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
llm_router: Optional[
Any
], # expect litellm.Router, but typing errors due to circular import
+    custom_logger_init_args: Optional[dict] = None,
) -> Optional[CustomLogger]:
+ """
+ Initialize a custom logger compatible class
+ """
+ custom_logger_init_args = custom_logger_init_args or {}
if logging_integration == "lago":
for callback in _in_memory_loggers:
if isinstance(callback, LagoLogger):
@@ -2386,6 +2392,13 @@ def _init_custom_logger_compatible_class( # noqa: PLR0915
langfuse_logger = LangfusePromptManagement()
_in_memory_loggers.append(langfuse_logger)
return langfuse_logger # type: ignore
+ elif logging_integration == "pagerduty":
+ for callback in _in_memory_loggers:
+ if isinstance(callback, PagerDutyAlerting):
+ return callback
+ pagerduty_logger = PagerDutyAlerting(**custom_logger_init_args)
+ _in_memory_loggers.append(pagerduty_logger)
+ return pagerduty_logger # type: ignore
elif logging_integration == "humanloop":
for callback in _in_memory_loggers:
if isinstance(callback, HumanloopLogger):
@@ -2509,6 +2522,10 @@ def get_custom_logger_compatible_class( # noqa: PLR0915
for callback in _in_memory_loggers:
if isinstance(callback, MlflowLogger):
return callback
+ elif logging_integration == "pagerduty":
+ for callback in _in_memory_loggers:
+ if isinstance(callback, PagerDutyAlerting):
+ return callback
return None
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 74d2523acd..9f970364b5 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -11,9 +11,21 @@ litellm_settings:
callbacks: ["datadog"]
+general_settings:
+ alerting: ["pagerduty"]
+ alerting_args:
+ failure_threshold: 4 # Number of requests failing in a window
+ failure_threshold_window_seconds: 10 # Window in seconds
+
+ # Requests hanging threshold
+ hanging_threshold_seconds: 0.0000001 # Number of seconds of waiting for a response before a request is considered hanging
+ hanging_threshold_window_seconds: 10 # Window in seconds
+
+
# For /fine_tuning/jobs endpoints
finetune_settings:
- custom_llm_provider: "vertex_ai"
vertex_project: "adroit-crow-413218"
vertex_location: "us-central1"
- vertex_credentials: "/Users/ishaanjaffer/Downloads/adroit-crow-413218-a956eef1a2a8.json"
\ No newline at end of file
+ vertex_credentials: "/Users/ishaanjaffer/Downloads/adroit-crow-413218-a956eef1a2a8.json"
+
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index ad8408001b..d0d6621cca 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1939,15 +1939,7 @@ class ProxyConfig:
use_azure_key_vault = general_settings.get("use_azure_key_vault", False)
load_from_azure_key_vault(use_azure_key_vault=use_azure_key_vault)
### ALERTING ###
-
- proxy_logging_obj.update_values(
- alerting=general_settings.get("alerting", None),
- alerting_threshold=general_settings.get("alerting_threshold", 600),
- alert_types=general_settings.get("alert_types", None),
- alert_to_webhook_url=general_settings.get("alert_to_webhook_url", None),
- alerting_args=general_settings.get("alerting_args", None),
- redis_cache=redis_usage_cache,
- )
+ self._load_alerting_settings(general_settings=general_settings)
### CONNECT TO DATABASE ###
database_url = general_settings.get("database_url", None)
if database_url and database_url.startswith("os.environ/"):
@@ -2135,6 +2127,46 @@ class ProxyConfig:
)
return router, router.get_model_list(), general_settings
+ def _load_alerting_settings(self, general_settings: dict):
+ """
+ Initialize alerting settings
+ """
+ from litellm.litellm_core_utils.litellm_logging import (
+ _init_custom_logger_compatible_class,
+ )
+
+ _alerting_callbacks = general_settings.get("alerting", None)
+ verbose_proxy_logger.debug(f"_alerting_callbacks: {general_settings}")
+ if _alerting_callbacks is None:
+ return
+ for _alert in _alerting_callbacks:
+ if _alert == "slack":
+ # [OLD] v0 implementation
+ proxy_logging_obj.update_values(
+ alerting=general_settings.get("alerting", None),
+ alerting_threshold=general_settings.get("alerting_threshold", 600),
+ alert_types=general_settings.get("alert_types", None),
+ alert_to_webhook_url=general_settings.get(
+ "alert_to_webhook_url", None
+ ),
+ alerting_args=general_settings.get("alerting_args", None),
+ redis_cache=redis_usage_cache,
+ )
+ else:
+ # [NEW] v1 implementation - init as a custom logger
+ if _alert in litellm._known_custom_logger_compatible_callbacks:
+ _logger = _init_custom_logger_compatible_class(
+ logging_integration=_alert,
+ internal_usage_cache=None,
+ llm_router=None,
+ custom_logger_init_args={
+ "alerting_args": general_settings.get("alerting_args", None)
+ },
+ )
+ if _logger is not None:
+ litellm.callbacks.append(_logger)
+ pass
+
def get_model_info_with_id(self, model, db_model=False) -> RouterModelInfo:
"""
Common logic across add + delete router models
diff --git a/litellm/types/integrations/pagerduty.py b/litellm/types/integrations/pagerduty.py
new file mode 100644
index 0000000000..22fd16654a
--- /dev/null
+++ b/litellm/types/integrations/pagerduty.py
@@ -0,0 +1,62 @@
+from datetime import datetime
+from typing import List, Literal, Optional, TypedDict, Union
+
+from litellm.types.utils import StandardLoggingUserAPIKeyMetadata
+
+
+class LinkDict(TypedDict, total=False):
+ href: str
+ text: Optional[str]
+
+
+class ImageDict(TypedDict, total=False):
+ src: str
+ href: Optional[str]
+ alt: Optional[str]
+
+
+class PagerDutyPayload(TypedDict, total=False):
+ summary: str
+ timestamp: Optional[str] # ISO 8601 date-time format
+ severity: Literal["critical", "warning", "error", "info"]
+ source: str
+ component: Optional[str]
+ group: Optional[str]
+ class_: Optional[str] # Using class_ since 'class' is a reserved keyword
+ custom_details: Optional[dict]
+
+
+class PagerDutyRequestBody(TypedDict, total=False):
+ payload: PagerDutyPayload
+ routing_key: str
+ event_action: Literal["trigger", "acknowledge", "resolve"]
+ dedup_key: Optional[str]
+ client: Optional[str]
+ client_url: Optional[str]
+ links: Optional[List[LinkDict]]
+ images: Optional[List[ImageDict]]
+
+
+class AlertingConfig(TypedDict, total=False):
+ """
+ Config for alerting thresholds
+ """
+
+ # Requests failing threshold
+ failure_threshold: int # Number of requests failing in a window
+ failure_threshold_window_seconds: int # Window in seconds
+
+ # Requests hanging threshold
+ hanging_threshold_seconds: float # Number of seconds of waiting for a response before a request is considered hanging
+ hanging_threshold_fails: int # Number of requests hanging in a window
+ hanging_threshold_window_seconds: int # Window in seconds
+
+
+class PagerDutyInternalEvent(StandardLoggingUserAPIKeyMetadata, total=False):
+ """Simple structure to hold timestamp and error info."""
+
+ failure_event_type: Literal["failed_response", "hanging_response"]
+ timestamp: datetime
+ error_class: Optional[str]
+ error_code: Optional[str]
+ error_llm_provider: Optional[str]
diff --git a/tests/logging_callback_tests/test_pagerduty_alerting.py b/tests/logging_callback_tests/test_pagerduty_alerting.py
new file mode 100644
index 0000000000..00e427d01d
--- /dev/null
+++ b/tests/logging_callback_tests/test_pagerduty_alerting.py
@@ -0,0 +1,96 @@
+import asyncio
+import os
+import random
+import sys
+from datetime import datetime, timedelta
+from typing import Optional
+
+sys.path.insert(0, os.path.abspath("../.."))
+import pytest
+import litellm
+from litellm.integrations.pagerduty.pagerduty import PagerDutyAlerting, AlertingConfig
+from litellm.proxy._types import UserAPIKeyAuth
+
+
+@pytest.mark.asyncio
+async def test_pagerduty_alerting():
+ pagerduty = PagerDutyAlerting(
+ alerting_args=AlertingConfig(
+ failure_threshold=1, failure_threshold_window_seconds=10
+ )
+ )
+ litellm.callbacks = [pagerduty]
+
+ try:
+ await litellm.acompletion(
+ model="gpt-3.5-turbo",
+ messages=[{"role": "user", "content": "hi"}],
+ mock_response="litellm.RateLimitError",
+ )
+ except litellm.RateLimitError:
+ pass
+
+ await asyncio.sleep(2)
+
+
+@pytest.mark.asyncio
+async def test_pagerduty_alerting_high_failure_rate():
+ pagerduty = PagerDutyAlerting(
+ alerting_args=AlertingConfig(
+ failure_threshold=3, failure_threshold_window_seconds=600
+ )
+ )
+ litellm.callbacks = [pagerduty]
+
+ try:
+ await litellm.acompletion(
+ model="gpt-3.5-turbo",
+ messages=[{"role": "user", "content": "hi"}],
+ mock_response="litellm.RateLimitError",
+ )
+ except litellm.RateLimitError:
+ pass
+
+ await asyncio.sleep(2)
+
+ # make 3 more fails
+ for _ in range(3):
+ try:
+ await litellm.acompletion(
+ model="gpt-3.5-turbo",
+ messages=[{"role": "user", "content": "hi"}],
+ mock_response="litellm.RateLimitError",
+ )
+ except litellm.RateLimitError:
+ pass
+
+ await asyncio.sleep(2)
+
+
+@pytest.mark.asyncio
+async def test_pagerduty_hanging_request_alerting():
+ pagerduty = PagerDutyAlerting(
+ alerting_args=AlertingConfig(hanging_threshold_seconds=0.0000001)
+ )
+ litellm.callbacks = [pagerduty]
+
+ await pagerduty.async_pre_call_hook(
+ cache=None,
+ user_api_key_dict=UserAPIKeyAuth(
+ api_key="test",
+ key_alias="test-pagerduty",
+ team_alias="test-team",
+ org_id="test-org",
+ user_id="test-user",
+ end_user_id="test-end-user",
+ ),
+ data={"model": "gpt-4o", "messages": [{"role": "user", "content": "hi"}]},
+ call_type="completion",
+ )
+
+ await litellm.acompletion(
+ model="gpt-4o",
+ messages=[{"role": "user", "content": "hi"}],
+ )
+
+ await asyncio.sleep(1)
diff --git a/tests/logging_callback_tests/test_unit_tests_init_callbacks.py b/tests/logging_callback_tests/test_unit_tests_init_callbacks.py
index 453c4b58ab..f5728b95b2 100644
--- a/tests/logging_callback_tests/test_unit_tests_init_callbacks.py
+++ b/tests/logging_callback_tests/test_unit_tests_init_callbacks.py
@@ -20,6 +20,7 @@ from prometheus_client import REGISTRY, CollectorRegistry
from litellm.integrations.lago import LagoLogger
from litellm.integrations.openmeter import OpenMeterLogger
from litellm.integrations.braintrust_logging import BraintrustLogger
+from litellm.integrations.pagerduty.pagerduty import PagerDutyAlerting
from litellm.integrations.galileo import GalileoObserve
from litellm.integrations.langsmith import LangsmithLogger
from litellm.integrations.literal_ai import LiteralAILogger
@@ -68,6 +69,7 @@ callback_class_str_to_classType = {
"mlflow": MlflowLogger,
"langfuse": LangfusePromptManagement,
"otel": OpenTelemetry,
+ "pagerduty": PagerDutyAlerting,
}
expected_env_vars = {
@@ -87,6 +89,7 @@ expected_env_vars = {
"ARIZE_SPACE_KEY": "arize_space_key",
"ARIZE_API_KEY": "arize_api_key",
"ARGILLA_API_KEY": "argilla_api_key",
+ "PAGERDUTY_API_KEY": "pagerduty_api_key",
}