feat(proxy_server.py): schedule slack daily report if enabled

if user enabled daily_reports, send them a slack report every 12 hours
2025-04-26 03:04:13 +00:00 · 2024-05-06 18:25:48 -07:00 · 2024-05-06 18:25:48 -07:00 · 6b9b4f05ba
commit 6b9b4f05ba
parent 718f423d7d
4 changed files with 114 additions and 35 deletions
--- a/litellm/integrations/slack_alerting.py
+++ b/litellm/integrations/slack_alerting.py
@ -17,6 +17,7 @@ from pydantic import BaseModel
 from enum import Enum
 from datetime import datetime as dt, timedelta
 from litellm.integrations.custom_logger import CustomLogger
+import random


 class LiteLLMBase(BaseModel):
@ -32,8 +33,9 @@ class LiteLLMBase(BaseModel):
            return self.dict()


-class SlackArgs(LiteLLMBase):
+class SlackAlertingArgs(LiteLLMBase):
    daily_report_frequency: int = 12 * 60 * 60  # 12 hours
+    report_check_interval: int = 5 * 60  # 5 minutes


 class DeploymentMetrics(LiteLLMBase):
@ -63,6 +65,7 @@ class SlackAlertingCacheKeys(Enum):

    failed_requests_key = "failed_requests_daily_metrics"
    latency_key = "latency_daily_metrics"
+    report_sent_key = "daily_metrics_report_sent"


 class SlackAlerting(CustomLogger):
@ -94,6 +97,7 @@ class SlackAlerting(CustomLogger):
        alert_to_webhook_url: Optional[
            Dict
        ] = None,  # if user wants to separate alerts to diff channels
+        alerting_args={},
    ):
        self.alerting_threshold = alerting_threshold
        self.alerting = alerting
@ -102,6 +106,7 @@ class SlackAlerting(CustomLogger):
        self.async_http_handler = AsyncHTTPHandler()
        self.alert_to_webhook_url = alert_to_webhook_url
        self.is_running = False
+        self.alerting_args = SlackAlertingArgs(**alerting_args)

    def update_values(
        self,
@ -109,6 +114,7 @@ class SlackAlerting(CustomLogger):
        alerting_threshold: Optional[float] = None,
        alert_types: Optional[List] = None,
        alert_to_webhook_url: Optional[Dict] = None,
+        alerting_args: Optional[Dict] = None,
    ):
        if alerting is not None:
            self.alerting = alerting
@ -116,7 +122,8 @@ class SlackAlerting(CustomLogger):
            self.alerting_threshold = alerting_threshold
        if alert_types is not None:
            self.alert_types = alert_types
-
+        if alerting_args is not None:
+            self.alerting_args = SlackAlertingArgs(**alerting_args)
        if alert_to_webhook_url is not None:
            # update the dict
            if self.alert_to_webhook_url is None:
@ -356,7 +363,7 @@ class SlackAlerting(CustomLogger):

        # find top 5 slowest
        # Replace None values with a placeholder value (-1 in this case)
-        placeholder_value = -1
+        placeholder_value = 0
        replaced_slowest_values = [
            value if value is not None else placeholder_value
            for value in latency_values
@ -406,8 +413,8 @@ class SlackAlerting(CustomLogger):
                    _deployment["litellm_params"] if _deployment is not None else {}
                ),
            )
-            value = replaced_slowest_values[top_5_slowest[i]]
-            message += f"\t{i+1}. Deployment: `{deployment_name}`, Latency: `{value}`,  API Base: `{api_base}`\n\n"
+            value = round(replaced_slowest_values[top_5_slowest[i]], 3)
+            message += f"\t{i+1}. Deployment: `{deployment_name}`, Latency per output token: `{value}s/token`,  API Base: `{api_base}`\n\n"

        # cache cleanup -> reset values to 0
        latency_cache_keys = [(key, 0) for key in latency_keys]
@ -698,15 +705,18 @@ class SlackAlerting(CustomLogger):

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        """Log deployment latency"""
-        model_id = kwargs.get("litellm_params", {}).get("model_info", {}).get("id", "")
-        response_ms: timedelta = end_time - start_time
+        if "daily_reports" in self.alert_types:
+            model_id = (
+                kwargs.get("litellm_params", {}).get("model_info", {}).get("id", "")
+            )
+            response_s: timedelta = end_time - start_time

-        final_value = response_ms
+            final_value = response_s
            total_tokens = 0

            if isinstance(response_obj, litellm.ModelResponse):
                completion_tokens = response_obj.usage.completion_tokens
-            final_value = float(response_ms.total_seconds() / completion_tokens)
+                final_value = float(response_s.total_seconds() / completion_tokens)

            await self.async_update_daily_reports(
                DeploymentMetrics(
@ -719,7 +729,10 @@ class SlackAlerting(CustomLogger):

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        """Log failure + deployment latency"""
-        model_id = kwargs.get("litellm_params", {}).get("model_info", {}).get("id", "")
+        if "daily_reports" in self.alert_types:
+            model_id = (
+                kwargs.get("litellm_params", {}).get("model_info", {}).get("id", "")
+            )
            await self.async_update_daily_reports(
                DeploymentMetrics(
                    id=model_id,
@ -728,3 +741,46 @@ class SlackAlerting(CustomLogger):
                    updated_at=litellm.utils.get_utc_datetime(),
                )
            )
+
+    async def _run_scheduled_daily_report(self, llm_router: Optional[litellm.Router]):
+        """
+        If 'daily_reports' enabled
+
+        Ping redis cache every 5 minutes to check if we should send the report
+
+        If yes -> call send_daily_report()
+        """
+        if llm_router is None or self.alert_types is None:
+            return
+
+        if "daily_reports" in self.alert_types:
+            while True:
+                report_sent = await self.internal_usage_cache.async_get_cache(
+                    key=SlackAlertingCacheKeys.report_sent_key.value
+                )  # None | datetime
+
+                if report_sent is None:
+                    await self.internal_usage_cache.async_set_cache(
+                        key=SlackAlertingCacheKeys.report_sent_key.value,
+                        value=litellm.utils.get_utc_datetime(),
+                    )
+                else:
+                    # check if current time - interval >= time last sent
+                    current_time = litellm.utils.get_utc_datetime()
+                    delta = current_time - timedelta(
+                        seconds=self.alerting_args.daily_report_frequency
+                    )
+                    if delta >= report_sent:
+                        # Sneak in the reporting logic here
+                        await self.send_daily_reports(router=llm_router)
+                        # Also, don't forget to update the report_sent time after sending the report!
+                        await self.internal_usage_cache.async_set_cache(
+                            key=SlackAlertingCacheKeys.report_sent_key.value,
+                            value=litellm.utils.get_utc_datetime(),
+                        )
+                interval = random.randint(
+                    self.alerting_args.report_check_interval - 3,
+                    self.alerting_args.report_check_interval + 3,
+                )  # shuffle to prevent collisions
+                await asyncio.sleep(interval)
+        return
--- a/litellm/proxy/_super_secret_config.yaml
+++ b/litellm/proxy/_super_secret_config.yaml
@ -4,6 +4,16 @@ model_list:
    api_key: my-fake-key
    model: openai/my-fake-model
  model_name: fake-openai-endpoint
+- litellm_params:
+    api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
+    api_key: my-fake-key-2
+    model: openai/my-fake-model-2
+  model_name: fake-openai-endpoint
+- litellm_params:
+    api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
+    api_key: my-fake-key-3
+    model: openai/my-fake-model-3
+  model_name: fake-openai-endpoint
 router_settings:
  num_retries: 0
  enable_pre_call_checks: true
@ -20,3 +30,6 @@ litellm_settings:
 general_settings:
  alerting: ["slack"]
  alert_types: ["llm_exceptions", "daily_reports"]
+  alerting_args:
+    daily_report_frequency: 60 # every minute
+    report_check_interval: 5 # every 5s 
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@ -1900,9 +1900,6 @@ async def _run_background_health_check():
        await asyncio.sleep(health_check_interval)


-semaphore = asyncio.Semaphore(1)
-
-
 class ProxyConfig:
    """
    Abstraction class on top of config loading/updating logic. Gives us one place to control all config updating logic.
@ -2377,6 +2374,7 @@ class ProxyConfig:
                alerting=general_settings.get("alerting", None),
                alerting_threshold=general_settings.get("alerting_threshold", 600),
                alert_types=general_settings.get("alert_types", None),
+                alerting_args=general_settings.get("alerting_args", None),
                redis_cache=redis_usage_cache,
            )
            ### CONNECT TO DATABASE ###
@ -2501,7 +2499,7 @@ class ProxyConfig:
            for k, v in router_settings.items():
                if k in available_args:
                    router_params[k] = v
-        router = litellm.Router(**router_params, semaphore=semaphore)  # type:ignore
+        router = litellm.Router(**router_params)  # type:ignore
        return router, model_list, general_settings

    def get_model_info_with_id(self, model) -> RouterModelInfo:
@ -3273,6 +3271,13 @@ async def startup_event():

    proxy_logging_obj._init_litellm_callbacks()  # INITIALIZE LITELLM CALLBACKS ON SERVER STARTUP <- do this to catch any logging errors on startup, not when calls are being made

+    if "daily_reports" in proxy_logging_obj.slack_alerting_instance.alert_types:
+        asyncio.create_task(
+            proxy_logging_obj.slack_alerting_instance._run_scheduled_daily_report(
+                llm_router=llm_router
+            )
+        )  # RUN DAILY REPORT (if scheduled)
+
    ## JWT AUTH ##
    if general_settings.get("litellm_jwtauth", None) is not None:
        for k, v in general_settings["litellm_jwtauth"].items():
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@ -107,6 +107,7 @@ class ProxyLogging:
                ]
            ]
        ] = None,
+        alerting_args: Optional[dict] = None,
    ):
        self.alerting = alerting
        if alerting_threshold is not None:
@ -118,8 +119,12 @@ class ProxyLogging:
            alerting=self.alerting,
            alerting_threshold=self.alerting_threshold,
            alert_types=self.alert_types,
+            alerting_args=alerting_args,
        )

+        if "daily_reports" in self.alert_types:
+            litellm.callbacks.append(self.slack_alerting_instance)  # type: ignore
+
        if redis_cache is not None:
            self.internal_usage_cache.redis_cache = redis_cache