(fix) slack alerting - don't spam the failed cost tracking alert for the same model (#6543)

* fix use failing_model as cache key for failed_tracking_alert
* fix use standard logging payload for getting response cost
* fix kwargs.get("response_cost")
* fix getting response cost
2024-11-01 18:36:17 +05:30 · 2024-11-01 18:36:17 +05:30 · 9545b0e5cd
commit 9545b0e5cd
parent b73039b283
4 changed files with 24 additions and 12 deletions
--- a/litellm/integrations/SlackAlerting/slack_alerting.py
+++ b/litellm/integrations/SlackAlerting/slack_alerting.py
@@ -550,8 +550,14 @@ class SlackAlerting(CustomBatchLogger):
                    alerting_metadata=alerting_metadata,
                )

-    async def failed_tracking_alert(self, error_message: str):
-        """Raise alert when tracking failed for specific model"""
+    async def failed_tracking_alert(self, error_message: str, failing_model: str):
+        """
+        Raise alert when tracking failed for specific model
+
+        Args:
+            error_message (str): Error message
+            failing_model (str): Model that failed tracking
+        """
        if self.alerting is None or self.alert_types is None:
            # do nothing if alerting is not switched on
            return
@@ -560,7 +566,7 @@ class SlackAlerting(CustomBatchLogger):

        _cache: DualCache = self.internal_usage_cache
        message = "Failed Tracking Cost for " + error_message
-        _cache_key = "budget_alerts:failed_tracking:{}".format(message)
+        _cache_key = "budget_alerts:failed_tracking:{}".format(failing_model)
        result = await _cache.async_get_cache(key=_cache_key)
        if result is None:
            await self.send_alert(
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -1,11 +1,11 @@
 model_list:
  - model_name: gpt-4o
    litellm_params:
-      model: gpt-4o
+      model: openai/gpt-5
      api_key: os.environ/OPENAI_API_KEY
      api_base: https://exampleopenaiendpoint-production.up.railway.app/

-litellm_settings:
-  callbacks: ["prometheus"]
-  service_callback: ["prometheus_system"]
-  cache: true
+
+general_settings: 
+    alerting: ["slack"]
+    alerting_threshold: 0.001
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -265,6 +265,7 @@ from litellm.types.llms.anthropic import (
 )
 from litellm.types.llms.openai import HttpxBinaryResponseContent
 from litellm.types.router import RouterGeneralSettings
+from litellm.types.utils import StandardLoggingPayload

 try:
    from litellm._version import version
@@ -778,7 +779,6 @@ async def _PROXY_track_cost_callback(
        if kwargs.get("response_cost", None) is not None:
            response_cost = kwargs["response_cost"]
            user_api_key = metadata.get("user_api_key", None)
-
            if kwargs.get("cache_hit", False) is True:
                response_cost = 0.0
                verbose_proxy_logger.info(
@@ -838,13 +838,14 @@ async def _PROXY_track_cost_callback(
                    f"Cost tracking failed for model={model}.\nDebug info - {cost_tracking_failure_debug_info}\nAdd custom pricing - https://docs.litellm.ai/docs/proxy/custom_pricing"
                )
    except Exception as e:
-        error_msg = f"error in tracking cost callback - {traceback.format_exc()}"
+        error_msg = f"Error in tracking cost callback - {str(e)}\n Traceback:{traceback.format_exc()}"
        model = kwargs.get("model", "")
        metadata = kwargs.get("litellm_params", {}).get("metadata", {})
        error_msg += f"\n Args to _PROXY_track_cost_callback\n model: {model}\n metadata: {metadata}\n"
        asyncio.create_task(
            proxy_logging_obj.failed_tracking_alert(
                error_message=error_msg,
+                failing_model=model,
            )
        )
        verbose_proxy_logger.debug("error in tracking cost callback - %s", e)
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -667,13 +667,18 @@ class ProxyLogging:
                raise e
        return data

-    async def failed_tracking_alert(self, error_message: str):
+    async def failed_tracking_alert(
+        self,
+        error_message: str,
+        failing_model: str,
+    ):
        if self.alerting is None:
            return

        if self.slack_alerting_instance:
            await self.slack_alerting_instance.failed_tracking_alert(
-                error_message=error_message
+                error_message=error_message,
+                failing_model=failing_model,
            )

    async def budget_alerts(