(fix) slack alerting - don't spam the failed cost tracking alert for the same model (#6543)

* fix: use failing_model as the cache key for failed_tracking_alert

* fix: use the standard logging payload for getting the response cost

* fix: kwargs.get("response_cost")

* fix: getting the response cost
Ishaan Jaff 2024-11-01 18:36:17 +05:30 committed by GitHub
parent b73039b283
commit 9545b0e5cd
4 changed files with 24 additions and 12 deletions


@@ -550,8 +550,14 @@ class SlackAlerting(CustomBatchLogger):
             alerting_metadata=alerting_metadata,
         )

-    async def failed_tracking_alert(self, error_message: str):
-        """Raise alert when tracking failed for specific model"""
+    async def failed_tracking_alert(self, error_message: str, failing_model: str):
+        """
+        Raise alert when tracking failed for specific model
+
+        Args:
+            error_message (str): Error message
+            failing_model (str): Model that failed tracking
+        """
         if self.alerting is None or self.alert_types is None:
             # do nothing if alerting is not switched on
             return
@@ -560,7 +566,7 @@ class SlackAlerting(CustomBatchLogger):
         _cache: DualCache = self.internal_usage_cache
         message = "Failed Tracking Cost for " + error_message
-        _cache_key = "budget_alerts:failed_tracking:{}".format(message)
+        _cache_key = "budget_alerts:failed_tracking:{}".format(failing_model)
         result = await _cache.async_get_cache(key=_cache_key)
         if result is None:
             await self.send_alert(


@@ -1,11 +1,11 @@
 model_list:
   - model_name: gpt-4o
     litellm_params:
-      model: gpt-4o
+      model: openai/gpt-5
       api_key: os.environ/OPENAI_API_KEY
       api_base: https://exampleopenaiendpoint-production.up.railway.app/

-litellm_settings:
-  callbacks: ["prometheus"]
-  service_callback: ["prometheus_system"]
-  cache: true
+general_settings:
+  alerting: ["slack"]
+  alerting_threshold: 0.001
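The config change above points the gpt-4o alias at openai/gpt-5, a model name without a cost-map entry, so every request through it fails cost tracking and exercises the alert path with Slack alerting enabled. A hedged smoke test against a locally running proxy follows; the port, the placeholder API key, and the assumption that no master key is enforced are all illustrative, not part of this commit.

# Assumes `litellm --config <this config>` is serving on the default http://localhost:4000
# and that authentication is not enforced, so the placeholder key below is accepted.
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

for _ in range(3):
    client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "ping"}],
    )

# Before this fix, each failed cost-tracking attempt could post its own Slack alert;
# with the per-model cache key, the three calls above should produce a single
# "Failed Tracking Cost" alert for this model.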


@@ -265,6 +265,7 @@ from litellm.types.llms.anthropic import (
 )
 from litellm.types.llms.openai import HttpxBinaryResponseContent
 from litellm.types.router import RouterGeneralSettings
+from litellm.types.utils import StandardLoggingPayload

 try:
     from litellm._version import version
@@ -778,7 +779,6 @@ async def _PROXY_track_cost_callback(
         if kwargs.get("response_cost", None) is not None:
             response_cost = kwargs["response_cost"]
             user_api_key = metadata.get("user_api_key", None)
-
             if kwargs.get("cache_hit", False) is True:
                 response_cost = 0.0
                 verbose_proxy_logger.info(
@@ -838,13 +838,14 @@ async def _PROXY_track_cost_callback(
                 f"Cost tracking failed for model={model}.\nDebug info - {cost_tracking_failure_debug_info}\nAdd custom pricing - https://docs.litellm.ai/docs/proxy/custom_pricing"
             )
     except Exception as e:
-        error_msg = f"error in tracking cost callback - {traceback.format_exc()}"
+        error_msg = f"Error in tracking cost callback - {str(e)}\n Traceback:{traceback.format_exc()}"
         model = kwargs.get("model", "")
         metadata = kwargs.get("litellm_params", {}).get("metadata", {})
         error_msg += f"\n Args to _PROXY_track_cost_callback\n model: {model}\n metadata: {metadata}\n"
         asyncio.create_task(
             proxy_logging_obj.failed_tracking_alert(
                 error_message=error_msg,
+                failing_model=model,
             )
         )
         verbose_proxy_logger.debug("error in tracking cost callback - %s", e)
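In the exception handler above, the model is now pulled out of kwargs and forwarded as failing_model, and the alert message includes str(e) alongside the traceback. A compressed, self-contained sketch of this fire-and-forget pattern with a stub alerter follows; the class and function names are illustrative, not LiteLLM's.

import asyncio
import traceback

class StubAlerter:
    async def failed_tracking_alert(self, error_message: str, failing_model: str) -> None:
        print(f"would alert once per model: {failing_model}")

async def track_cost(kwargs: dict, proxy_logging_obj: StubAlerter) -> None:
    try:
        raise KeyError("model not in cost map")  # stand-in for a real pricing lookup failure
    except Exception as e:
        error_msg = f"Error in tracking cost callback - {str(e)}\n Traceback:{traceback.format_exc()}"
        model = kwargs.get("model", "")
        # fire-and-forget: alert delivery must not block the request path
        asyncio.create_task(
            proxy_logging_obj.failed_tracking_alert(
                error_message=error_msg,
                failing_model=model,
            )
        )
        await asyncio.sleep(0)  # let the scheduled task run within this sketch

asyncio.run(track_cost({"model": "openai/gpt-5"}, StubAlerter()))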


@@ -667,13 +667,18 @@ class ProxyLogging:
                 raise e
         return data

-    async def failed_tracking_alert(self, error_message: str):
+    async def failed_tracking_alert(
+        self,
+        error_message: str,
+        failing_model: str,
+    ):
         if self.alerting is None:
             return

         if self.slack_alerting_instance:
             await self.slack_alerting_instance.failed_tracking_alert(
-                error_message=error_message
+                error_message=error_message,
+                failing_model=failing_model,
             )

     async def budget_alerts(
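ProxyLogging stays a thin pass-through: when Slack alerting is configured, it forwards both error_message and failing_model to the SlackAlerting instance. A small sketch of how this wiring could be exercised with a recording test double follows; the helper classes below are hypothetical and not from the repo.

import asyncio

class RecordingSlackAlerting:
    """Test double that records alerts instead of posting to Slack."""
    def __init__(self) -> None:
        self.alerts: list[tuple[str, str]] = []

    async def failed_tracking_alert(self, error_message: str, failing_model: str) -> None:
        self.alerts.append((error_message, failing_model))

class MiniProxyLogging:
    """Sketch of the pass-through layer; not the real ProxyLogging class."""
    def __init__(self, slack_alerting_instance) -> None:
        self.alerting = ["slack"]
        self.slack_alerting_instance = slack_alerting_instance

    async def failed_tracking_alert(self, error_message: str, failing_model: str) -> None:
        if self.alerting is None:
            return
        if self.slack_alerting_instance:
            await self.slack_alerting_instance.failed_tracking_alert(
                error_message=error_message,
                failing_model=failing_model,
            )

async def main() -> None:
    slack = RecordingSlackAlerting()
    proxy_logging = MiniProxyLogging(slack)
    await proxy_logging.failed_tracking_alert(
        error_message="no pricing for openai/gpt-5",
        failing_model="openai/gpt-5",
    )
    assert slack.alerts == [("no pricing for openai/gpt-5", "openai/gpt-5")]

asyncio.run(main())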