forked from phoenix/litellm-mirror
(fix) slack alerting - don't spam the failed cost tracking alert for the same model (#6543)
* fix: use failing_model as cache key for failed_tracking_alert
* fix: use standard logging payload for getting response cost
* fix: kwargs.get("response_cost")
* fix: getting response cost
This commit is contained in:
parent
b73039b283
commit
9545b0e5cd
4 changed files with 24 additions and 12 deletions
|
@@ -550,8 +550,14 @@ class SlackAlerting(CustomBatchLogger):
|
||||||
alerting_metadata=alerting_metadata,
|
alerting_metadata=alerting_metadata,
|
||||||
)
|
)
|
||||||
|
|
||||||
async def failed_tracking_alert(self, error_message: str):
|
async def failed_tracking_alert(self, error_message: str, failing_model: str):
|
||||||
"""Raise alert when tracking failed for specific model"""
|
"""
|
||||||
|
Raise alert when tracking failed for specific model
|
||||||
|
|
||||||
|
Args:
|
||||||
|
error_message (str): Error message
|
||||||
|
failing_model (str): Model that failed tracking
|
||||||
|
"""
|
||||||
if self.alerting is None or self.alert_types is None:
|
if self.alerting is None or self.alert_types is None:
|
||||||
# do nothing if alerting is not switched on
|
# do nothing if alerting is not switched on
|
||||||
return
|
return
|
||||||
|
@@ -560,7 +566,7 @@ class SlackAlerting(CustomBatchLogger):
|
||||||
|
|
||||||
_cache: DualCache = self.internal_usage_cache
|
_cache: DualCache = self.internal_usage_cache
|
||||||
message = "Failed Tracking Cost for " + error_message
|
message = "Failed Tracking Cost for " + error_message
|
||||||
_cache_key = "budget_alerts:failed_tracking:{}".format(message)
|
_cache_key = "budget_alerts:failed_tracking:{}".format(failing_model)
|
||||||
result = await _cache.async_get_cache(key=_cache_key)
|
result = await _cache.async_get_cache(key=_cache_key)
|
||||||
if result is None:
|
if result is None:
|
||||||
await self.send_alert(
|
await self.send_alert(
|
||||||
|
|
|
@@ -1,11 +1,11 @@
|
||||||
model_list:
|
model_list:
|
||||||
- model_name: gpt-4o
|
- model_name: gpt-4o
|
||||||
litellm_params:
|
litellm_params:
|
||||||
model: gpt-4o
|
model: openai/gpt-5
|
||||||
api_key: os.environ/OPENAI_API_KEY
|
api_key: os.environ/OPENAI_API_KEY
|
||||||
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
api_base: https://exampleopenaiendpoint-production.up.railway.app/
|
||||||
|
|
||||||
litellm_settings:
|
|
||||||
callbacks: ["prometheus"]
|
general_settings:
|
||||||
service_callback: ["prometheus_system"]
|
alerting: ["slack"]
|
||||||
cache: true
|
alerting_threshold: 0.001
|
||||||
|
|
|
@@ -265,6 +265,7 @@ from litellm.types.llms.anthropic import (
|
||||||
)
|
)
|
||||||
from litellm.types.llms.openai import HttpxBinaryResponseContent
|
from litellm.types.llms.openai import HttpxBinaryResponseContent
|
||||||
from litellm.types.router import RouterGeneralSettings
|
from litellm.types.router import RouterGeneralSettings
|
||||||
|
from litellm.types.utils import StandardLoggingPayload
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from litellm._version import version
|
from litellm._version import version
|
||||||
|
@@ -778,7 +779,6 @@ async def _PROXY_track_cost_callback(
|
||||||
if kwargs.get("response_cost", None) is not None:
|
if kwargs.get("response_cost", None) is not None:
|
||||||
response_cost = kwargs["response_cost"]
|
response_cost = kwargs["response_cost"]
|
||||||
user_api_key = metadata.get("user_api_key", None)
|
user_api_key = metadata.get("user_api_key", None)
|
||||||
|
|
||||||
if kwargs.get("cache_hit", False) is True:
|
if kwargs.get("cache_hit", False) is True:
|
||||||
response_cost = 0.0
|
response_cost = 0.0
|
||||||
verbose_proxy_logger.info(
|
verbose_proxy_logger.info(
|
||||||
|
@@ -838,13 +838,14 @@ async def _PROXY_track_cost_callback(
|
||||||
f"Cost tracking failed for model={model}.\nDebug info - {cost_tracking_failure_debug_info}\nAdd custom pricing - https://docs.litellm.ai/docs/proxy/custom_pricing"
|
f"Cost tracking failed for model={model}.\nDebug info - {cost_tracking_failure_debug_info}\nAdd custom pricing - https://docs.litellm.ai/docs/proxy/custom_pricing"
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_msg = f"error in tracking cost callback - {traceback.format_exc()}"
|
error_msg = f"Error in tracking cost callback - {str(e)}\n Traceback:{traceback.format_exc()}"
|
||||||
model = kwargs.get("model", "")
|
model = kwargs.get("model", "")
|
||||||
metadata = kwargs.get("litellm_params", {}).get("metadata", {})
|
metadata = kwargs.get("litellm_params", {}).get("metadata", {})
|
||||||
error_msg += f"\n Args to _PROXY_track_cost_callback\n model: {model}\n metadata: {metadata}\n"
|
error_msg += f"\n Args to _PROXY_track_cost_callback\n model: {model}\n metadata: {metadata}\n"
|
||||||
asyncio.create_task(
|
asyncio.create_task(
|
||||||
proxy_logging_obj.failed_tracking_alert(
|
proxy_logging_obj.failed_tracking_alert(
|
||||||
error_message=error_msg,
|
error_message=error_msg,
|
||||||
|
failing_model=model,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
verbose_proxy_logger.debug("error in tracking cost callback - %s", e)
|
verbose_proxy_logger.debug("error in tracking cost callback - %s", e)
|
||||||
|
|
|
@@ -667,13 +667,18 @@ class ProxyLogging:
|
||||||
raise e
|
raise e
|
||||||
return data
|
return data
|
||||||
|
|
||||||
async def failed_tracking_alert(self, error_message: str):
|
async def failed_tracking_alert(
|
||||||
|
self,
|
||||||
|
error_message: str,
|
||||||
|
failing_model: str,
|
||||||
|
):
|
||||||
if self.alerting is None:
|
if self.alerting is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
if self.slack_alerting_instance:
|
if self.slack_alerting_instance:
|
||||||
await self.slack_alerting_instance.failed_tracking_alert(
|
await self.slack_alerting_instance.failed_tracking_alert(
|
||||||
error_message=error_message
|
error_message=error_message,
|
||||||
|
failing_model=failing_model,
|
||||||
)
|
)
|
||||||
|
|
||||||
async def budget_alerts(
|
async def budget_alerts(
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue