forked from phoenix/litellm-mirror
Merge pull request #3599 from taralika/patch-1
Ignore 0 failures and 0s latency in daily slack reports
This commit is contained in:
commit
adaafd72be
2 changed files with 59 additions and 6 deletions
|
@ -347,8 +347,9 @@ class SlackAlerting(CustomLogger):
|
||||||
|
|
||||||
all_none = True
|
all_none = True
|
||||||
for val in combined_metrics_values:
|
for val in combined_metrics_values:
|
||||||
if val is not None:
|
if val is not None and val > 0:
|
||||||
all_none = False
|
all_none = False
|
||||||
|
break
|
||||||
|
|
||||||
if all_none:
|
if all_none:
|
||||||
return False
|
return False
|
||||||
|
@ -366,13 +367,14 @@ class SlackAlerting(CustomLogger):
|
||||||
for value in failed_request_values
|
for value in failed_request_values
|
||||||
]
|
]
|
||||||
|
|
||||||
## Get the indices of top 5 keys with the highest numerical values (ignoring None values)
|
## Get the indices of top 5 keys with the highest numerical values (ignoring None and 0 values)
|
||||||
top_5_failed = sorted(
|
top_5_failed = sorted(
|
||||||
range(len(replaced_failed_values)),
|
range(len(replaced_failed_values)),
|
||||||
key=lambda i: replaced_failed_values[i],
|
key=lambda i: replaced_failed_values[i],
|
||||||
reverse=True,
|
reverse=True,
|
||||||
)[:5]
|
)[:5]
|
||||||
|
top_5_failed = [index for index in top_5_failed if replaced_failed_values[index] > 0]
|
||||||
|
|
||||||
# find top 5 slowest
|
# find top 5 slowest
|
||||||
# Replace None values with a placeholder value (0 in this case)
|
# Replace None values with a placeholder value (0 in this case)
|
||||||
placeholder_value = 0
|
placeholder_value = 0
|
||||||
|
@ -381,17 +383,20 @@ class SlackAlerting(CustomLogger):
|
||||||
for value in latency_values
|
for value in latency_values
|
||||||
]
|
]
|
||||||
|
|
||||||
# Get the indices of top 5 values with the highest numerical values (ignoring None values)
|
# Get the indices of top 5 values with the highest numerical values (ignoring None and 0 values)
|
||||||
top_5_slowest = sorted(
|
top_5_slowest = sorted(
|
||||||
range(len(replaced_slowest_values)),
|
range(len(replaced_slowest_values)),
|
||||||
key=lambda i: replaced_slowest_values[i],
|
key=lambda i: replaced_slowest_values[i],
|
||||||
reverse=True,
|
reverse=True,
|
||||||
)[:5]
|
)[:5]
|
||||||
|
top_5_slowest = [index for index in top_5_slowest if replaced_slowest_values[index] > 0]
|
||||||
|
|
||||||
# format alert -> return the litellm model name + api base
|
# format alert -> return the litellm model name + api base
|
||||||
message = f"\n\nHere are today's key metrics 📈: \n\n"
|
message = f"\n\nHere are today's key metrics 📈: \n\n"
|
||||||
|
|
||||||
message += "\n\n*❗️ Top 5 Deployments with Most Failed Requests:*\n\n"
|
message += "\n\n*❗️ Top Deployments with Most Failed Requests:*\n\n"
|
||||||
|
if not top_5_failed:
|
||||||
|
message += "\tNone\n"
|
||||||
for i in range(len(top_5_failed)):
|
for i in range(len(top_5_failed)):
|
||||||
key = failed_request_keys[top_5_failed[i]].split(":")[0]
|
key = failed_request_keys[top_5_failed[i]].split(":")[0]
|
||||||
_deployment = router.get_model_info(key)
|
_deployment = router.get_model_info(key)
|
||||||
|
@ -411,7 +416,9 @@ class SlackAlerting(CustomLogger):
|
||||||
value = replaced_failed_values[top_5_failed[i]]
|
value = replaced_failed_values[top_5_failed[i]]
|
||||||
message += f"\t{i+1}. Deployment: `{deployment_name}`, Failed Requests: `{value}`, API Base: `{api_base}`\n"
|
message += f"\t{i+1}. Deployment: `{deployment_name}`, Failed Requests: `{value}`, API Base: `{api_base}`\n"
|
||||||
|
|
||||||
message += "\n\n*😅 Top 5 Slowest Deployments:*\n\n"
|
message += "\n\n*😅 Top Slowest Deployments:*\n\n"
|
||||||
|
if not top_5_slowest:
|
||||||
|
message += "\tNone\n"
|
||||||
for i in range(len(top_5_slowest)):
|
for i in range(len(top_5_slowest)):
|
||||||
key = latency_keys[top_5_slowest[i]].split(":")[0]
|
key = latency_keys[top_5_slowest[i]].split(":")[0]
|
||||||
_deployment = router.get_model_info(key)
|
_deployment = router.get_model_info(key)
|
||||||
|
|
|
@ -359,3 +359,49 @@ async def test_send_llm_exception_to_slack():
|
||||||
)
|
)
|
||||||
|
|
||||||
await asyncio.sleep(3)
|
await asyncio.sleep(3)
|
||||||
|
|
||||||
|
|
||||||
|
# test models with 0 metrics are ignored
|
||||||
|
@pytest.mark.asyncio
async def test_send_daily_reports_ignores_zero_values():
    """Check that deployments with ``None`` or ``0`` metrics are left out of the report.

    The mocked cache returns one value per deployment for failed requests,
    followed by one value per deployment for latency. Only ``model3`` has a
    positive metric (10 failed requests), so it must be the only deployment
    named in the alert message, and the report must be reported as sent.
    """
    router = MagicMock()
    router.get_model_ids.return_value = ["model1", "model2", "model3"]

    slack_alerting = SlackAlerting(internal_usage_cache=MagicMock())
    # Cached values, in order:
    #   failed requests -> model1=None, model2=0, model3=10
    #   latency         -> model1=0,    model2=0, model3=None
    slack_alerting.internal_usage_cache.async_batch_get_cache = AsyncMock(
        return_value=[None, 0, 10, 0, 0, None]
    )
    slack_alerting.internal_usage_cache.async_batch_set_cache = AsyncMock()

    # Echo the deployment id back as the model name so it can be searched for
    # in the rendered message.
    router.get_model_info.side_effect = lambda x: {"litellm_params": {"model": x}}

    with patch.object(slack_alerting, "send_alert", new=AsyncMock()) as mock_send_alert:
        result = await slack_alerting.send_daily_reports(router)

        # Exactly one alert must have been sent.
        mock_send_alert.assert_called_once()
        message = mock_send_alert.call_args.kwargs["message"]

        # Only the deployment with a non-zero, non-None metric is listed.
        assert "model3" in message
        assert "model2" not in message
        assert "model1" not in message

    # Identity check: the singleton True, not merely something equal to it.
    assert result is True
|
||||||
|
|
||||||
|
|
||||||
|
# test no alert is sent if all None or 0 metrics
|
||||||
|
@pytest.mark.asyncio
async def test_send_daily_reports_all_zero_or_none():
    """Check that no alert is sent when every cached metric is ``None`` or ``0``.

    With nothing positive to report, ``send_daily_reports`` is expected to
    return ``False`` without calling ``send_alert``.
    """
    router = MagicMock()
    router.get_model_ids.return_value = ["model1", "model2", "model3"]

    slack_alerting = SlackAlerting(internal_usage_cache=MagicMock())
    # Every failed-request and latency value is either None or 0.
    slack_alerting.internal_usage_cache.async_batch_get_cache = AsyncMock(
        return_value=[None, 0, None, 0, None, 0]
    )

    with patch.object(slack_alerting, "send_alert", new=AsyncMock()) as mock_send_alert:
        result = await slack_alerting.send_daily_reports(router)

        # No alert may have been sent.
        mock_send_alert.assert_not_called()

    # Identity check: the singleton False, not merely something falsy/equal.
    assert result is False
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue