feat(slack_alerting.py): enable provider-region based alerting

Krrish Dholakia 2024-05-25 16:55:46 -07:00
parent 7694622007
commit e4629ba65d
3 changed files with 318 additions and 77 deletions
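
The diff below adds a `region_outage_alerts` hook alongside the existing per-deployment `outage_alerts`. As a rough mental model of what "provider-region based alerting" means, here is a minimal sketch of failure aggregation keyed by (provider, region); the names `record_region_failure` and `REGION_ALERT_THRESHOLD` are hypothetical, not litellm API — the committed implementation lives in `slack_alerting.py`:

```python
# Minimal sketch of provider-region outage aggregation (hypothetical names;
# the real logic is in litellm's slack_alerting.py).
from collections import defaultdict
from typing import DefaultDict, Set, Tuple

REGION_ALERT_THRESHOLD = 2  # assumed: require failures from 2+ distinct deployments

# (provider, region) -> ids of deployments that have failed there
_failed_deployments: DefaultDict[Tuple[str, str], Set[str]] = defaultdict(set)


def record_region_failure(provider: str, region: str, deployment_id: str) -> bool:
    """Track a failed call; return True when a region-level alert should fire."""
    key = (provider, region)
    _failed_deployments[key].add(deployment_id)
    # One flaky deployment is a deployment problem, not a region problem, so
    # alert only once multiple deployments in the same region have failed.
    return len(_failed_deployments[key]) == REGION_ALERT_THRESHOLD


# e.g. two distinct vertex_ai deployments failing in us-central1 trips the alert:
assert record_region_failure("vertex_ai", "us-central1", "1") is False
assert record_region_failure("vertex_ai", "us-central1", "2") is True
```

This matches the rule the new test below encodes: a region alert should only trigger once 2+ models in the same region have failed.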


@@ -576,7 +576,9 @@ async def test_outage_alerting_called(
     slack_alerting.update_values(llm_router=router)
     with patch.object(
         slack_alerting, "outage_alerts", new=AsyncMock()
-    ) as mock_send_alert:
+    ) as mock_outage_alert, patch.object(
+        slack_alerting, "region_outage_alerts", new=AsyncMock()
+    ) as mock_region_alert:
         try:
             await router.acompletion(
                 model=model,
@@ -586,7 +588,8 @@ async def test_outage_alerting_called(
         except Exception as e:
             pass
-        mock_send_alert.assert_called_once()
+        mock_outage_alert.assert_called_once()
+        mock_region_alert.assert_called_once()

     with patch.object(slack_alerting, "send_alert", new=AsyncMock()) as mock_send_alert:
         for _ in range(6):
@@ -600,6 +603,112 @@ async def test_outage_alerting_called(
                 pass
         await asyncio.sleep(3)
         if error_code == 500 or error_code == 408:
-            mock_send_alert.assert_called_once()
+            assert (
+                mock_send_alert.call_count == 1
+            )  # only model alert. region alert should only trigger for 2+ models in same region
         else:
             mock_send_alert.assert_not_called()
+
+
+@pytest.mark.parametrize(
+    "model, api_base, llm_provider, vertex_project, vertex_location",
+    [
+        ("gpt-3.5-turbo", None, "openai", None, None),
+        (
+            "azure/gpt-3.5-turbo",
+            "https://openai-gpt-4-test-v-1.openai.azure.com",
+            "azure",
+            None,
+            None,
+        ),
+        ("gemini-pro", None, "vertex_ai", "hardy-device-38811", "us-central1"),
+    ],
+)
+@pytest.mark.parametrize("error_code", [500, 408, 400])
+@pytest.mark.asyncio
+async def test_region_outage_alerting_called(
+    model, api_base, llm_provider, vertex_project, vertex_location, error_code
+):
"""
If call fails, outage alert is called
If multiple calls fail, outage alert is sent
"""
+    slack_alerting = SlackAlerting(
+        alerting=["webhook"], alert_types=["region_outage_alerts"]
+    )
+    litellm.callbacks = [slack_alerting]
+
+    error_to_raise: Optional[APIError] = None
+    if error_code == 400:
+        print("RAISING 400 ERROR CODE")
+        error_to_raise = litellm.BadRequestError(
+            message="this is a bad request",
+            model=model,
+            llm_provider=llm_provider,
+        )
+    elif error_code == 408:
+        print("RAISING 408 ERROR CODE")
+        error_to_raise = litellm.Timeout(
+            message="A timeout occurred", model=model, llm_provider=llm_provider
+        )
+    elif error_code == 500:
+        print("RAISING 500 ERROR CODE")
+        error_to_raise = litellm.ServiceUnavailableError(
+            message="API is unavailable",
+            model=model,
+            llm_provider=llm_provider,
+            response=httpx.Response(
+                status_code=503,
+                request=httpx.Request(
+                    method="completion",
+                    url="https://github.com/BerriAI/litellm",
+                ),
+            ),
+        )
+
+    router = Router(
+        model_list=[
+            {
+                "model_name": model,
+                "litellm_params": {
+                    "model": model,
+                    "api_key": os.getenv("AZURE_API_KEY"),
+                    "api_base": api_base,
+                    "vertex_location": vertex_location,
+                    "vertex_project": vertex_project,
+                },
+                "model_info": {"id": "1"},
+            },
+            {
+                "model_name": model,
+                "litellm_params": {
+                    "model": model,
+                    "api_key": os.getenv("AZURE_API_KEY"),
+                    "api_base": api_base,
+                    "vertex_location": vertex_location,
+                    "vertex_project": "vertex_project-2",
+                },
+                "model_info": {"id": "2"},
+            },
+        ],
+        num_retries=0,
+        allowed_fails=100,
+    )
+
+    slack_alerting.update_values(llm_router=router)
+
+    with patch.object(slack_alerting, "send_alert", new=AsyncMock()) as mock_send_alert:
+        for idx in range(6):
+            if idx % 2 == 0:
+                deployment_id = "1"
+            else:
+                deployment_id = "2"
+            await slack_alerting.region_outage_alerts(
+                exception=error_to_raise, deployment_id=deployment_id  # type: ignore
+            )
+        if model == "gemini-pro" and (error_code == 500 or error_code == 408):
+            mock_send_alert.assert_called_once()
+        else:
+            mock_send_alert.assert_not_called()
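
Aside on the mocking pattern used throughout this diff: `patch.object(..., new=AsyncMock())` swaps a coroutine method for an awaitable mock whose calls can be counted, which is what the `assert_called_once()` / `assert_not_called()` checks rely on. A self-contained illustration using only the standard library — the `Alerter` class here is invented for the example, not part of litellm:

```python
import asyncio
from unittest.mock import AsyncMock, patch


class Alerter:
    async def send_alert(self, message: str) -> None:
        raise RuntimeError("would post to Slack; never runs under the mock")


async def main() -> None:
    alerter = Alerter()
    with patch.object(alerter, "send_alert", new=AsyncMock()) as mock_send_alert:
        # The patched method records each call instead of executing the original.
        await alerter.send_alert("region us-central1 degraded")
        mock_send_alert.assert_called_once()  # exactly one call recorded
        mock_send_alert.assert_awaited_once_with("region us-central1 degraded")


asyncio.run(main())
```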