Mirror of https://github.com/BerriAI/litellm.git, last synced 2025-04-27 03:34:10 +00:00.
feat(slack_alerting.py): enable provider-region based alerting
This commit is contained in:
parent
7694622007
commit
e4629ba65d
3 changed files with 318 additions and 77 deletions
|
@ -576,7 +576,9 @@ async def test_outage_alerting_called(
|
|||
slack_alerting.update_values(llm_router=router)
|
||||
with patch.object(
|
||||
slack_alerting, "outage_alerts", new=AsyncMock()
|
||||
) as mock_send_alert:
|
||||
) as mock_outage_alert, patch.object(
|
||||
slack_alerting, "region_outage_alerts", new=AsyncMock()
|
||||
) as mock_region_alert:
|
||||
try:
|
||||
await router.acompletion(
|
||||
model=model,
|
||||
|
@ -586,7 +588,8 @@ async def test_outage_alerting_called(
|
|||
except Exception as e:
|
||||
pass
|
||||
|
||||
mock_send_alert.assert_called_once()
|
||||
mock_outage_alert.assert_called_once()
|
||||
mock_region_alert.assert_called_once()
|
||||
|
||||
with patch.object(slack_alerting, "send_alert", new=AsyncMock()) as mock_send_alert:
|
||||
for _ in range(6):
|
||||
|
@ -600,6 +603,112 @@ async def test_outage_alerting_called(
|
|||
pass
|
||||
await asyncio.sleep(3)
|
||||
if error_code == 500 or error_code == 408:
|
||||
assert (
|
||||
mock_send_alert.assert_called_once()
|
||||
) # only model alert. region alert should only trigger for 2+ models in same region
|
||||
else:
|
||||
mock_send_alert.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "model, api_base, llm_provider, vertex_project, vertex_location",
    [
        ("gpt-3.5-turbo", None, "openai", None, None),
        (
            "azure/gpt-3.5-turbo",
            "https://openai-gpt-4-test-v-1.openai.azure.com",
            "azure",
            None,
            None,
        ),
        ("gemini-pro", None, "vertex_ai", "hardy-device-38811", "us-central1"),
    ],
)
@pytest.mark.parametrize("error_code", [500, 408, 400])
@pytest.mark.asyncio
async def test_region_outage_alerting_called(
    model, api_base, llm_provider, vertex_project, vertex_location, error_code
):
    """
    Verify region-wide outage alerting in SlackAlerting.

    A region outage alert should fire only when multiple deployments in the
    same provider/region fail with an outage-class error (500 or 408).
    400-class errors are caller mistakes, not outages, and must never alert.

    The test feeds six failures alternating between two deployments ("1" and
    "2"). Only the gemini-pro case puts both deployments in the same region
    (us-central1) — the azure/openai cases differ per-deployment — so only
    gemini-pro with a 500/408 error is expected to trigger the alert.
    """
    slack_alerting = SlackAlerting(
        alerting=["webhook"], alert_types=["region_outage_alerts"]
    )

    litellm.callbacks = [slack_alerting]

    # Build the exception that region_outage_alerts will be fed; its status
    # code determines whether the failure counts toward an outage.
    error_to_raise: Optional[APIError] = None

    if error_code == 400:
        print("RAISING 400 ERROR CODE")
        error_to_raise = litellm.BadRequestError(
            message="this is a bad request",
            model=model,
            llm_provider=llm_provider,
        )
    elif error_code == 408:
        print("RAISING 408 ERROR CODE")
        error_to_raise = litellm.Timeout(
            message="A timeout occurred", model=model, llm_provider=llm_provider
        )
    elif error_code == 500:
        print("RAISING 500 ERROR CODE")
        error_to_raise = litellm.ServiceUnavailableError(
            message="API is unavailable",
            model=model,
            llm_provider=llm_provider,
            response=httpx.Response(
                status_code=503,
                request=httpx.Request(
                    method="completion",
                    url="https://github.com/BerriAI/litellm",
                ),
            ),
        )

    # Two deployments of the same model. For vertex_ai they share a region
    # (vertex_location) but differ in project, so both land in the same
    # provider-region bucket; for azure/openai the second deployment's
    # vertex fields are irrelevant.
    router = Router(
        model_list=[
            {
                "model_name": model,
                "litellm_params": {
                    "model": model,
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_base": api_base,
                    "vertex_location": vertex_location,
                    "vertex_project": vertex_project,
                },
                "model_info": {"id": "1"},
            },
            {
                "model_name": model,
                "litellm_params": {
                    "model": model,
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_base": api_base,
                    "vertex_location": vertex_location,
                    "vertex_project": "vertex_project-2",
                },
                "model_info": {"id": "2"},
            },
        ],
        num_retries=0,
        allowed_fails=100,
    )

    slack_alerting.update_values(llm_router=router)
    with patch.object(slack_alerting, "send_alert", new=AsyncMock()) as mock_send_alert:
        # Report six failures, alternating between the two deployments so
        # each accumulates three — enough to cross the outage threshold.
        for idx in range(6):
            deployment_id = "1" if idx % 2 == 0 else "2"
            await slack_alerting.region_outage_alerts(
                exception=error_to_raise, deployment_id=deployment_id  # type: ignore
            )
        if model == "gemini-pro" and (error_code == 500 or error_code == 408):
            # Both deployments share us-central1 → region alert fires once.
            mock_send_alert.assert_called_once()
        else:
            # 400s never alert; non-vertex cases lack a shared region.
            mock_send_alert.assert_not_called()
|
Loading…
Add table
Add a link
Reference in a new issue