Merge pull request #3427 from BerriAI/litellm_test_alert_size

[Test] - Ensure only 1 slack callback + Size of all callbacks does not grow
This commit is contained in:
Ishaan Jaff 2024-05-03 16:27:16 -07:00 committed by GitHub
commit 3dd1e8dfe7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 46 additions and 23 deletions

View file

@ -9206,6 +9206,10 @@ def _db_health_readiness_check():
dependencies=[Depends(user_api_key_auth)],
)
async def active_callbacks():
"""
Returns a list of active callbacks on litellm.callbacks, litellm.input_callback, litellm.failure_callback, litellm.success_callback
"""
global proxy_logging_obj
_alerting = str(general_settings.get("alerting"))
# get success callback
success_callback_names = []
@ -9222,12 +9226,33 @@ async def active_callbacks():
+ len(litellm.input_callback)
+ len(litellm.failure_callback)
+ len(litellm.success_callback)
+ len(litellm._async_failure_callback)
+ len(litellm._async_success_callback)
+ len(litellm._async_input_callback)
)
alerting = proxy_logging_obj.alerting
_num_alerting = 0
if alerting and isinstance(alerting, list):
_num_alerting = len(alerting)
return {
"alerting": _alerting,
"success_callbacks": success_callback_names,
"litellm.callbacks": [str(x) for x in litellm.callbacks],
"litellm.input_callback": [str(x) for x in litellm.input_callback],
"litellm.failure_callback": [str(x) for x in litellm.failure_callback],
"litellm.success_callback": [str(x) for x in litellm.success_callback],
"litellm._async_success_callback": [
str(x) for x in litellm._async_success_callback
],
"litellm._async_failure_callback": [
str(x) for x in litellm._async_failure_callback
],
"litellm._async_input_callback": [
str(x) for x in litellm._async_input_callback
],
"num_callbacks": _num_callbacks,
"num_alerting": _num_alerting,
}
@ -9243,17 +9268,6 @@ async def health_readiness():
global general_settings
try:
# get success callback
_num_callbacks = 0
try:
_num_callbacks = (
len(litellm.callbacks)
+ len(litellm.input_callback)
+ len(litellm.failure_callback)
+ len(litellm.success_callback)
)
except:
_num_callbacks = 0
success_callback_names = []
try:
@ -9289,7 +9303,6 @@ async def health_readiness():
"cache": cache_type,
"litellm_version": version,
"success_callbacks": success_callback_names,
"num_callbacks": _num_callbacks,
**db_health_status,
}
else:
@ -9299,7 +9312,6 @@ async def health_readiness():
"cache": cache_type,
"litellm_version": version,
"success_callbacks": success_callback_names,
"num_callbacks": _num_callbacks,
}
except Exception as e:
raise HTTPException(status_code=503, detail=f"Service Unhealthy ({str(e)})")

View file

@ -24,6 +24,12 @@ async def config_update(session, routing_strategy=None):
"router_settings": {
"routing_strategy": routing_strategy,
},
"general_settings": {
"alert_to_webhook_url": {
"llm_exceptions": "https://hooks.slack.com/services/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"
},
"alert_types": ["llm_exceptions", "db_exceptions"],
},
}
async with session.post(url, headers=headers, json=data) as response:
@ -39,15 +45,16 @@ async def config_update(session, routing_strategy=None):
async def get_active_callbacks(session):
url = "http://0.0.0.0:4000/health/readiness"
url = "http://0.0.0.0:4000/active/callbacks"
headers = {
"Content-Type": "application/json",
"Authorization": "Bearer sk-1234",
}
async with session.get(url, headers=headers) as response:
status = response.status
response_text = await response.text()
print("response from /health/readiness")
print("response from /active/callbacks")
print(response_text)
print()
@ -57,8 +64,10 @@ async def get_active_callbacks(session):
_json_response = await response.json()
_num_callbacks = _json_response["num_callbacks"]
_num_alerts = _json_response["num_alerting"]
print("current number of callbacks: ", _num_callbacks)
return _num_callbacks
print("current number of alerts: ", _num_alerts)
return _num_callbacks, _num_alerts
async def get_current_routing_strategy(session):
@ -99,20 +108,20 @@ async def test_check_num_callbacks():
import uuid
async with aiohttp.ClientSession() as session:
num_callbacks_1 = await get_active_callbacks(session=session)
num_callbacks_1, _ = await get_active_callbacks(session=session)
assert (
num_callbacks_1 > 0
) # /health/readiness returns 0 when some calculation goes wrong
await asyncio.sleep(30)
num_callbacks_2 = await get_active_callbacks(session=session)
num_callbacks_2, _ = await get_active_callbacks(session=session)
assert num_callbacks_1 == num_callbacks_2
await asyncio.sleep(30)
num_callbacks_3 = await get_active_callbacks(session=session)
num_callbacks_3, _ = await get_active_callbacks(session=session)
assert num_callbacks_1 == num_callbacks_2 == num_callbacks_3
@ -136,21 +145,23 @@ async def test_check_num_callbacks_on_lowest_latency():
original_routing_strategy = await get_current_routing_strategy(session=session)
await config_update(session=session, routing_strategy="latency-based-routing")
num_callbacks_1 = await get_active_callbacks(session=session)
num_callbacks_1, num_alerts_1 = await get_active_callbacks(session=session)
assert (
num_callbacks_1 > 0
) # /health/readiness returns 0 when some calculation goes wrong
await asyncio.sleep(30)
num_callbacks_2 = await get_active_callbacks(session=session)
num_callbacks_2, num_alerts_2 = await get_active_callbacks(session=session)
assert num_callbacks_1 == num_callbacks_2
await asyncio.sleep(30)
num_callbacks_3 = await get_active_callbacks(session=session)
num_callbacks_3, num_alerts_3 = await get_active_callbacks(session=session)
assert num_callbacks_1 == num_callbacks_2 == num_callbacks_3
assert num_alerts_1 == num_alerts_2 == num_alerts_3
await config_update(session=session, routing_strategy=original_routing_strategy)