mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 11:14:04 +00:00
Merge pull request #3427 from BerriAI/litellm_test_alert_size
[Test] - Ensure only 1 slack callback + Size of all callbacks do not grow
This commit is contained in:
commit
3dd1e8dfe7
2 changed files with 46 additions and 23 deletions
|
@ -9206,6 +9206,10 @@ def _db_health_readiness_check():
|
||||||
dependencies=[Depends(user_api_key_auth)],
|
dependencies=[Depends(user_api_key_auth)],
|
||||||
)
|
)
|
||||||
async def active_callbacks():
|
async def active_callbacks():
|
||||||
|
"""
|
||||||
|
Returns a list of active callbacks on litellm.callbacks, litellm.input_callback, litellm.failure_callback, litellm.success_callback
|
||||||
|
"""
|
||||||
|
global proxy_logging_obj
|
||||||
_alerting = str(general_settings.get("alerting"))
|
_alerting = str(general_settings.get("alerting"))
|
||||||
# get success callback
|
# get success callback
|
||||||
success_callback_names = []
|
success_callback_names = []
|
||||||
|
@ -9222,12 +9226,33 @@ async def active_callbacks():
|
||||||
+ len(litellm.input_callback)
|
+ len(litellm.input_callback)
|
||||||
+ len(litellm.failure_callback)
|
+ len(litellm.failure_callback)
|
||||||
+ len(litellm.success_callback)
|
+ len(litellm.success_callback)
|
||||||
|
+ len(litellm._async_failure_callback)
|
||||||
|
+ len(litellm._async_success_callback)
|
||||||
|
+ len(litellm._async_input_callback)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
alerting = proxy_logging_obj.alerting
|
||||||
|
_num_alerting = 0
|
||||||
|
if alerting and isinstance(alerting, list):
|
||||||
|
_num_alerting = len(alerting)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"alerting": _alerting,
|
"alerting": _alerting,
|
||||||
"success_callbacks": success_callback_names,
|
"litellm.callbacks": [str(x) for x in litellm.callbacks],
|
||||||
|
"litellm.input_callback": [str(x) for x in litellm.input_callback],
|
||||||
|
"litellm.failure_callback": [str(x) for x in litellm.failure_callback],
|
||||||
|
"litellm.success_callback": [str(x) for x in litellm.success_callback],
|
||||||
|
"litellm._async_success_callback": [
|
||||||
|
str(x) for x in litellm._async_success_callback
|
||||||
|
],
|
||||||
|
"litellm._async_failure_callback": [
|
||||||
|
str(x) for x in litellm._async_failure_callback
|
||||||
|
],
|
||||||
|
"litellm._async_input_callback": [
|
||||||
|
str(x) for x in litellm._async_input_callback
|
||||||
|
],
|
||||||
"num_callbacks": _num_callbacks,
|
"num_callbacks": _num_callbacks,
|
||||||
|
"num_alerting": _num_alerting,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -9243,17 +9268,6 @@ async def health_readiness():
|
||||||
global general_settings
|
global general_settings
|
||||||
try:
|
try:
|
||||||
# get success callback
|
# get success callback
|
||||||
_num_callbacks = 0
|
|
||||||
try:
|
|
||||||
_num_callbacks = (
|
|
||||||
len(litellm.callbacks)
|
|
||||||
+ len(litellm.input_callback)
|
|
||||||
+ len(litellm.failure_callback)
|
|
||||||
+ len(litellm.success_callback)
|
|
||||||
)
|
|
||||||
except:
|
|
||||||
_num_callbacks = 0
|
|
||||||
|
|
||||||
success_callback_names = []
|
success_callback_names = []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -9289,7 +9303,6 @@ async def health_readiness():
|
||||||
"cache": cache_type,
|
"cache": cache_type,
|
||||||
"litellm_version": version,
|
"litellm_version": version,
|
||||||
"success_callbacks": success_callback_names,
|
"success_callbacks": success_callback_names,
|
||||||
"num_callbacks": _num_callbacks,
|
|
||||||
**db_health_status,
|
**db_health_status,
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
|
@ -9299,7 +9312,6 @@ async def health_readiness():
|
||||||
"cache": cache_type,
|
"cache": cache_type,
|
||||||
"litellm_version": version,
|
"litellm_version": version,
|
||||||
"success_callbacks": success_callback_names,
|
"success_callbacks": success_callback_names,
|
||||||
"num_callbacks": _num_callbacks,
|
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise HTTPException(status_code=503, detail=f"Service Unhealthy ({str(e)})")
|
raise HTTPException(status_code=503, detail=f"Service Unhealthy ({str(e)})")
|
||||||
|
|
|
@ -24,6 +24,12 @@ async def config_update(session, routing_strategy=None):
|
||||||
"router_settings": {
|
"router_settings": {
|
||||||
"routing_strategy": routing_strategy,
|
"routing_strategy": routing_strategy,
|
||||||
},
|
},
|
||||||
|
"general_settings": {
|
||||||
|
"alert_to_webhook_url": {
|
||||||
|
"llm_exceptions": "https://hooks.slack.com/services/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"
|
||||||
|
},
|
||||||
|
"alert_types": ["llm_exceptions", "db_exceptions"],
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
async with session.post(url, headers=headers, json=data) as response:
|
async with session.post(url, headers=headers, json=data) as response:
|
||||||
|
@ -39,15 +45,16 @@ async def config_update(session, routing_strategy=None):
|
||||||
|
|
||||||
|
|
||||||
async def get_active_callbacks(session):
|
async def get_active_callbacks(session):
|
||||||
url = "http://0.0.0.0:4000/health/readiness"
|
url = "http://0.0.0.0:4000/active/callbacks"
|
||||||
headers = {
|
headers = {
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
|
"Authorization": "Bearer sk-1234",
|
||||||
}
|
}
|
||||||
|
|
||||||
async with session.get(url, headers=headers) as response:
|
async with session.get(url, headers=headers) as response:
|
||||||
status = response.status
|
status = response.status
|
||||||
response_text = await response.text()
|
response_text = await response.text()
|
||||||
print("response from /health/readiness")
|
print("response from /active/callbacks")
|
||||||
print(response_text)
|
print(response_text)
|
||||||
print()
|
print()
|
||||||
|
|
||||||
|
@ -57,8 +64,10 @@ async def get_active_callbacks(session):
|
||||||
_json_response = await response.json()
|
_json_response = await response.json()
|
||||||
|
|
||||||
_num_callbacks = _json_response["num_callbacks"]
|
_num_callbacks = _json_response["num_callbacks"]
|
||||||
|
_num_alerts = _json_response["num_alerting"]
|
||||||
print("current number of callbacks: ", _num_callbacks)
|
print("current number of callbacks: ", _num_callbacks)
|
||||||
return _num_callbacks
|
print("current number of alerts: ", _num_alerts)
|
||||||
|
return _num_callbacks, _num_alerts
|
||||||
|
|
||||||
|
|
||||||
async def get_current_routing_strategy(session):
|
async def get_current_routing_strategy(session):
|
||||||
|
@ -99,20 +108,20 @@ async def test_check_num_callbacks():
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
async with aiohttp.ClientSession() as session:
|
async with aiohttp.ClientSession() as session:
|
||||||
num_callbacks_1 = await get_active_callbacks(session=session)
|
num_callbacks_1, _ = await get_active_callbacks(session=session)
|
||||||
assert (
|
assert (
|
||||||
num_callbacks_1 > 0
|
num_callbacks_1 > 0
|
||||||
) # /health/readiness returns 0 when some calculation goes wrong
|
) # /health/readiness returns 0 when some calculation goes wrong
|
||||||
|
|
||||||
await asyncio.sleep(30)
|
await asyncio.sleep(30)
|
||||||
|
|
||||||
num_callbacks_2 = await get_active_callbacks(session=session)
|
num_callbacks_2, _ = await get_active_callbacks(session=session)
|
||||||
|
|
||||||
assert num_callbacks_1 == num_callbacks_2
|
assert num_callbacks_1 == num_callbacks_2
|
||||||
|
|
||||||
await asyncio.sleep(30)
|
await asyncio.sleep(30)
|
||||||
|
|
||||||
num_callbacks_3 = await get_active_callbacks(session=session)
|
num_callbacks_3, _ = await get_active_callbacks(session=session)
|
||||||
|
|
||||||
assert num_callbacks_1 == num_callbacks_2 == num_callbacks_3
|
assert num_callbacks_1 == num_callbacks_2 == num_callbacks_3
|
||||||
|
|
||||||
|
@ -136,21 +145,23 @@ async def test_check_num_callbacks_on_lowest_latency():
|
||||||
original_routing_strategy = await get_current_routing_strategy(session=session)
|
original_routing_strategy = await get_current_routing_strategy(session=session)
|
||||||
await config_update(session=session, routing_strategy="latency-based-routing")
|
await config_update(session=session, routing_strategy="latency-based-routing")
|
||||||
|
|
||||||
num_callbacks_1 = await get_active_callbacks(session=session)
|
num_callbacks_1, num_alerts_1 = await get_active_callbacks(session=session)
|
||||||
assert (
|
assert (
|
||||||
num_callbacks_1 > 0
|
num_callbacks_1 > 0
|
||||||
) # /health/readiness returns 0 when some calculation goes wrong
|
) # /health/readiness returns 0 when some calculation goes wrong
|
||||||
|
|
||||||
await asyncio.sleep(30)
|
await asyncio.sleep(30)
|
||||||
|
|
||||||
num_callbacks_2 = await get_active_callbacks(session=session)
|
num_callbacks_2, num_alerts_2 = await get_active_callbacks(session=session)
|
||||||
|
|
||||||
assert num_callbacks_1 == num_callbacks_2
|
assert num_callbacks_1 == num_callbacks_2
|
||||||
|
|
||||||
await asyncio.sleep(30)
|
await asyncio.sleep(30)
|
||||||
|
|
||||||
num_callbacks_3 = await get_active_callbacks(session=session)
|
num_callbacks_3, num_alerts_3 = await get_active_callbacks(session=session)
|
||||||
|
|
||||||
assert num_callbacks_1 == num_callbacks_2 == num_callbacks_3
|
assert num_callbacks_1 == num_callbacks_2 == num_callbacks_3
|
||||||
|
|
||||||
|
assert num_alerts_1 == num_alerts_2 == num_alerts_3
|
||||||
|
|
||||||
await config_update(session=session, routing_strategy=original_routing_strategy)
|
await config_update(session=session, routing_strategy=original_routing_strategy)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue