diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 2ad3af7b9b..55202fd160 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -9206,6 +9206,10 @@ def _db_health_readiness_check(): dependencies=[Depends(user_api_key_auth)], ) async def active_callbacks(): + """ + Returns a list of active callbacks on litellm.callbacks, litellm.input_callback, litellm.failure_callback, litellm.success_callback + """ + global proxy_logging_obj _alerting = str(general_settings.get("alerting")) # get success callback success_callback_names = [] @@ -9222,12 +9226,33 @@ async def active_callbacks(): + len(litellm.input_callback) + len(litellm.failure_callback) + len(litellm.success_callback) + + len(litellm._async_failure_callback) + + len(litellm._async_success_callback) + + len(litellm._async_input_callback) ) + alerting = proxy_logging_obj.alerting + _num_alerting = 0 + if alerting and isinstance(alerting, list): + _num_alerting = len(alerting) + return { "alerting": _alerting, - "success_callbacks": success_callback_names, + "litellm.callbacks": [str(x) for x in litellm.callbacks], + "litellm.input_callback": [str(x) for x in litellm.input_callback], + "litellm.failure_callback": [str(x) for x in litellm.failure_callback], + "litellm.success_callback": [str(x) for x in litellm.success_callback], + "litellm._async_success_callback": [ + str(x) for x in litellm._async_success_callback + ], + "litellm._async_failure_callback": [ + str(x) for x in litellm._async_failure_callback + ], + "litellm._async_input_callback": [ + str(x) for x in litellm._async_input_callback + ], "num_callbacks": _num_callbacks, + "num_alerting": _num_alerting, } @@ -9243,17 +9268,6 @@ async def health_readiness(): global general_settings try: # get success callback - _num_callbacks = 0 - try: - _num_callbacks = ( - len(litellm.callbacks) - + len(litellm.input_callback) - + len(litellm.failure_callback) - + len(litellm.success_callback) - ) - except: - _num_callbacks = 0 - success_callback_names = [] try: @@ -9289,7 +9303,6 @@ async def health_readiness(): "cache": cache_type, "litellm_version": version, "success_callbacks": success_callback_names, - "num_callbacks": _num_callbacks, **db_health_status, } else: @@ -9299,7 +9312,6 @@ async def health_readiness(): "cache": cache_type, "litellm_version": version, "success_callbacks": success_callback_names, - "num_callbacks": _num_callbacks, } except Exception as e: raise HTTPException(status_code=503, detail=f"Service Unhealthy ({str(e)})") diff --git a/tests/test_callbacks_on_proxy.py b/tests/test_callbacks_on_proxy.py index a4d31587de..c10b18ed1d 100644 --- a/tests/test_callbacks_on_proxy.py +++ b/tests/test_callbacks_on_proxy.py @@ -24,6 +24,12 @@ async def config_update(session, routing_strategy=None): "router_settings": { "routing_strategy": routing_strategy, }, + "general_settings": { + "alert_to_webhook_url": { + "llm_exceptions": "https://hooks.slack.com/services/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" + }, + "alert_types": ["llm_exceptions", "db_exceptions"], + }, } async with session.post(url, headers=headers, json=data) as response: @@ -39,15 +45,16 @@ async def config_update(session, routing_strategy=None): async def get_active_callbacks(session): - url = "http://0.0.0.0:4000/health/readiness" + url = "http://0.0.0.0:4000/active/callbacks" headers = { "Content-Type": "application/json", + "Authorization": "Bearer sk-1234", } async with session.get(url, headers=headers) as response: status = response.status response_text = await response.text() - print("response from /health/readiness") + print("response from /active/callbacks") print(response_text) print() @@ -57,8 +64,10 @@ async def get_active_callbacks(session): _json_response = await response.json() _num_callbacks = _json_response["num_callbacks"] + _num_alerts = _json_response["num_alerting"] print("current number of callbacks: ", _num_callbacks) - return _num_callbacks + print("current number of alerts: ", _num_alerts) + return _num_callbacks, _num_alerts async def get_current_routing_strategy(session): @@ -99,20 +108,20 @@ async def test_check_num_callbacks(): import uuid async with aiohttp.ClientSession() as session: - num_callbacks_1 = await get_active_callbacks(session=session) + num_callbacks_1, _ = await get_active_callbacks(session=session) assert ( num_callbacks_1 > 0 ) # /health/readiness returns 0 when some calculation goes wrong await asyncio.sleep(30) - num_callbacks_2 = await get_active_callbacks(session=session) + num_callbacks_2, _ = await get_active_callbacks(session=session) assert num_callbacks_1 == num_callbacks_2 await asyncio.sleep(30) - num_callbacks_3 = await get_active_callbacks(session=session) + num_callbacks_3, _ = await get_active_callbacks(session=session) assert num_callbacks_1 == num_callbacks_2 == num_callbacks_3 @@ -136,21 +145,23 @@ async def test_check_num_callbacks_on_lowest_latency(): original_routing_strategy = await get_current_routing_strategy(session=session) await config_update(session=session, routing_strategy="latency-based-routing") - num_callbacks_1 = await get_active_callbacks(session=session) + num_callbacks_1, num_alerts_1 = await get_active_callbacks(session=session) assert ( num_callbacks_1 > 0 ) # /health/readiness returns 0 when some calculation goes wrong await asyncio.sleep(30) - num_callbacks_2 = await get_active_callbacks(session=session) + num_callbacks_2, num_alerts_2 = await get_active_callbacks(session=session) assert num_callbacks_1 == num_callbacks_2 await asyncio.sleep(30) - num_callbacks_3 = await get_active_callbacks(session=session) + num_callbacks_3, num_alerts_3 = await get_active_callbacks(session=session) assert num_callbacks_1 == num_callbacks_2 == num_callbacks_3 + assert num_alerts_1 == num_alerts_2 == num_alerts_3 + await config_update(session=session, routing_strategy=original_routing_strategy)