feat(prometheus_services.py): emit proxy latency for successful llm api requests

uses a prometheus histogram for this
Krrish Dholakia 2024-04-18 16:04:35 -07:00
parent 1b98503be3
commit 7f5bcf38b7
6 changed files with 87 additions and 20 deletions
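
For reference, the snippet below is a minimal sketch (not part of this commit) of how request latency is typically recorded with a prometheus_client Histogram; the metric name, label, and helper function are hypothetical stand-ins, while the commit itself routes latency through PrometheusServicesLogger.

# Minimal sketch, not from this commit: recording latency with a
# prometheus_client Histogram. Metric name, label, and helper are hypothetical.
from prometheus_client import Histogram

LITELLM_REQUEST_LATENCY = Histogram(
    "litellm_request_latency_seconds",  # hypothetical metric name
    "Latency of successful LLM API requests through the proxy",
    labelnames=["call_type"],  # e.g. "acompletion"
)

def record_request_latency(call_type: str, duration_seconds: float) -> None:
    # observe() buckets the measured duration into the histogram
    LITELLM_REQUEST_LATENCY.labels(call_type=call_type).observe(duration_seconds)

A histogram (rather than a gauge) fits latency tracking because each observe() call is bucketed, letting Prometheus compute percentiles across requests.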


@@ -1,9 +1,12 @@
-import litellm
+import litellm, traceback
+from litellm.proxy._types import UserAPIKeyAuth
 from .types.services import ServiceTypes, ServiceLoggerPayload
 from .integrations.prometheus_services import PrometheusServicesLogger
+from .integrations.custom_logger import CustomLogger
+from datetime import timedelta
 
 
-class ServiceLogging:
+class ServiceLogging(CustomLogger):
     """
     Separate class used for monitoring health of litellm-adjacent services (redis/postgres).
     """
@@ -14,7 +17,6 @@ class ServiceLogging:
         self.mock_testing_async_success_hook = 0
         self.mock_testing_sync_failure_hook = 0
         self.mock_testing_async_failure_hook = 0
-
         if "prometheus_system" in litellm.service_callback:
             self.prometheusServicesLogger = PrometheusServicesLogger()
 
@@ -34,7 +36,9 @@
         if self.mock_testing:
             self.mock_testing_sync_failure_hook += 1
 
-    async def async_service_success_hook(self, service: ServiceTypes, duration: float):
+    async def async_service_success_hook(
+        self, service: ServiceTypes, duration: float, call_type: str
+    ):
         """
         - For counting if the redis, postgres call is successful
         """
@@ -42,7 +46,11 @@
             self.mock_testing_async_success_hook += 1
 
         payload = ServiceLoggerPayload(
-            is_error=False, error=None, service=service, duration=duration
+            is_error=False,
+            error=None,
+            service=service,
+            duration=duration,
+            call_type=call_type,
         )
         for callback in litellm.service_callback:
             if callback == "prometheus_system":
@@ -51,7 +59,7 @@
                 )
 
     async def async_service_failure_hook(
-        self, service: ServiceTypes, duration: float, error: Exception
+        self, service: ServiceTypes, duration: float, error: Exception, call_type: str
     ):
         """
         - For counting if the redis, postgres call is unsuccessful
@@ -60,7 +68,11 @@
             self.mock_testing_async_failure_hook += 1
 
         payload = ServiceLoggerPayload(
-            is_error=True, error=str(error), service=service, duration=duration
+            is_error=True,
+            error=str(error),
+            service=service,
+            duration=duration,
+            call_type=call_type,
        )
         for callback in litellm.service_callback:
             if callback == "prometheus_system":
@@ -69,3 +81,37 @@
                 await self.prometheusServicesLogger.async_service_failure_hook(
                     payload=payload
                 )
+
+    async def async_post_call_failure_hook(
+        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+    ):
+        """
+        Hook to track failed litellm-service calls
+        """
+        return await super().async_post_call_failure_hook(
+            original_exception, user_api_key_dict
+        )
+
+    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
+        """
+        Hook to track latency for litellm proxy llm api calls
+        """
+        try:
+            _duration = end_time - start_time
+            if isinstance(_duration, timedelta):
+                _duration = _duration.total_seconds()
+            elif isinstance(_duration, float):
+                pass
+            else:
+                raise Exception(
+                    "Duration={} is not a float or timedelta object. type={}".format(
+                        _duration, type(_duration)
+                    )
+                )  # invalid _duration value
+            await self.async_service_success_hook(
+                service=ServiceTypes.LITELLM,
+                duration=_duration,
+                call_type=kwargs["call_type"],
+            )
+        except Exception as e:
+            raise e
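
The heart of the new async_log_success_event hook is normalizing end_time - start_time into seconds: callbacks may receive datetime objects (whose difference is a timedelta) or raw float timestamps. Below is a standalone sketch of that normalization, with to_seconds as a hypothetical helper name.

# Standalone sketch of the duration normalization performed by
# async_log_success_event above; to_seconds is a hypothetical helper name.
from datetime import datetime, timedelta

def to_seconds(start_time, end_time) -> float:
    _duration = end_time - start_time
    if isinstance(_duration, timedelta):
        return _duration.total_seconds()  # datetime inputs yield a timedelta
    elif isinstance(_duration, float):
        return _duration  # float timestamps subtract to a plain float
    raise TypeError(
        "Duration={} is not a float or timedelta object. type={}".format(
            _duration, type(_duration)
        )
    )

# datetime inputs -> timedelta -> seconds
assert to_seconds(datetime(2024, 4, 18, 16, 0, 0), datetime(2024, 4, 18, 16, 0, 2)) == 2.0
# float timestamps -> float difference
assert to_seconds(100.0, 102.5) == 2.5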