feat(prometheus_services.py): emit proxy latency for successful llm api requests

uses a prometheus histogram for this
Krrish Dholakia 2024-04-18 16:04:35 -07:00
parent 1b98503be3
commit 7f5bcf38b7
6 changed files with 87 additions and 20 deletions
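
For reference, the snippet below is a minimal sketch (not part of this commit) of how request latency is typically recorded with a prometheus_client Histogram; the metric name, label, and helper function are hypothetical stand-ins, while the commit itself routes latency through PrometheusServicesLogger.

# Minimal sketch, not from this commit: recording latency with a
# prometheus_client Histogram. Metric name, label, and helper are hypothetical.
from prometheus_client import Histogram

LITELLM_REQUEST_LATENCY = Histogram(
    "litellm_request_latency_seconds",  # hypothetical metric name
    "Latency of successful LLM API requests through the proxy",
    labelnames=["call_type"],  # e.g. "acompletion"
)

def record_request_latency(call_type: str, duration_seconds: float) -> None:
    # observe() buckets the measured duration into the histogram
    LITELLM_REQUEST_LATENCY.labels(call_type=call_type).observe(duration_seconds)

A histogram (rather than a gauge) fits latency tracking because each observe() call is bucketed, letting Prometheus compute percentiles across requests.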


@@ -1,9 +1,12 @@
-import litellm
+import litellm, traceback
+from litellm.proxy._types import UserAPIKeyAuth
 from .types.services import ServiceTypes, ServiceLoggerPayload
 from .integrations.prometheus_services import PrometheusServicesLogger
+from .integrations.custom_logger import CustomLogger
+from datetime import timedelta
 
 
-class ServiceLogging:
+class ServiceLogging(CustomLogger):
     """
     Separate class used for monitoring health of litellm-adjacent services (redis/postgres).
     """
@@ -14,7 +17,6 @@ class ServiceLogging:
         self.mock_testing_async_success_hook = 0
         self.mock_testing_sync_failure_hook = 0
         self.mock_testing_async_failure_hook = 0
-
         if "prometheus_system" in litellm.service_callback:
             self.prometheusServicesLogger = PrometheusServicesLogger()
 
@@ -34,7 +36,9 @@
         if self.mock_testing:
             self.mock_testing_sync_failure_hook += 1
 
-    async def async_service_success_hook(self, service: ServiceTypes, duration: float):
+    async def async_service_success_hook(
+        self, service: ServiceTypes, duration: float, call_type: str
+    ):
         """
         - For counting if the redis, postgres call is successful
         """
@@ -42,7 +46,11 @@
             self.mock_testing_async_success_hook += 1
 
         payload = ServiceLoggerPayload(
-            is_error=False, error=None, service=service, duration=duration
+            is_error=False,
+            error=None,
+            service=service,
+            duration=duration,
+            call_type=call_type,
         )
         for callback in litellm.service_callback:
             if callback == "prometheus_system":
@@ -51,7 +59,7 @@
                 )
 
     async def async_service_failure_hook(
-        self, service: ServiceTypes, duration: float, error: Exception
+        self, service: ServiceTypes, duration: float, error: Exception, call_type: str
     ):
         """
         - For counting if the redis, postgres call is unsuccessful
@@ -60,7 +68,11 @@
             self.mock_testing_async_failure_hook += 1
 
         payload = ServiceLoggerPayload(
-            is_error=True, error=str(error), service=service, duration=duration
+            is_error=True,
+            error=str(error),
+            service=service,
+            duration=duration,
+            call_type=call_type,
        )
         for callback in litellm.service_callback:
             if callback == "prometheus_system":
@@ -69,3 +81,37 @@
                 await self.prometheusServicesLogger.async_service_failure_hook(
                     payload=payload
                 )
+
+    async def async_post_call_failure_hook(
+        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+    ):
+        """
+        Hook to track failed litellm-service calls
+        """
+        return await super().async_post_call_failure_hook(
+            original_exception, user_api_key_dict
+        )
+
+    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
+        """
+        Hook to track latency for litellm proxy llm api calls
+        """
+        try:
+            _duration = end_time - start_time
+            if isinstance(_duration, timedelta):
+                _duration = _duration.total_seconds()
+            elif isinstance(_duration, float):
+                pass
+            else:
+                raise Exception(
+                    "Duration={} is not a float or timedelta object. type={}".format(
+                        _duration, type(_duration)
+                    )
+                )  # invalid _duration value
+            await self.async_service_success_hook(
+                service=ServiceTypes.LITELLM,
+                duration=_duration,
+                call_type=kwargs["call_type"],
+            )
+        except Exception as e:
+            raise e
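
The heart of the new async_log_success_event hook is normalizing end_time - start_time into seconds: callbacks may receive datetime objects (whose difference is a timedelta) or raw float timestamps. Below is a standalone sketch of that normalization, with to_seconds as a hypothetical helper name.

# Standalone sketch of the duration normalization performed by
# async_log_success_event above; to_seconds is a hypothetical helper name.
from datetime import datetime, timedelta

def to_seconds(start_time, end_time) -> float:
    _duration = end_time - start_time
    if isinstance(_duration, timedelta):
        return _duration.total_seconds()  # datetime inputs yield a timedelta
    elif isinstance(_duration, float):
        return _duration  # float timestamps subtract to a plain float
    raise TypeError(
        "Duration={} is not a float or timedelta object. type={}".format(
            _duration, type(_duration)
        )
    )

# datetime inputs -> timedelta -> seconds
assert to_seconds(datetime(2024, 4, 18, 16, 0, 0), datetime(2024, 4, 18, 16, 0, 2)) == 2.0
# float timestamps -> float difference
assert to_seconds(100.0, 102.5) == 2.5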