feat(prometheus_services.py): track when redis calls fail

This commit is contained in:
Krrish Dholakia 2024-04-13 18:31:35 -07:00
parent 4e81acf2c6
commit 9f42d15713

View file

@ -40,17 +40,13 @@ class PrometheusServicesLogger:
for service in self.services: for service in self.services:
histogram = self.create_histogram(service) histogram = self.create_histogram(service)
self.payload_to_prometheus_map[service] = histogram counter = self.create_counter(service)
self.payload_to_prometheus_map[service] = [histogram, counter]
self.prometheus_to_amount_map: dict = ( self.prometheus_to_amount_map: dict = (
{} {}
) # the field / value in ServiceLoggerPayload the object needs to be incremented by ) # the field / value in ServiceLoggerPayload the object needs to be incremented by
# self.payload_to_prometheus_map["service"] = [self.litellm_service_latency]
# self.prometheus_to_amount_map[self.litellm_service_latency._name] = (
# "duration"
# )
### MOCK TESTING ### ### MOCK TESTING ###
self.mock_testing = mock_testing self.mock_testing = mock_testing
self.mock_testing_success_calls = 0 self.mock_testing_success_calls = 0
@ -85,6 +81,17 @@ class PrometheusServicesLogger:
labelnames=[label], labelnames=[label],
) )
def create_counter(self, label: str):
    """Return the failure counter for one service, creating it if needed.

    Args:
        label: Service name. It is interpolated into the metric name and
            the help text, and is also used as the counter's single
            label name.

    Returns:
        Whatever ``self.get_metric`` returns when
        ``self.is_metric_registered`` reports the metric name as already
        registered; otherwise a new ``self.Counter`` instance.
    """
    # NOTE(review): the help text says "failed requests" but the metric
    # name only says "requests". The name is left untouched here because
    # it is what scrapers and dashboards see; consider renaming it to
    # "litellm_{}_failed_requests" in a coordinated change.
    metric_name = "litellm_{}_requests".format(label)

    # Reuse an already-registered metric instead of constructing a second
    # one under the same name.
    if self.is_metric_registered(metric_name):
        return self.get_metric(metric_name)

    return self.Counter(
        metric_name,
        "Total failed requests for {} service".format(label),
        labelnames=[label],
    )
def observe_histogram( def observe_histogram(
self, self,
histogram, histogram,
@ -98,7 +105,7 @@ class PrometheusServicesLogger:
def increment_counter( def increment_counter(
self, self,
counter, counter,
labels: list, labels: str,
amount: float, amount: float,
): ):
assert isinstance(counter, self.Counter) assert isinstance(counter, self.Counter)
@ -110,16 +117,29 @@ class PrometheusServicesLogger:
self.mock_testing_success_calls += 1 self.mock_testing_success_calls += 1
if payload.service.value in self.payload_to_prometheus_map: if payload.service.value in self.payload_to_prometheus_map:
self.observe_histogram( prom_objects = self.payload_to_prometheus_map[payload.service.value]
histogram=self.payload_to_prometheus_map[payload.service.value], for obj in prom_objects:
labels=payload.service.value, if isinstance(obj, self.Histogram):
amount=payload.duration, self.observe_histogram(
) histogram=obj,
labels=payload.service.value,
amount=payload.duration,
)
def service_failure_hook(self, payload: ServiceLoggerPayload):
    """Record one failed call for the service named in ``payload``.

    When ``self.mock_testing`` is truthy, the
    ``mock_testing_failure_calls`` tally is bumped first. Then, if
    ``payload.service.value`` is a key of
    ``self.payload_to_prometheus_map``, every object stored under that key
    that is an instance of ``self.Counter`` is incremented by 1 through
    ``self.increment_counter``. Other objects stored under the key are
    left alone.

    Args:
        payload: Service log payload; only ``payload.service.value`` is
            read here.
    """
    if self.mock_testing:
        self.mock_testing_failure_calls += 1

    service_name = payload.service.value
    if service_name in self.payload_to_prometheus_map:
        prom_objects = self.payload_to_prometheus_map[service_name]
        for obj in prom_objects:
            # The per-service list mixes metric kinds; only counters are
            # touched on the failure path.
            if isinstance(obj, self.Counter):
                self.increment_counter(
                    counter=obj,
                    labels=service_name,
                    amount=1,  # LOG ERROR COUNT TO PROMETHEUS
                )
async def async_service_success_hook(self, payload: ServiceLoggerPayload): async def async_service_success_hook(self, payload: ServiceLoggerPayload):
""" """
Log successful call to prometheus Log successful call to prometheus
@ -128,12 +148,25 @@ class PrometheusServicesLogger:
self.mock_testing_success_calls += 1 self.mock_testing_success_calls += 1
if payload.service.value in self.payload_to_prometheus_map: if payload.service.value in self.payload_to_prometheus_map:
self.observe_histogram( prom_objects = self.payload_to_prometheus_map[payload.service.value]
histogram=self.payload_to_prometheus_map[payload.service.value], for obj in prom_objects:
labels=payload.service.value, if isinstance(obj, self.Histogram):
amount=payload.duration, self.observe_histogram(
) histogram=obj,
labels=payload.service.value,
amount=payload.duration,
)
async def async_service_failure_hook(self, payload: ServiceLoggerPayload):
    """Record one failed call for the service named in ``payload``.

    Coroutine form of the failure hook; nothing is awaited in the body.
    When ``self.mock_testing`` is truthy, the
    ``mock_testing_failure_calls`` tally is bumped first. Then, if
    ``payload.service.value`` is a key of
    ``self.payload_to_prometheus_map``, every object stored under that key
    that is an instance of ``self.Counter`` is incremented by 1 through
    ``self.increment_counter``.

    Args:
        payload: Service log payload; only ``payload.service.value`` is
            read here.
    """
    if self.mock_testing:
        self.mock_testing_failure_calls += 1

    service_name = payload.service.value
    if service_name in self.payload_to_prometheus_map:
        prom_objects = self.payload_to_prometheus_map[service_name]
        for obj in prom_objects:
            # The per-service list mixes metric kinds; only counters are
            # touched on the failure path.
            if isinstance(obj, self.Counter):
                self.increment_counter(
                    counter=obj,
                    labels=service_name,
                    amount=1,  # LOG ERROR COUNT TO PROMETHEUS
                )