diff --git a/litellm/integrations/prometheus_services.py b/litellm/integrations/prometheus_services.py index e657732dbb..a36ac9b9c0 100644 --- a/litellm/integrations/prometheus_services.py +++ b/litellm/integrations/prometheus_services.py @@ -81,18 +81,17 @@ class PrometheusServicesLogger: return True return False - def get_metric(self, metric_name): - for metric in self.REGISTRY.collect(): - for sample in metric.samples: - if metric_name == sample.name: - return metric - return None + def _get_metric(self, metric_name): + """ + Helper function to get a metric from the registry by name. + """ + return self.REGISTRY._names_to_collectors.get(metric_name) def create_histogram(self, service: str, type_of_request: str): metric_name = "litellm_{}_{}".format(service, type_of_request) is_registered = self.is_metric_registered(metric_name) if is_registered: - return self.get_metric(metric_name) + return self._get_metric(metric_name) return self.Histogram( metric_name, "Latency for {} service".format(service), @@ -104,7 +103,7 @@ class PrometheusServicesLogger: metric_name = "litellm_{}_{}".format(service, type_of_request) is_registered = self.is_metric_registered(metric_name) if is_registered: - return self.get_metric(metric_name) + return self._get_metric(metric_name) return self.Counter( metric_name, "Total {} for {} service".format(type_of_request, service), diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 5bc044526b..95eff095c2 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -7,4 +7,5 @@ model_list: litellm_settings: callbacks: ["prometheus"] - service_callback: ["prometheus_system"] \ No newline at end of file + service_callback: ["prometheus_system"] + cache: true diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 9eab792ad5..656e97a6c5 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -138,6 +138,12 @@ def safe_deep_copy(data): def log_to_opentelemetry(func): + """ + Decorator to log the duration of a DB related function to ServiceLogger() + + Handles logging DB success/failure to ServiceLogger(), which logs to Prometheus, OTEL, Datadog + """ + @wraps(func) async def wrapper(*args, **kwargs): start_time: datetime = datetime.now() @@ -145,10 +151,8 @@ def log_to_opentelemetry(func): try: result = await func(*args, **kwargs) end_time: datetime = datetime.now() - from litellm.proxy.proxy_server import proxy_logging_obj - # Log to OTEL only if "parent_otel_span" is in kwargs and is not None if "PROXY" not in func.__name__: await proxy_logging_obj.service_logging_obj.async_service_success_hook( service=ServiceTypes.DB, diff --git a/tests/local_testing/test_prometheus_service.py b/tests/local_testing/test_prometheus_service.py index 49dd74839c..c640532a07 100644 --- a/tests/local_testing/test_prometheus_service.py +++ b/tests/local_testing/test_prometheus_service.py @@ -210,3 +210,88 @@ async def test_service_logger_db_monitoring_failure(): assert actual_payload.call_type == "query" assert actual_payload.is_error is True assert actual_payload.error == "Database connection failed" + + +def test_get_metric_existing(): + """Test _get_metric when metric exists. _get_metric should return the metric object""" + pl = PrometheusServicesLogger() + # Create a metric first + hist = pl.create_histogram( + service="test_service", type_of_request="test_type_of_request" + ) + + # Test retrieving existing metric + retrieved_metric = pl._get_metric("litellm_test_service_test_type_of_request") + assert retrieved_metric is hist + assert retrieved_metric is not None + + +def test_get_metric_non_existing(): + """Test _get_metric when metric doesn't exist, returns None""" + pl = PrometheusServicesLogger() + + # Test retrieving non-existent metric + non_existent = pl._get_metric("non_existent_metric") + assert non_existent is None + + +def test_create_histogram_new(): + """Test creating a new histogram""" + pl = PrometheusServicesLogger() + + # Create new histogram + hist = pl.create_histogram( + service="test_service", type_of_request="test_type_of_request" + ) + + assert hist is not None + assert pl._get_metric("litellm_test_service_test_type_of_request") is hist + + +def test_create_histogram_existing(): + """Test creating a histogram that already exists""" + pl = PrometheusServicesLogger() + + # Create initial histogram + hist1 = pl.create_histogram( + service="test_service", type_of_request="test_type_of_request" + ) + + # Create same histogram again + hist2 = pl.create_histogram( + service="test_service", type_of_request="test_type_of_request" + ) + + assert hist2 is hist1 # same object + assert pl._get_metric("litellm_test_service_test_type_of_request") is hist1 + + +def test_create_counter_new(): + """Test creating a new counter""" + pl = PrometheusServicesLogger() + + # Create new counter + counter = pl.create_counter( + service="test_service", type_of_request="test_type_of_request" + ) + + assert counter is not None + assert pl._get_metric("litellm_test_service_test_type_of_request") is counter + + +def test_create_counter_existing(): + """Test creating a counter that already exists""" + pl = PrometheusServicesLogger() + + # Create initial counter + counter1 = pl.create_counter( + service="test_service", type_of_request="test_type_of_request" + ) + + # Create same counter again + counter2 = pl.create_counter( + service="test_service", type_of_request="test_type_of_request" + ) + + assert counter2 is counter1 + assert pl._get_metric("litellm_test_service_test_type_of_request") is counter1