From e253c100f418eb49d3faafdec5408ed68b6a9ed7 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Mon, 9 Sep 2024 08:26:03 -0700
Subject: [PATCH] support using "callbacks" for prometheus

---
 litellm/proxy/common_utils/callback_utils.py | 10 ++++
 litellm/proxy/proxy_config.yaml              |  3 ++
 litellm/tests/test_prometheus.py             | 57 ++++++++++++++++++++
 3 files changed, 70 insertions(+)

diff --git a/litellm/proxy/common_utils/callback_utils.py b/litellm/proxy/common_utils/callback_utils.py
index fa976690e..4ccf61e23 100644
--- a/litellm/proxy/common_utils/callback_utils.py
+++ b/litellm/proxy/common_utils/callback_utils.py
@@ -228,6 +228,16 @@ def initialize_callbacks_on_proxy(
                 litellm.callbacks.extend(imported_list)
             else:
                 litellm.callbacks = imported_list  # type: ignore
+
+            if "prometheus" in value:
+                from litellm.proxy.proxy_server import app
+
+                verbose_proxy_logger.debug("Starting Prometheus Metrics on /metrics")
+                from prometheus_client import make_asgi_app
+
+                # Add prometheus asgi middleware to route /metrics requests
+                metrics_app = make_asgi_app()
+                app.mount("/metrics", metrics_app)
         else:
             litellm.callbacks = [
                 get_instance_fn(
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 71a356b80..b407b0d7a 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -15,6 +15,9 @@ guardrails:
       mode: "pre_call" # pre_call, during_call, post_call
       output_parse_pii: True
 
+litellm_settings:
+  callbacks: ["prometheus"]
+
 general_settings:
   master_key: sk-1234
   alerting: ["slack"]
diff --git a/litellm/tests/test_prometheus.py b/litellm/tests/test_prometheus.py
index 7574beb9d..1232130cb 100644
--- a/litellm/tests/test_prometheus.py
+++ b/litellm/tests/test_prometheus.py
@@ -79,3 +79,60 @@ async def test_async_prometheus_success_logging():
     assert metrics["litellm_deployment_success_responses_total"] == 1.0
     assert metrics["litellm_deployment_total_requests_total"] == 1.0
     assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0
+
+
+@pytest.mark.asyncio()
+async def test_async_prometheus_success_logging_with_callbacks():
+    run_id = str(uuid.uuid4())
+    litellm.set_verbose = True
+    litellm.callbacks = ["prometheus"]
+
+    response = await litellm.acompletion(
+        model="claude-instant-1.2",
+        messages=[{"role": "user", "content": "what llm are u"}],
+        max_tokens=10,
+        mock_response="hi",
+        temperature=0.2,
+        metadata={
+            "id": run_id,
+            "tags": ["tag1", "tag2"],
+            "user_api_key": "6eb81e014497d89f3cc1aa9da7c2b37bda6b7fea68e4b710d33d94201e68970c",
+            "user_api_key_alias": "ishaans-prometheus-key",
+            "user_api_end_user_max_budget": None,
+            "litellm_api_version": "1.40.19",
+            "global_max_parallel_requests": None,
+            "user_api_key_user_id": "admin",
+            "user_api_key_org_id": None,
+            "user_api_key_team_id": "dbe2f686-a686-4896-864a-4c3924458709",
+            "user_api_key_team_alias": "testing-team",
+        },
+    )
+    print(response)
+    await asyncio.sleep(3)
+
+    # get prometheus logger
+    from litellm.litellm_core_utils.litellm_logging import _in_memory_loggers
+
+    for callback in _in_memory_loggers:
+        if isinstance(callback, PrometheusLogger):
+            test_prometheus_logger = callback
+
+    print("done with success request")
+
+    print(
+        "vars of test_prometheus_logger",
+        vars(test_prometheus_logger.litellm_requests_metric),
+    )
+
+    # Get the metrics
+    metrics = {}
+    for metric in REGISTRY.collect():
+        for sample in metric.samples:
+            metrics[sample.name] = sample.value
+
+    print("metrics from prometheus", metrics)
+    assert metrics["litellm_requests_metric_total"] == 1.0
+    assert metrics["litellm_total_tokens_total"] == 30.0
+    assert metrics["litellm_deployment_success_responses_total"] == 1.0
+    assert metrics["litellm_deployment_total_requests_total"] == 1.0
+    assert metrics["litellm_deployment_latency_per_output_token_bucket"] == 1.0
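
Note: the core of this patch is the call to prometheus_client's make_asgi_app() mounted at /metrics. Below is a minimal, self-contained sketch of that same wiring on a plain FastAPI app, useful for checking the behavior outside the LiteLLM proxy. The file name sketch.py, the /ping route, and the demo_requests_total counter are hypothetical stand-ins for illustration only; they are not part of this patch or of LiteLLM's own metrics.

    # sketch.py -- hypothetical standalone demo, not part of this patch
    from fastapi import FastAPI
    from prometheus_client import Counter, make_asgi_app

    app = FastAPI()

    # Same wiring the patch adds in initialize_callbacks_on_proxy():
    # route /metrics requests to the Prometheus ASGI app.
    metrics_app = make_asgi_app()
    app.mount("/metrics", metrics_app)

    # Hypothetical counter standing in for LiteLLM's metrics
    # (e.g. litellm_requests_metric_total asserted in the test above).
    REQUESTS = Counter("demo_requests_total", "Total demo requests served")

    @app.get("/ping")
    async def ping():
        REQUESTS.inc()  # increment on each request so /metrics has data
        return {"status": "ok"}

Assuming the sketch above, running "uvicorn sketch:app", hitting /ping once, and then fetching /metrics should show demo_requests_total with a value of 1.0, which mirrors how the new test reads samples out of REGISTRY.collect().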