diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py
index b971a4e5e..f67caa405 100644
--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@@ -16,13 +16,9 @@ import litellm
 from litellm._logging import print_verbose, verbose_logger
 from litellm.integrations.custom_logger import CustomLogger
 from litellm.proxy._types import UserAPIKeyAuth
+from litellm.types.integrations.prometheus import *
 from litellm.types.utils import StandardLoggingPayload
 
-REQUESTED_MODEL = "requested_model"
-EXCEPTION_STATUS = "exception_status"
-EXCEPTION_CLASS = "exception_class"
-EXCEPTION_LABELS = [EXCEPTION_STATUS, EXCEPTION_CLASS]
-
 
 class PrometheusLogger(CustomLogger):
     # Class variables or attributes
@@ -85,6 +81,7 @@ class PrometheusLogger(CustomLogger):
                 "team",
                 "team_alias",
             ],
+            buckets=LATENCY_BUCKETS,
         )
 
         self.litellm_llm_api_latency_metric = Histogram(
@@ -97,6 +94,7 @@ class PrometheusLogger(CustomLogger):
                 "team",
                 "team_alias",
             ],
+            buckets=LATENCY_BUCKETS,
         )
 
         # Counter for spend
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 11ccc8561..c71e3fd40 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -1,12 +1,12 @@
 model_list:
-  - model_name: db-openai-endpoint
+  - model_name: fake-openai-endpoint
     litellm_params:
-      model: openai/gpt-4
+      model: openai/fake
       api_key: fake-key
       api_base: https://exampleopenaiendpoint-production.up.railway.app/
 
 litellm_settings:
-  success_callback: ["s3"]
+  callbacks: ["prometheus"]
   turn_off_message_logging: true
   s3_callback_params:
     s3_bucket_name: load-testing-oct # AWS Bucket Name for S3
diff --git a/litellm/types/integrations/prometheus.py b/litellm/types/integrations/prometheus.py
new file mode 100644
index 000000000..d09ed9670
--- /dev/null
+++ b/litellm/types/integrations/prometheus.py
@@ -0,0 +1,42 @@
+REQUESTED_MODEL = "requested_model"
+EXCEPTION_STATUS = "exception_status"
+EXCEPTION_CLASS = "exception_class"
+EXCEPTION_LABELS = [EXCEPTION_STATUS, EXCEPTION_CLASS]
+LATENCY_BUCKETS = (
+    0.005,
+    0.00625,
+    0.0125,
+    0.025,
+    0.05,
+    0.1,
+    0.5,
+    1.0,
+    1.5,
+    2.0,
+    2.5,
+    3.0,
+    3.5,
+    4.0,
+    4.5,
+    5.0,
+    5.5,
+    6.0,
+    6.5,
+    7.0,
+    7.5,
+    8.0,
+    8.5,
+    9.0,
+    9.5,
+    10.0,
+    15.0,
+    20.0,
+    25.0,
+    30.0,
+    60.0,
+    120.0,
+    180.0,
+    240.0,
+    300.0,
+    float("inf"),
+)
diff --git a/tests/otel_tests/test_prometheus.py b/tests/otel_tests/test_prometheus.py
index 9bda4cd10..0de1c9896 100644
--- a/tests/otel_tests/test_prometheus.py
+++ b/tests/otel_tests/test_prometheus.py
@@ -6,6 +6,12 @@ import pytest
 import aiohttp
 import asyncio
 import uuid
+import os
+import sys
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
 
 
 async def make_bad_chat_completion_request(session, key):
@@ -148,10 +154,52 @@ async def test_proxy_success_metrics():
         in metrics
     )
 
+    # assert (
+    #     'litellm_deployment_latency_per_output_token_count{api_base="https://exampleopenaiendpoint-production.up.railway.app/",api_key_alias="None",api_provider="openai",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",litellm_model_name="fake",model_id="team-b-model",team="None",team_alias="None"}'
+    #     in metrics
+    # )
+
+    verify_latency_metrics(metrics)
+
+
+def verify_latency_metrics(metrics: str):
+    """
+    Assert that LATENCY_BUCKETS distribution is used for
+    - litellm_request_total_latency_metric_bucket
+    - litellm_llm_api_latency_metric_bucket
+    """
+    from litellm.types.integrations.prometheus import LATENCY_BUCKETS
+    import re
+
+    metric_names = [
+        "litellm_request_total_latency_metric_bucket",
+        "litellm_llm_api_latency_metric_bucket",
+    ]
+
+    for metric_name in metric_names:
+        # Extract all 'le' values for the current metric
+        pattern = rf'{metric_name}{{.*?le="(.*?)".*?}}'
+        le_values = re.findall(pattern, metrics)
+
+        # Convert to set for easier comparison
+        actual_buckets = set(le_values)
+
+        print("actual_buckets", actual_buckets)
+        expected_buckets = []
+        for bucket in LATENCY_BUCKETS:
+            expected_buckets.append(str(bucket))
+
+        # replace inf with +Inf
+        expected_buckets = [
+            bucket.replace("inf", "+Inf") for bucket in expected_buckets
+        ]
+
+        print("expected_buckets", expected_buckets)
+        expected_buckets = set(expected_buckets)
+        # Verify all expected buckets are present
     assert (
-        'litellm_deployment_latency_per_output_token_count{api_base="https://exampleopenaiendpoint-production.up.railway.app/",api_key_alias="None",api_provider="openai",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",litellm_model_name="fake",model_id="team-b-model",team="None",team_alias="None"}'
-        in metrics
-    )
+            actual_buckets == expected_buckets
+        ), f"Mismatch in {metric_name} buckets. Expected: {expected_buckets}, Got: {actual_buckets}"
 
 
 @pytest.mark.asyncio
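
Standalone sketch (not part of the patch): how prometheus_client turns a custom buckets tuple into the le-labelled *_bucket series that verify_latency_metrics() scrapes from /metrics. The metric name and the shortened DEMO_BUCKETS tuple below are illustrative; only the buckets= keyword argument mirrors the change in PrometheusLogger.

# Illustrative only: demonstrates the prometheus_client `buckets=` behaviour
# relied on by this patch; names and values here are not LiteLLM's.
from prometheus_client import Histogram, generate_latest

DEMO_BUCKETS = (0.005, 0.00625, 0.0125, 0.05, 0.1, 0.5, 1.0, 5.0, float("inf"))

demo_latency = Histogram(
    "demo_request_latency_seconds",
    "Demo request latency (seconds)",
    labelnames=["model"],
    buckets=DEMO_BUCKETS,
)

demo_latency.labels(model="fake").observe(0.42)

# Each bucket is exposed as demo_request_latency_seconds_bucket{le="<bound>"},
# with float("inf") rendered as le="+Inf" - exactly the text the test's regex
# extracts and compares against LATENCY_BUCKETS.
print(generate_latest().decode())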