diff --git a/docs/my-website/docs/proxy/prometheus.md b/docs/my-website/docs/proxy/prometheus.md
index 6ccf0e44e..a1b9feb40 100644
--- a/docs/my-website/docs/proxy/prometheus.md
+++ b/docs/my-website/docs/proxy/prometheus.md
@@ -58,17 +58,9 @@ http://localhost:4000/metrics
 
 ## 📈 Metrics Tracked
 
-### Error Metrics
+### Virtual Keys, Teams, Internal Users Metrics
 
-| Metric Name          | Description                          |
-|----------------------|--------------------------------------|
-| `litellm_error_code_metric_total` | Total number of errors by error code and model |
-
-This metric provides a count of errors encountered, categorized by error code and model. For example:
-
-
-
-### Proxy Requests / Spend Metrics
+Use this for tracking metrics per [user, key, team, etc.](virtual_keys)
 
 | Metric Name          | Description                          |
 |----------------------|--------------------------------------|
@@ -76,11 +68,32 @@ This metric provides a count of errors encountered, categorized by error code an
 | `litellm_spend_metric`         | Total Spend, per `"user", "key", "model", "team", "end-user"` |
 | `litellm_total_tokens`         | input + output tokens per `"user", "key", "model", "team", "end-user"` |
 
-### Error Monitoring Metrics
+
+
+### LLM API / Provider Metrics
+
+Use this for LLM API error monitoring and for tracking remaining rate limits and token limits
 
 | Metric Name          | Description                          |
-| `litellm_llm_api_failed_requests_metric`             | Number of failed LLM API requests per `"user", "key", "model", "team", "end-user"` |
-| `litellm_error_code_metric_total` | Total number of errors by error code and model |
+|----------------------|--------------------------------------|
+| `litellm_deployment_success_responses`              | Total number of successful LLM API calls for deployment |
+| `litellm_deployment_failure_responses`              | Total number of failed LLM API calls for a specific LLM deployment. exception_status is the status of the exception from the LLM API |
+| `litellm_deployment_total_requests`              | Total number of LLM API calls for deployment - success + failure |
+| `litellm_remaining_requests_metric`             | Track `x-ratelimit-remaining-requests` returned from LLM API Deployment |
+| `litellm_remaining_tokens`                 | Track `x-ratelimit-remaining-tokens` returned from LLM API Deployment |
+| `litellm_deployment_state`                 | The state of the deployment: 0 = healthy, 1 = partial outage, 2 = complete outage. |
+| `litellm_deployment_latency_per_output_token`       | Latency per output token for deployment |
+
+### Load Balancing, Fallback, Cooldown Metrics
+
+Use this for tracking [litellm router](../routing) load balancing metrics
+
+| Metric Name          | Description                          |
+|----------------------|--------------------------------------|
+| `litellm_deployment_cooled_down` | Number of times a deployment has been cooled down by LiteLLM load balancing logic. exception_status is the status of the exception that caused the deployment to be cooled down |
+| `litellm_deployment_successful_fallbacks` | Number of successful fallback requests from primary model -> fallback model |
+| `litellm_deployment_failed_fallbacks` | Number of failed fallback requests from primary model -> fallback model |
+
 
 
 ### Request Latency Metrics
@@ -90,24 +103,6 @@ This metric provides a count of errors encountered, categorized by error code an
 | `litellm_llm_api_latency_metric` | latency (seconds) for just the LLM API call - tracked for labels `litellm_call_id`, `model` |
 
-
-### LLM API / Provider Metrics
-
-| Metric Name          | Description                          |
-|----------------------|--------------------------------------|
-| `litellm_deployment_state`                 | The state of the deployment: 0 = healthy, 1 = partial outage, 2 = complete outage. |
-| `litellm_remaining_requests_metric`             | Track `x-ratelimit-remaining-requests` returned from LLM API Deployment |
-| `litellm_remaining_tokens`                 | Track `x-ratelimit-remaining-tokens` return from LLM API Deployment |
- `litellm_deployment_success_responses`              | Total number of successful LLM API calls for deployment |
-| `litellm_deployment_failure_responses`              | Total number of failed LLM API calls for deployment |
-| `litellm_deployment_total_requests`              | Total number of LLM API calls for deployment - success + failure |
-| `litellm_deployment_latency_per_output_token`       | Latency per output token for deployment |
-| `litellm_deployment_successful_fallbacks` | Number of successful fallback requests from primary model -> fallback model |
-| `litellm_deployment_failed_fallbacks` | Number of failed fallback requests from primary model -> fallback model |
-
-
-
 ### Budget Metrics
 | Metric Name          | Description                          |
 |----------------------|--------------------------------------|
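The tables above only document the metric names; a quick way to confirm they are actually being emitted is to scrape the `/metrics` endpoint mentioned earlier in this doc. A minimal sketch, assuming the proxy is running locally on port 4000 and the `requests` package is installed (both assumptions, not part of this PR):

```python
import requests

# Scrape the Prometheus endpoint exposed by the LiteLLM proxy (assumed to be on localhost:4000)
resp = requests.get("http://localhost:4000/metrics")
resp.raise_for_status()

# Print only the deployment / rate-limit series documented in the tables above
for line in resp.text.splitlines():
    if line.startswith("litellm_deployment_") or line.startswith("litellm_remaining_"):
        print(line)
```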
diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py
index ed5035074..0bf7079d0 100644
--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@@ -26,8 +26,6 @@ class PrometheusLogger(CustomLogger):
         try:
             from prometheus_client import Counter, Gauge, Histogram
 
-            from litellm.proxy.proxy_server import premium_user
-
             verbose_logger.warning(
                 "🚨🚨🚨 Prometheus Metrics will be moving to LiteLLM Enterprise on September 15th, 2024.\n🚨 Contact us here to get a license https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat \n🚨 Enterprise Pricing: https://www.litellm.ai/#pricing"
             )
@@ -145,83 +143,86 @@ class PrometheusLogger(CustomLogger):
                 labelnames=["error_code", "model"],
             )
 
-            # Litellm-Enterprise Metrics
-            if premium_user is True:
+            ########################################
+            # LLM API Deployment Metrics / analytics
+            ########################################
 
-                ########################################
-                # LLM API Deployment Metrics / analytics
-                ########################################
-
-                # Remaining Rate Limit for model
-                self.litellm_remaining_requests_metric = Gauge(
-                    "litellm_remaining_requests",
-                    "LLM Deployment Analytics - remaining requests for model, returned from LLM API Provider",
-                    labelnames=[
-                        "model_group",
-                        "api_provider",
-                        "api_base",
-                        "litellm_model_name",
-                    ],
-                )
-
-                self.litellm_remaining_tokens_metric = Gauge(
-                    "litellm_remaining_tokens",
-                    "remaining tokens for model, returned from LLM API Provider",
-                    labelnames=[
-                        "model_group",
-                        "api_provider",
-                        "api_base",
-                        "litellm_model_name",
-                    ],
-                )
-                # Get all keys
-                _logged_llm_labels = [
-                    "litellm_model_name",
-                    "model_id",
-                    "api_base",
+            # Remaining Rate Limit for model
+            self.litellm_remaining_requests_metric = Gauge(
+                "litellm_remaining_requests",
+                "LLM Deployment Analytics - remaining requests for model, returned from LLM API Provider",
+                labelnames=[
+                    "model_group",
                     "api_provider",
-                ]
+                    "api_base",
+                    "litellm_model_name",
+                ],
+            )
 
-                # Metric for deployment state
-                self.litellm_deployment_state = Gauge(
-                    "litellm_deployment_state",
-                    "LLM Deployment Analytics - The state of the deployment: 0 = healthy, 1 = partial outage, 2 = complete outage",
-                    labelnames=_logged_llm_labels,
-                )
+            self.litellm_remaining_tokens_metric = Gauge(
+                "litellm_remaining_tokens",
+                "remaining tokens for model, returned from LLM API Provider",
+                labelnames=[
+                    "model_group",
+                    "api_provider",
+                    "api_base",
+                    "litellm_model_name",
+                ],
+            )
+            # Get all keys
+            _logged_llm_labels = [
+                "litellm_model_name",
+                "model_id",
+                "api_base",
+                "api_provider",
+            ]
 
-                self.litellm_deployment_success_responses = Counter(
-                    name="litellm_deployment_success_responses",
-                    documentation="LLM Deployment Analytics - Total number of successful LLM API calls via litellm",
-                    labelnames=_logged_llm_labels,
-                )
-                self.litellm_deployment_failure_responses = Counter(
-                    name="litellm_deployment_failure_responses",
-                    documentation="LLM Deployment Analytics - Total number of failed LLM API calls via litellm",
-                    labelnames=_logged_llm_labels,
-                )
-                self.litellm_deployment_total_requests = Counter(
-                    name="litellm_deployment_total_requests",
-                    documentation="LLM Deployment Analytics - Total number of LLM API calls via litellm - success + failure",
-                    labelnames=_logged_llm_labels,
-                )
+            # Metric for deployment state
+            self.litellm_deployment_state = Gauge(
+                "litellm_deployment_state",
+                "LLM Deployment Analytics - The state of the deployment: 0 = healthy, 1 = partial outage, 2 = complete outage",
+                labelnames=_logged_llm_labels,
+            )
 
-                # Deployment Latency tracking
-                self.litellm_deployment_latency_per_output_token = Histogram(
-                    name="litellm_deployment_latency_per_output_token",
-                    documentation="LLM Deployment Analytics - Latency per output token",
-                    labelnames=_logged_llm_labels,
-                )
+            self.litellm_deployment_cooled_down = Counter(
+                "litellm_deployment_cooled_down",
+                "LLM Deployment Analytics - Number of times a deployment has been cooled down by LiteLLM load balancing logic. exception_status is the status of the exception that caused the deployment to be cooled down",
+                labelnames=_logged_llm_labels + ["exception_status"],
+            )
 
-                self.litellm_deployment_successful_fallbacks = Counter(
-                    "litellm_deployment_successful_fallbacks",
-                    "LLM Deployment Analytics - Number of successful fallback requests from primary model -> fallback model",
-                    ["primary_model", "fallback_model"],
-                )
-                self.litellm_deployment_failed_fallbacks = Counter(
-                    "litellm_deployment_failed_fallbacks",
-                    "LLM Deployment Analytics - Number of failed fallback requests from primary model -> fallback model",
-                    ["primary_model", "fallback_model"],
-                )
+            self.litellm_deployment_success_responses = Counter(
+                name="litellm_deployment_success_responses",
+                documentation="LLM Deployment Analytics - Total number of successful LLM API calls via litellm",
+                labelnames=_logged_llm_labels,
+            )
+            self.litellm_deployment_failure_responses = Counter(
+                name="litellm_deployment_failure_responses",
+                documentation="LLM Deployment Analytics - Total number of failed LLM API calls for a specific LLM deployment. exception_status is the status of the exception from the LLM API",
+                labelnames=_logged_llm_labels + ["exception_status"],
+            )
+            self.litellm_deployment_total_requests = Counter(
+                name="litellm_deployment_total_requests",
+                documentation="LLM Deployment Analytics - Total number of LLM API calls via litellm - success + failure",
+                labelnames=_logged_llm_labels,
+            )
+
+            # Deployment Latency tracking
+            self.litellm_deployment_latency_per_output_token = Histogram(
+                name="litellm_deployment_latency_per_output_token",
+                documentation="LLM Deployment Analytics - Latency per output token",
+                labelnames=_logged_llm_labels,
+            )
+
+            self.litellm_deployment_successful_fallbacks = Counter(
+                "litellm_deployment_successful_fallbacks",
+                "LLM Deployment Analytics - Number of successful fallback requests from primary model -> fallback model",
+                ["primary_model", "fallback_model"],
+            )
+            self.litellm_deployment_failed_fallbacks = Counter(
+                "litellm_deployment_failed_fallbacks",
+                "LLM Deployment Analytics - Number of failed fallback requests from primary model -> fallback model",
+                ["primary_model", "fallback_model"],
+            )
 
         except Exception as e:
             print_verbose(f"Got exception on init prometheus client {str(e)}")
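For readers unfamiliar with `prometheus_client`, the pattern the new code uses for `litellm_deployment_cooled_down` (a `Counter` whose label set is `_logged_llm_labels + ["exception_status"]`) can be reproduced in isolation. A minimal, LiteLLM-independent sketch with made-up label values:

```python
from prometheus_client import Counter, generate_latest

# Same label layout as litellm_deployment_cooled_down in the hunk above
_logged_llm_labels = ["litellm_model_name", "model_id", "api_base", "api_provider"]

cooldown_counter = Counter(
    "demo_deployment_cooled_down",  # demo name, to avoid clashing with the real metric
    "Number of times a deployment has been cooled down, by exception status",
    labelnames=_logged_llm_labels + ["exception_status"],
)

# Each unique label combination becomes its own time series
cooldown_counter.labels(
    litellm_model_name="gpt-3.5-turbo",  # made-up values, for illustration only
    model_id="deployment-1",
    api_base="https://api.openai.com",
    api_provider="openai",
    exception_status="429",
).inc()

# Exposition-format dump, the same text /metrics would return
print(generate_latest().decode())
```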
@@ -232,7 +233,6 @@ class PrometheusLogger(CustomLogger):
         from litellm.proxy.common_utils.callback_utils import (
             get_model_group_from_litellm_kwargs,
         )
-        from litellm.proxy.proxy_server import premium_user
 
         verbose_logger.debug(
             f"prometheus Logging - Enters success logging function for kwargs {kwargs}"
@@ -375,14 +375,12 @@ class PrometheusLogger(CustomLogger):
             )
 
             # set x-ratelimit headers
-            if premium_user is True:
-                self.set_llm_deployment_success_metrics(
-                    kwargs, start_time, end_time, output_tokens
-                )
+            self.set_llm_deployment_success_metrics(
+                kwargs, start_time, end_time, output_tokens
+            )
             pass
 
     async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
-        from litellm.proxy.proxy_server import premium_user
 
         verbose_logger.debug(
             f"prometheus Logging - Enters failure logging function for kwargs {kwargs}"
@@ -404,6 +402,7 @@ class PrometheusLogger(CustomLogger):
         user_api_team_alias = litellm_params.get("metadata", {}).get(
             "user_api_key_team_alias", None
         )
+        exception = kwargs.get("exception", None)
 
         try:
             self.litellm_llm_api_failed_requests_metric.labels(
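The failure path above now pulls the raised exception out of `kwargs` so its status code can later be attached as a label. A minimal sketch of the same idea in a standalone `CustomLogger` subclass (the class name and the print format are illustrative, not part of this PR; assumes the `litellm` package is installed):

```python
from litellm.integrations.custom_logger import CustomLogger


class FailureStatusLogger(CustomLogger):
    """Illustrative only - records the HTTP status of failed LLM calls."""

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        # Same lookup PrometheusLogger now uses: the raised exception is passed in kwargs
        exception = kwargs.get("exception", None)
        # Not every exception carries a status_code, so getattr falls back to None
        exception_status = str(getattr(exception, "status_code", None))
        print(f"model={kwargs.get('model')} failed with status {exception_status}")
```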
@@ -441,8 +440,13 @@ class PrometheusLogger(CustomLogger):
         _metadata = _litellm_params.get("metadata", {})
         litellm_model_name = request_kwargs.get("model", None)
         api_base = _metadata.get("api_base", None)
+        if api_base is None:
+            api_base = _litellm_params.get("api_base", None)
         llm_provider = _litellm_params.get("custom_llm_provider", None)
-        model_id = _metadata.get("model_id")
+        _model_info = _metadata.get("model_info") or {}
+        model_id = _model_info.get("id", None)
+        exception = request_kwargs.get("exception", None)
+        exception_status_code: str = str(getattr(exception, "status_code", None))
 
         """
         log these labels
@@ -460,6 +464,7 @@ class PrometheusLogger(CustomLogger):
             model_id=model_id,
             api_base=api_base,
             api_provider=llm_provider,
+            exception_status=exception_status_code,
         ).inc()
 
         self.litellm_deployment_total_requests.labels(
@@ -488,8 +493,11 @@ class PrometheusLogger(CustomLogger):
             litellm_model_name = request_kwargs.get("model", None)
             model_group = _metadata.get("model_group", None)
             api_base = _metadata.get("api_base", None)
+            if api_base is None:
+                api_base = _litellm_params.get("api_base", None)
             llm_provider = _litellm_params.get("custom_llm_provider", None)
-            model_id = _metadata.get("model_id")
+            _model_info = _metadata.get("model_info") or {}
+            model_id = _model_info.get("id", None)
 
             remaining_requests = None
             remaining_tokens = None
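Both hunks above apply the same defensive lookup: `api_base` falls back from `metadata` to `litellm_params`, and `model_id` now comes from `metadata["model_info"]["id"]` with `or {}` guarding against a missing dict. A small sketch of that pattern on a hypothetical `request_kwargs` payload:

```python
# Hypothetical request kwargs, shaped like the ones the logger receives
request_kwargs = {
    "model": "gpt-3.5-turbo",
    "litellm_params": {
        "custom_llm_provider": "openai",
        "api_base": "https://api.openai.com",
        "metadata": {"model_info": {"id": "deployment-1"}},
    },
}

_litellm_params = request_kwargs.get("litellm_params", {}) or {}
_metadata = _litellm_params.get("metadata", {}) or {}

# Prefer the metadata value, fall back to litellm_params if it is missing
api_base = _metadata.get("api_base", None)
if api_base is None:
    api_base = _litellm_params.get("api_base", None)

# model_info may be absent entirely, so guard with `or {}` before .get()
_model_info = _metadata.get("model_info") or {}
model_id = _model_info.get("id", None)

print(api_base, model_id)  # -> https://api.openai.com deployment-1
```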
@@ -654,6 +662,21 @@ class PrometheusLogger(CustomLogger):
             2, litellm_model_name, model_id, api_base, api_provider
         )
 
+    def increment_deployment_cooled_down(
+        self,
+        litellm_model_name: str,
+        model_id: str,
+        api_base: str,
+        api_provider: str,
+        exception_status: str,
+    ):
+        """
+        increment metric when litellm.Router / load balancing logic places a deployment in cool down
+        """
+        self.litellm_deployment_cooled_down.labels(
+            litellm_model_name, model_id, api_base, api_provider, exception_status
+        ).inc()
+
 
 def safe_get_remaining_budget(
     max_budget: Optional[float], spend: Optional[float]
diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 8e8479e1d..b33bc35d1 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -24,5 +24,5 @@ general_settings:
   master_key: sk-1234
 
 litellm_settings:
-  success_callback: ["datadog"]
+  success_callback: ["prometheus"]
 
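The config change above turns the Prometheus callback on for the proxy. When using the Python SDK directly (no proxy), the equivalent is to register the logger yourself; a minimal sketch, assuming the prometheus dependencies are installed and an OpenAI key is configured - none of which this PR adds:

```python
import litellm
from litellm.integrations.prometheus import PrometheusLogger

# Register the same logger the proxy builds from `success_callback: ["prometheus"]`.
# router_cooldown_event_callback (below) finds it by scanning litellm.callbacks.
litellm.callbacks = [PrometheusLogger()]

response = litellm.completion(
    model="gpt-3.5-turbo",  # assumes OPENAI_API_KEY is set; any configured model works
    messages=[{"role": "user", "content": "ping"}],
)
print(response.choices[0].message.content)
```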
diff --git a/litellm/router.py b/litellm/router.py
index eb6bbf040..d31646203 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -53,7 +53,7 @@ from litellm.router_utils.client_initalization_utils import (
     should_initialize_sync_client,
 )
 from litellm.router_utils.cooldown_cache import CooldownCache
-from litellm.router_utils.cooldown_callbacks import router_cooldown_handler
+from litellm.router_utils.cooldown_callbacks import router_cooldown_event_callback
 from litellm.router_utils.cooldown_handlers import (
     DEFAULT_COOLDOWN_TIME_SECONDS,
     _async_get_cooldown_deployments,
diff --git a/litellm/router_utils/cooldown_callbacks.py b/litellm/router_utils/cooldown_callbacks.py
index 661075047..8324d3270 100644
--- a/litellm/router_utils/cooldown_callbacks.py
+++ b/litellm/router_utils/cooldown_callbacks.py
@@ -16,32 +16,39 @@ else:
     LitellmRouter = Any
 
 
-async def router_cooldown_handler(
+async def router_cooldown_event_callback(
     litellm_router_instance: LitellmRouter,
     deployment_id: str,
     exception_status: Union[str, int],
     cooldown_time: float,
 ):
+    """
+    Callback triggered when a deployment is put into cooldown by litellm
+
+    - Updates deployment state on Prometheus
+    - Increments cooldown metric for deployment on Prometheus
+    """
+    verbose_logger.debug("In router_cooldown_event_callback - updating prometheus")
     _deployment = litellm_router_instance.get_deployment(model_id=deployment_id)
     if _deployment is None:
         verbose_logger.warning(
-            f"in router_cooldown_handler but _deployment is None for deployment_id={deployment_id}. Doing nothing"
+            f"in router_cooldown_event_callback but _deployment is None for deployment_id={deployment_id}. Doing nothing"
         )
         return
     _litellm_params = _deployment["litellm_params"]
     temp_litellm_params = copy.deepcopy(_litellm_params)
     temp_litellm_params = dict(temp_litellm_params)
-    _model_name = _deployment.get("model_name", None)
-    _api_base = litellm.get_api_base(
-        model=_model_name, optional_params=temp_litellm_params
+    _model_name = _deployment.get("model_name", None) or ""
+    _api_base = (
+        litellm.get_api_base(model=_model_name, optional_params=temp_litellm_params)
+        or ""
     )
     model_info = _deployment["model_info"]
     model_id = model_info.id
 
-    litellm_model_name = temp_litellm_params.get("model")
+    litellm_model_name = temp_litellm_params.get("model") or ""
     llm_provider = ""
     try:
-
         _, llm_provider, _, _ = litellm.get_llm_provider(
             model=litellm_model_name,
             custom_llm_provider=temp_litellm_params.get("custom_llm_provider"),
@@ -50,13 +57,29 @@ async def router_cooldown_event_callback(
         pass
 
     # Trigger cooldown on Prometheus
-    from litellm.litellm_core_utils.litellm_logging import prometheusLogger
+    from litellm.integrations.prometheus import PrometheusLogger
+
+    prometheusLogger = None
+    for callback in litellm.callbacks:
+        if isinstance(callback, PrometheusLogger):
+            prometheusLogger = callback
 
     if prometheusLogger is not None:
-        prometheusLogger.set_deployment_complete_outage(
-            litellm_model_name=_model_name,
-            model_id=model_id,
-            api_base=_api_base,
-            api_provider=llm_provider,
-        )
+
+        if isinstance(prometheusLogger, PrometheusLogger):
+            prometheusLogger.set_deployment_complete_outage(
+                litellm_model_name=_model_name,
+                model_id=model_id,
+                api_base=_api_base,
+                api_provider=llm_provider,
+            )
+
+            prometheusLogger.increment_deployment_cooled_down(
+                litellm_model_name=_model_name,
+                model_id=model_id,
+                api_base=_api_base,
+                api_provider=llm_provider,
+                exception_status=str(exception_status),
+            )
+
     return
diff --git a/litellm/router_utils/cooldown_handlers.py b/litellm/router_utils/cooldown_handlers.py
index e062a2188..54d0694ee 100644
--- a/litellm/router_utils/cooldown_handlers.py
+++ b/litellm/router_utils/cooldown_handlers.py
@@ -11,7 +11,7 @@ from typing import TYPE_CHECKING, Any, List, Optional, Union
 
 import litellm
 from litellm._logging import verbose_router_logger
-from litellm.router_utils.cooldown_callbacks import router_cooldown_handler
+from litellm.router_utils.cooldown_callbacks import router_cooldown_event_callback
 from litellm.utils import get_utc_datetime
 
 from .router_callbacks.track_deployment_metrics import (
@@ -184,7 +184,7 @@ def _set_cooldown_deployments(
 
     # Trigger cooldown callback handler
     asyncio.create_task(
-        router_cooldown_handler(
+        router_cooldown_event_callback(
             litellm_router_instance=litellm_router_instance,
             deployment_id=deployment,
             exception_status=exception_status,
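The handler change above schedules the renamed callback with `asyncio.create_task`, so the Prometheus bookkeeping never blocks the routing path that placed the deployment in cooldown. A stripped-down sketch of that fire-and-forget pattern (function names are illustrative, not LiteLLM APIs):

```python
import asyncio


async def cooldown_event_callback(deployment_id: str, exception_status: str) -> None:
    # Stand-in for router_cooldown_event_callback: update metrics, log, etc.
    print(f"cooling down {deployment_id} (status {exception_status})")


async def set_cooldown(deployment_id: str) -> None:
    # Mirrors _set_cooldown_deployments: kick off the callback without awaiting it inline
    task = asyncio.create_task(cooldown_event_callback(deployment_id, "429"))
    # ... routing would continue immediately here ...
    await task  # awaited only so this demo exits cleanly


asyncio.run(set_cooldown("deployment-1"))
```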