diff --git a/litellm/integrations/custom_logger.py b/litellm/integrations/custom_logger.py
index d6e72113e..22c23f8cf 100644
--- a/litellm/integrations/custom_logger.py
+++ b/litellm/integrations/custom_logger.py
@@ -67,10 +67,14 @@ class CustomLogger:  # https://docs.litellm.ai/docs/observability/custom_callbac
     ):
         pass
 
-    async def log_success_fallback_event(self, original_model_group: str, kwargs: dict):
+    async def log_success_fallback_event(
+        self, original_model_group: str, kwargs: dict, original_exception: Exception
+    ):
         pass
 
-    async def log_failure_fallback_event(self, original_model_group: str, kwargs: dict):
+    async def log_failure_fallback_event(
+        self, original_model_group: str, kwargs: dict, original_exception: Exception
+    ):
         pass
 
     #### ADAPTERS #### Allow calling 100+ LLMs in custom format - https://github.com/BerriAI/litellm/pulls
diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py
index 2f066949c..c9b9e2528 100644
--- a/litellm/integrations/prometheus.py
+++ b/litellm/integrations/prometheus.py
@@ -80,6 +80,10 @@ class PrometheusLogger(CustomLogger):
             "Total latency (seconds) for a request to LiteLLM",
             labelnames=[
                 "model",
+                "hashed_api_key",
+                "api_key_alias",
+                "team",
+                "team_alias",
             ],
         )
 
@@ -88,6 +92,10 @@ class PrometheusLogger(CustomLogger):
             "Total latency (seconds) for a models LLM API call",
             labelnames=[
                 "model",
+                "hashed_api_key",
+                "api_key_alias",
+                "team",
+                "team_alias",
             ],
         )
 
@@ -216,6 +224,12 @@ class PrometheusLogger(CustomLogger):
             "api_base",
             "api_provider",
         ]
+        team_and_key_labels = [
+            "hashed_api_key",
+            "api_key_alias",
+            "team",
+            "team_alias",
+        ]
 
         # Metric for deployment state
         self.litellm_deployment_state = Gauge(
@@ -233,35 +247,42 @@ class PrometheusLogger(CustomLogger):
         self.litellm_deployment_success_responses = Counter(
             name="litellm_deployment_success_responses",
             documentation="LLM Deployment Analytics - Total number of successful LLM API calls via litellm",
-            labelnames=[REQUESTED_MODEL] + _logged_llm_labels,
+            labelnames=[REQUESTED_MODEL] + _logged_llm_labels + team_and_key_labels,
         )
         self.litellm_deployment_failure_responses = Counter(
             name="litellm_deployment_failure_responses",
             documentation="LLM Deployment Analytics - Total number of failed LLM API calls for a specific LLM deploymeny. exception_status is the status of the exception from the llm api",
-            labelnames=[REQUESTED_MODEL] + _logged_llm_labels + EXCEPTION_LABELS,
+            labelnames=[REQUESTED_MODEL]
+            + _logged_llm_labels
+            + EXCEPTION_LABELS
+            + team_and_key_labels,
         )
         self.litellm_deployment_total_requests = Counter(
             name="litellm_deployment_total_requests",
             documentation="LLM Deployment Analytics - Total number of LLM API calls via litellm - success + failure",
-            labelnames=[REQUESTED_MODEL] + _logged_llm_labels,
+            labelnames=[REQUESTED_MODEL] + _logged_llm_labels + team_and_key_labels,
         )
 
         # Deployment Latency tracking
         self.litellm_deployment_latency_per_output_token = Histogram(
             name="litellm_deployment_latency_per_output_token",
             documentation="LLM Deployment Analytics - Latency per output token",
-            labelnames=_logged_llm_labels,
+            labelnames=_logged_llm_labels + team_and_key_labels,
         )
 
         self.litellm_deployment_successful_fallbacks = Counter(
             "litellm_deployment_successful_fallbacks",
             "LLM Deployment Analytics - Number of successful fallback requests from primary model -> fallback model",
-            ["primary_model", "fallback_model"],
+            [REQUESTED_MODEL, "fallback_model"]
+            + team_and_key_labels
+            + EXCEPTION_LABELS,
         )
         self.litellm_deployment_failed_fallbacks = Counter(
             "litellm_deployment_failed_fallbacks",
             "LLM Deployment Analytics - Number of failed fallback requests from primary model -> fallback model",
-            ["primary_model", "fallback_model"],
+            [REQUESTED_MODEL, "fallback_model"]
+            + team_and_key_labels
+            + EXCEPTION_LABELS,
         )
 
         self.litellm_llm_api_failed_requests_metric = Counter(
@@ -448,14 +469,22 @@ class PrometheusLogger(CustomLogger):
                 kwargs.get("end_time") - api_call_start_time
             )
             api_call_total_time_seconds = api_call_total_time.total_seconds()
-            self.litellm_llm_api_latency_metric.labels(model).observe(
-                api_call_total_time_seconds
-            )
+            self.litellm_llm_api_latency_metric.labels(
+                model,
+                user_api_key,
+                user_api_key_alias,
+                user_api_team,
+                user_api_team_alias,
+            ).observe(api_call_total_time_seconds)
 
         # log metrics
-        self.litellm_request_total_latency_metric.labels(model).observe(
-            total_time_seconds
-        )
+        self.litellm_request_total_latency_metric.labels(
+            model,
+            user_api_key,
+            user_api_key_alias,
+            user_api_team,
+            user_api_team_alias,
+        ).observe(total_time_seconds)
 
         # set x-ratelimit headers
         self.set_llm_deployment_success_metrics(
@@ -579,6 +608,9 @@ class PrometheusLogger(CustomLogger):
     def set_llm_deployment_failure_metrics(self, request_kwargs: dict):
         try:
             verbose_logger.debug("setting remaining tokens requests metric")
+            standard_logging_payload: StandardLoggingPayload = request_kwargs.get(
+                "standard_logging_object", {}
+            )
             _response_headers = request_kwargs.get("response_headers")
             _litellm_params = request_kwargs.get("litellm_params", {}) or {}
             _metadata = _litellm_params.get("metadata", {})
@@ -610,6 +642,16 @@ class PrometheusLogger(CustomLogger):
                 exception_status=str(getattr(exception, "status_code", None)),
                 exception_class=exception.__class__.__name__,
                 requested_model=model_group,
+                hashed_api_key=standard_logging_payload["metadata"][
+                    "user_api_key_hash"
+                ],
+                api_key_alias=standard_logging_payload["metadata"][
+                    "user_api_key_alias"
+                ],
+                team=standard_logging_payload["metadata"]["user_api_key_team_id"],
+                team_alias=standard_logging_payload["metadata"][
+                    "user_api_key_team_alias"
+                ],
             ).inc()
 
             self.litellm_deployment_total_requests.labels(
@@ -618,6 +660,16 @@ class PrometheusLogger(CustomLogger):
                 api_base=api_base,
                 api_provider=llm_provider,
                 requested_model=model_group,
+                hashed_api_key=standard_logging_payload["metadata"][
+                    "user_api_key_hash"
+                ],
+                api_key_alias=standard_logging_payload["metadata"][
+                    "user_api_key_alias"
+                ],
+                team=standard_logging_payload["metadata"]["user_api_key_team_id"],
+                team_alias=standard_logging_payload["metadata"][
+                    "user_api_key_team_alias"
+                ],
             ).inc()
 
             pass
@@ -706,6 +758,16 @@ class PrometheusLogger(CustomLogger):
                 api_base=api_base,
                 api_provider=llm_provider,
                 requested_model=model_group,
+                hashed_api_key=standard_logging_payload["metadata"][
+                    "user_api_key_hash"
+                ],
+                api_key_alias=standard_logging_payload["metadata"][
+                    "user_api_key_alias"
+                ],
+                team=standard_logging_payload["metadata"]["user_api_key_team_id"],
+                team_alias=standard_logging_payload["metadata"][
+                    "user_api_key_team_alias"
+                ],
             ).inc()
 
             self.litellm_deployment_total_requests.labels(
@@ -714,6 +776,16 @@ class PrometheusLogger(CustomLogger):
                 api_base=api_base,
                 api_provider=llm_provider,
                 requested_model=model_group,
+                hashed_api_key=standard_logging_payload["metadata"][
+                    "user_api_key_hash"
+                ],
+                api_key_alias=standard_logging_payload["metadata"][
+                    "user_api_key_alias"
+                ],
+                team=standard_logging_payload["metadata"]["user_api_key_team_id"],
+                team_alias=standard_logging_payload["metadata"][
+                    "user_api_key_team_alias"
+                ],
             ).inc()
 
             # Track deployment Latency
@@ -744,6 +816,16 @@ class PrometheusLogger(CustomLogger):
                     model_id=model_id,
                     api_base=api_base,
                     api_provider=llm_provider,
+                    hashed_api_key=standard_logging_payload["metadata"][
+                        "user_api_key_hash"
+                    ],
+                    api_key_alias=standard_logging_payload["metadata"][
+                        "user_api_key_alias"
+                    ],
+                    team=standard_logging_payload["metadata"]["user_api_key_team_id"],
+                    team_alias=standard_logging_payload["metadata"][
+                        "user_api_key_team_alias"
+                    ],
                 ).observe(latency_per_token)
 
         except Exception as e:
@@ -754,26 +836,70 @@ class PrometheusLogger(CustomLogger):
             )
             return
 
-    async def log_success_fallback_event(self, original_model_group: str, kwargs: dict):
+    async def log_success_fallback_event(
+        self, original_model_group: str, kwargs: dict, original_exception: Exception
+    ):
+        """
+
+        Logs a successful LLM fallback event on prometheus
+
+        """
+        from litellm.litellm_core_utils.litellm_logging import (
+            StandardLoggingMetadata,
+            get_standard_logging_metadata,
+        )
+
         verbose_logger.debug(
             "Prometheus: log_success_fallback_event, original_model_group: %s, kwargs: %s",
             original_model_group,
             kwargs,
         )
+        _metadata = kwargs.get("metadata", {})
+        standard_metadata: StandardLoggingMetadata = get_standard_logging_metadata(
+            metadata=_metadata
+        )
         _new_model = kwargs.get("model")
         self.litellm_deployment_successful_fallbacks.labels(
-            primary_model=original_model_group, fallback_model=_new_model
+            requested_model=original_model_group,
+            fallback_model=_new_model,
+            hashed_api_key=standard_metadata["user_api_key_hash"],
+            api_key_alias=standard_metadata["user_api_key_alias"],
+            team=standard_metadata["user_api_key_team_id"],
+            team_alias=standard_metadata["user_api_key_team_alias"],
+            exception_status=str(getattr(original_exception, "status_code", None)),
+            exception_class=str(original_exception.__class__.__name__),
         ).inc()
 
-    async def log_failure_fallback_event(self, original_model_group: str, kwargs: dict):
+    async def log_failure_fallback_event(
+        self, original_model_group: str, kwargs: dict, original_exception: Exception
+    ):
+        """
+        Logs a failed LLM fallback event on prometheus
+        """
+        from litellm.litellm_core_utils.litellm_logging import (
+            StandardLoggingMetadata,
+            get_standard_logging_metadata,
+        )
+
         verbose_logger.debug(
             "Prometheus: log_failure_fallback_event, original_model_group: %s, kwargs: %s",
             original_model_group,
             kwargs,
         )
         _new_model = kwargs.get("model")
+        _metadata = kwargs.get("metadata", {})
+        standard_metadata: StandardLoggingMetadata = get_standard_logging_metadata(
+            metadata=_metadata
+        )
         self.litellm_deployment_failed_fallbacks.labels(
-            primary_model=original_model_group, fallback_model=_new_model
+            requested_model=original_model_group,
+            fallback_model=_new_model,
+            hashed_api_key=standard_metadata["user_api_key_hash"],
+            api_key_alias=standard_metadata["user_api_key_alias"],
+            team=standard_metadata["user_api_key_team_id"],
+            team_alias=standard_metadata["user_api_key_team_alias"],
+            exception_status=str(getattr(original_exception, "status_code", None)),
+            exception_class=str(original_exception.__class__.__name__),
         ).inc()
 
     def set_litellm_deployment_state(
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index 8b5c15ca3..2756d53ab 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -2176,11 +2176,11 @@ def _init_custom_logger_compatible_class(
         _in_memory_loggers.append(_langsmith_logger)
         return _langsmith_logger  # type: ignore
     elif logging_integration == "prometheus":
-        if premium_user:
-            for callback in _in_memory_loggers:
-                if isinstance(callback, PrometheusLogger):
-                    return callback  # type: ignore
+        for callback in _in_memory_loggers:
+            if isinstance(callback, PrometheusLogger):
+                return callback  # type: ignore
 
+        if premium_user:
             _prometheus_logger = PrometheusLogger()
             _in_memory_loggers.append(_prometheus_logger)
             return _prometheus_logger  # type: ignore
@@ -2476,31 +2476,7 @@ def get_standard_logging_object_payload(
             }
         )
         # clean up litellm metadata
-        clean_metadata = StandardLoggingMetadata(
-            user_api_key_hash=None,
-            user_api_key_alias=None,
-            user_api_key_team_id=None,
-            user_api_key_user_id=None,
-            user_api_key_team_alias=None,
-            spend_logs_metadata=None,
-            requester_ip_address=None,
-            requester_metadata=None,
-        )
-        if isinstance(metadata, dict):
-            # Filter the metadata dictionary to include only the specified keys
-            clean_metadata = StandardLoggingMetadata(
-                **{  # type: ignore
-                    key: metadata[key]
-                    for key in StandardLoggingMetadata.__annotations__.keys()
-                    if key in metadata
-                }
-            )
-
-            if metadata.get("user_api_key") is not None:
-                if is_valid_sha256_hash(str(metadata.get("user_api_key"))):
-                    clean_metadata["user_api_key_hash"] = metadata.get(
-                        "user_api_key"
-                    )  # this is the hash
+        clean_metadata = get_standard_logging_metadata(metadata=metadata)
 
         if litellm.cache is not None:
             cache_key = litellm.cache.get_cache_key(**kwargs)
@@ -2610,6 +2586,51 @@ def get_standard_logging_object_payload(
         return None
 
 
+def get_standard_logging_metadata(
+    metadata: Optional[Dict[str, Any]]
+) -> StandardLoggingMetadata:
+    """
+    Clean and filter the metadata dictionary to include only the specified keys in StandardLoggingMetadata.
+
+    Args:
+        metadata (Optional[Dict[str, Any]]): The original metadata dictionary.
+
+    Returns:
+        StandardLoggingMetadata: A StandardLoggingMetadata object containing the cleaned metadata.
+
+    Note:
+        - If the input metadata is None or not a dictionary, an empty StandardLoggingMetadata object is returned.
+        - If 'user_api_key' is present in metadata and is a valid SHA256 hash, it's stored as 'user_api_key_hash'.
+    """
+    # Initialize with default values
+    clean_metadata = StandardLoggingMetadata(
+        user_api_key_hash=None,
+        user_api_key_alias=None,
+        user_api_key_team_id=None,
+        user_api_key_user_id=None,
+        user_api_key_team_alias=None,
+        spend_logs_metadata=None,
+        requester_ip_address=None,
+        requester_metadata=None,
+    )
+    if isinstance(metadata, dict):
+        # Filter the metadata dictionary to include only the specified keys
+        clean_metadata = StandardLoggingMetadata(
+            **{  # type: ignore
+                key: metadata[key]
+                for key in StandardLoggingMetadata.__annotations__.keys()
+                if key in metadata
+            }
+        )
+
+        if metadata.get("user_api_key") is not None:
+            if is_valid_sha256_hash(str(metadata.get("user_api_key"))):
+                clean_metadata["user_api_key_hash"] = metadata.get(
+                    "user_api_key"
+                )  # this is the hash
+    return clean_metadata
+
+
 def scrub_sensitive_keys_in_metadata(litellm_params: Optional[dict]):
     if litellm_params is None:
         litellm_params = {}
diff --git a/litellm/router_utils/fallback_event_handlers.py b/litellm/router_utils/fallback_event_handlers.py
index fc1ce648d..9aab5416f 100644
--- a/litellm/router_utils/fallback_event_handlers.py
+++ b/litellm/router_utils/fallback_event_handlers.py
@@ -1,8 +1,9 @@
-from typing import TYPE_CHECKING, Any, Dict, List, Tuple
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
 
 import litellm
 from litellm._logging import verbose_router_logger
 from litellm.integrations.custom_logger import CustomLogger
+from litellm.main import verbose_logger
 
 if TYPE_CHECKING:
     from litellm.router import Router as _Router
@@ -41,13 +42,17 @@
             verbose_router_logger.info("Successful fallback b/w models.")
             # callback for successfull_fallback_event():
             await log_success_fallback_event(
-                original_model_group=original_model_group, kwargs=kwargs
+                original_model_group=original_model_group,
+                kwargs=kwargs,
+                original_exception=original_exception,
             )
             return response
         except Exception as e:
             error_from_fallbacks = e
             await log_failure_fallback_event(
-                original_model_group=original_model_group, kwargs=kwargs
+                original_model_group=original_model_group,
+                kwargs=kwargs,
+                original_exception=original_exception,
             )
     raise error_from_fallbacks
 
@@ -83,29 +88,115 @@ def run_sync_fallback(
     raise error_from_fallbacks
 
 
-async def log_success_fallback_event(original_model_group: str, kwargs: dict):
+async def log_success_fallback_event(
+    original_model_group: str, kwargs: dict, original_exception: Exception
+):
+    """
+    Log a successful fallback event to all registered callbacks.
+
+    This function iterates through all callbacks, initializing _known_custom_logger_compatible_callbacks if needed,
+    and calls the log_success_fallback_event method on CustomLogger instances.
+
+    Args:
+        original_model_group (str): The original model group before fallback.
+        kwargs (dict): kwargs for the request
+
+    Note:
+        Errors during logging are caught and reported but do not interrupt the process.
+    """
+    from litellm.litellm_core_utils.litellm_logging import (
+        _init_custom_logger_compatible_class,
+    )
+
     for _callback in litellm.callbacks:
-        if isinstance(_callback, CustomLogger):
+        if isinstance(_callback, CustomLogger) or (
+            _callback in litellm._known_custom_logger_compatible_callbacks
+        ):
             try:
-                await _callback.log_success_fallback_event(
-                    original_model_group=original_model_group, kwargs=kwargs
+                _callback_custom_logger: Optional[CustomLogger] = None
+                if _callback in litellm._known_custom_logger_compatible_callbacks:
+                    _callback_custom_logger = _init_custom_logger_compatible_class(
+                        logging_integration=_callback,  # type: ignore
+                        llm_router=None,
+                        internal_usage_cache=None,
+                    )
+                elif isinstance(_callback, CustomLogger):
+                    _callback_custom_logger = _callback
+                else:
+                    verbose_router_logger.exception(
+                        f"{_callback} logger not found / initialized properly"
+                    )
+                    continue
+
+                if _callback_custom_logger is None:
+                    verbose_router_logger.exception(
+                        f"{_callback} logger not found / initialized properly, callback is None"
+                    )
+                    continue
+
+                await _callback_custom_logger.log_success_fallback_event(
+                    original_model_group=original_model_group,
+                    kwargs=kwargs,
+                    original_exception=original_exception,
                 )
             except Exception as e:
                 verbose_router_logger.error(
-                    f"Error in log_success_fallback_event: {(str(e))}"
+                    f"Error in log_success_fallback_event: {str(e)}"
                 )
-                pass
 
 
-async def log_failure_fallback_event(original_model_group: str, kwargs: dict):
+async def log_failure_fallback_event(
+    original_model_group: str, kwargs: dict, original_exception: Exception
+):
+    """
+    Log a failed fallback event to all registered callbacks.
+
+    This function iterates through all callbacks, initializing _known_custom_logger_compatible_callbacks if needed,
+    and calls the log_failure_fallback_event method on CustomLogger instances.
+
+    Args:
+        original_model_group (str): The original model group before fallback.
+        kwargs (dict): kwargs for the request
+
+    Note:
+        Errors during logging are caught and reported but do not interrupt the process.
+    """
+    from litellm.litellm_core_utils.litellm_logging import (
+        _init_custom_logger_compatible_class,
+    )
+
     for _callback in litellm.callbacks:
-        if isinstance(_callback, CustomLogger):
+        if isinstance(_callback, CustomLogger) or (
+            _callback in litellm._known_custom_logger_compatible_callbacks
+        ):
             try:
-                await _callback.log_failure_fallback_event(
-                    original_model_group=original_model_group, kwargs=kwargs
+                _callback_custom_logger: Optional[CustomLogger] = None
+                if _callback in litellm._known_custom_logger_compatible_callbacks:
+                    _callback_custom_logger = _init_custom_logger_compatible_class(
+                        logging_integration=_callback,  # type: ignore
+                        llm_router=None,
+                        internal_usage_cache=None,
+                    )
+                elif isinstance(_callback, CustomLogger):
+                    _callback_custom_logger = _callback
+                else:
+                    verbose_router_logger.exception(
+                        f"{_callback} logger not found / initialized properly"
+                    )
+                    continue
+
+                if _callback_custom_logger is None:
+                    verbose_router_logger.exception(
+                        f"{_callback} logger not found / initialized properly"
+                    )
+                    continue
+
+                await _callback_custom_logger.log_failure_fallback_event(
+                    original_model_group=original_model_group,
+                    kwargs=kwargs,
+                    original_exception=original_exception,
                 )
             except Exception as e:
                 verbose_router_logger.error(
-                    f"Error in log_failure_fallback_event: {(str(e))}"
+                    f"Error in log_failure_fallback_event: {str(e)}"
                 )
-                pass
diff --git a/tests/otel_tests/test_prometheus.py b/tests/otel_tests/test_prometheus.py
index 84333f7c8..9bda4cd10 100644
--- a/tests/otel_tests/test_prometheus.py
+++ b/tests/otel_tests/test_prometheus.py
@@ -5,6 +5,7 @@ Unit tests for prometheus metrics
 import pytest
 import aiohttp
 import asyncio
+import uuid
 
 
 async def make_bad_chat_completion_request(session, key):
@@ -23,6 +24,53 @@
     return status, response_text
 
 
+async def make_good_chat_completion_request(session, key):
+    url = "http://0.0.0.0:4000/chat/completions"
+    headers = {
+        "Authorization": f"Bearer {key}",
+        "Content-Type": "application/json",
+    }
+
+    data = {
+        "model": "fake-openai-endpoint",
+        "messages": [{"role": "user", "content": f"Hello {uuid.uuid4()}"}],
+        "tags": ["teamB"],
+    }
+    async with session.post(url, headers=headers, json=data) as response:
+        status = response.status
+        response_text = await response.text()
+        return status, response_text
+
+
+async def make_chat_completion_request_with_fallback(session, key):
+    url = "http://0.0.0.0:4000/chat/completions"
+    headers = {
+        "Authorization": f"Bearer {key}",
+        "Content-Type": "application/json",
+    }
+    data = {
+        "model": "fake-azure-endpoint",
+        "messages": [{"role": "user", "content": "Hello"}],
+        "fallbacks": ["fake-openai-endpoint"],
+    }
+    async with session.post(url, headers=headers, json=data) as response:
+        status = response.status
+        response_text = await response.text()
+
+    # make a request with a failed fallback
+    data = {
+        "model": "fake-azure-endpoint",
+        "messages": [{"role": "user", "content": "Hello"}],
+        "fallbacks": ["unknown-model"],
+    }
+
+    async with session.post(url, headers=headers, json=data) as response:
+        status = response.status
+        response_text = await response.text()
+
+    return
+
+
 @pytest.mark.asyncio
 async def test_proxy_failure_metrics():
     """
@@ -59,3 +107,77 @@
             'litellm_proxy_total_requests_metric_total{api_key_alias="None",end_user="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",requested_model="fake-azure-endpoint",team="None",team_alias="None",user="default_user_id"} 1.0'
             in metrics
         )
+
+        assert (
+            'litellm_deployment_failure_responses_total{api_base="https://exampleopenaiendpoint-production.up.railway.app",api_key_alias="None",api_provider="openai",exception_class="RateLimitError",exception_status="429",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",litellm_model_name="429",model_id="7499d31f98cd518cf54486d5a00deda6894239ce16d13543398dc8abf870b15f",requested_model="fake-azure-endpoint",team="None",team_alias="None"}'
+            in metrics
+        )
+
+
+@pytest.mark.asyncio
+async def test_proxy_success_metrics():
+    """
+    Make 1 good /chat/completions call to "openai/gpt-3.5-turbo"
+    GET /metrics
+    Assert the success metric is incremented by 1
+    """
+
+    async with aiohttp.ClientSession() as session:
+        # Make a good chat completion call
+        status, response_text = await make_good_chat_completion_request(
+            session, "sk-1234"
+        )
+
+        # Check if the request succeeded as expected
+        assert status == 200, f"Expected status 200, but got {status}"
+
+        # Get metrics
+        async with session.get("http://0.0.0.0:4000/metrics") as response:
+            metrics = await response.text()
+
+        print("/metrics", metrics)
+
+        # Check if the success metric is present and correct
+        assert (
+            'litellm_request_total_latency_metric_bucket{api_key_alias="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",le="0.005",model="fake",team="None",team_alias="None"}'
+            in metrics
+        )
+
+        assert (
+            'litellm_llm_api_latency_metric_bucket{api_key_alias="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",le="0.005",model="fake",team="None",team_alias="None"}'
+            in metrics
+        )
+
+        assert (
+            'litellm_deployment_latency_per_output_token_count{api_base="https://exampleopenaiendpoint-production.up.railway.app/",api_key_alias="None",api_provider="openai",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",litellm_model_name="fake",model_id="team-b-model",team="None",team_alias="None"}'
+            in metrics
+        )
+
+
+@pytest.mark.asyncio
+async def test_proxy_fallback_metrics():
+    """
+    Make 1 request with a client side fallback - check metrics
+    """
+
+    async with aiohttp.ClientSession() as session:
+        # Make a good chat completion call
+        await make_chat_completion_request_with_fallback(session, "sk-1234")
+
+        # Get metrics
+        async with session.get("http://0.0.0.0:4000/metrics") as response:
+            metrics = await response.text()
+
+        print("/metrics", metrics)
+
+        # Check if successful fallback metric is incremented
+        assert (
+            'litellm_deployment_successful_fallbacks_total{api_key_alias="None",exception_class="RateLimitError",exception_status="429",fallback_model="fake-openai-endpoint",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",requested_model="fake-azure-endpoint",team="None",team_alias="None"} 1.0'
+            in metrics
+        )
+
+        # Check if failed fallback metric is incremented
+        assert (
+            'litellm_deployment_failed_fallbacks_total{api_key_alias="None",exception_class="RateLimitError",exception_status="429",fallback_model="unknown-model",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",requested_model="fake-azure-endpoint",team="None",team_alias="None"} 1.0'
+            in metrics
+        )