diff --git a/litellm/integrations/slack_alerting.py b/litellm/integrations/slack_alerting.py
index d03922bc1..b06a22920 100644
--- a/litellm/integrations/slack_alerting.py
+++ b/litellm/integrations/slack_alerting.py
@@ -76,16 +76,14 @@ class SlackAlerting(CustomLogger):
         internal_usage_cache: Optional[DualCache] = None,
         alerting_threshold: float = 300,  # threshold for slow / hanging llm responses (in seconds)
         alerting: Optional[List] = [],
-        alert_types: Optional[
-            List[
-                Literal[
-                    "llm_exceptions",
-                    "llm_too_slow",
-                    "llm_requests_hanging",
-                    "budget_alerts",
-                    "db_exceptions",
-                    "daily_reports",
-                ]
+        alert_types: List[
+            Literal[
+                "llm_exceptions",
+                "llm_too_slow",
+                "llm_requests_hanging",
+                "budget_alerts",
+                "db_exceptions",
+                "daily_reports",
             ]
         ] = [
             "llm_exceptions",
@@ -812,14 +810,6 @@ Model Info:
                     updated_at=litellm.utils.get_utc_datetime(),
                 )
             )
-        if "llm_exceptions" in self.alert_types:
-            original_exception = kwargs.get("exception", None)
-
-            await self.send_alert(
-                message="LLM API Failure - " + str(original_exception),
-                level="High",
-                alert_type="llm_exceptions",
-            )
 
     async def _run_scheduler_helper(self, llm_router) -> bool:
         """
diff --git a/litellm/proxy/_super_secret_config.yaml b/litellm/proxy/_super_secret_config.yaml
index 86037caf7..b83883beb 100644
--- a/litellm/proxy/_super_secret_config.yaml
+++ b/litellm/proxy/_super_secret_config.yaml
@@ -8,6 +8,16 @@ model_list:
     base_model: text-embedding-ada-002
     mode: embedding
   model_name: text-embedding-ada-002
+- model_name: gpt-3.5-turbo-012
+  litellm_params:
+    model: gpt-3.5-turbo
+    api_base: http://0.0.0.0:8080
+    api_key: ""
+- model_name: gpt-3.5-turbo-0125-preview
+  litellm_params:
+    model: azure/chatgpt-v-2
+    api_key: os.environ/AZURE_API_KEY
+    api_base: os.environ/AZURE_API_BASE
 
 router_settings:
   redis_host: redis
@@ -17,6 +27,7 @@
 
 litellm_settings:
   set_verbose: True
+  fallbacks: [{"gpt-3.5-turbo-012": ["gpt-3.5-turbo-0125-preview"]}]
  # service_callback: ["prometheus_system"]
  # success_callback: ["prometheus"]
  # failure_callback: ["prometheus"]
@@ -25,4 +36,5 @@ general_settings:
   enable_jwt_auth: True
   disable_reset_budget: True
   proxy_batch_write_at: 60 # 👈 Frequency of batch writing logs to server (in seconds)
-  routing_strategy: simple-shuffle # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle"
\ No newline at end of file
+  routing_strategy: simple-shuffle # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle"
+  alerting: ["slack"]
diff --git a/litellm/router.py b/litellm/router.py
index ad11dc98e..b345a2f25 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -1413,7 +1413,7 @@ class Router:
             verbose_router_logger.debug(f"Trying to fallback b/w models")
             if (
                 hasattr(e, "status_code")
-                and e.status_code == 400
+                and e.status_code == 400  # type: ignore
                 and not isinstance(e, litellm.ContextWindowExceededError)
             ):  # don't retry a malformed request
                 raise e
@@ -3648,7 +3648,7 @@ class Router:
                 )
                 asyncio.create_task(
                     proxy_logging_obj.slack_alerting_instance.send_alert(
-                        message=f"Router: Cooling down deployment: {_api_base}, for {self.cooldown_time} seconds. Got exception: {str(exception_status)}",
+                        message=f"Router: Cooling down deployment: {_api_base}, for {self.cooldown_time} seconds. Got exception: {str(exception_status)}. Change 'cooldown_time' + 'allowed_fails' under 'Router Settings' on proxy UI, or via config - https://docs.litellm.ai/docs/proxy/reliability#fallbacks--retries--timeouts--cooldowns",
                         alert_type="cooldown_deployment",
                         level="Low",
                     )
diff --git a/litellm/tests/test_router_fallbacks.py b/litellm/tests/test_router_fallbacks.py
index c1035e3e0..ce2b014e9 100644
--- a/litellm/tests/test_router_fallbacks.py
+++ b/litellm/tests/test_router_fallbacks.py
@@ -961,3 +961,49 @@ def test_custom_cooldown_times():
 
     except Exception as e:
         print(e)
+
+
+@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.asyncio
+async def test_service_unavailable_fallbacks(sync_mode):
+    """
+    Initial model - openai
+    Fallback - azure
+
+    Error - 503, service unavailable
+    """
+    router = Router(
+        model_list=[
+            {
+                "model_name": "gpt-3.5-turbo-012",
+                "litellm_params": {
+                    "model": "gpt-3.5-turbo",
+                    "api_key": "anything",
+                    "api_base": "http://0.0.0.0:8080",
+                },
+            },
+            {
+                "model_name": "gpt-3.5-turbo-0125-preview",
+                "litellm_params": {
+                    "model": "azure/chatgpt-v-2",
+                    "api_key": os.getenv("AZURE_API_KEY"),
+                    "api_version": os.getenv("AZURE_API_VERSION"),
+                    "api_base": os.getenv("AZURE_API_BASE"),
+                },
+            },
+        ],
+        fallbacks=[{"gpt-3.5-turbo-012": ["gpt-3.5-turbo-0125-preview"]}],
+    )
+
+    if sync_mode:
+        response = router.completion(
+            model="gpt-3.5-turbo-012",
+            messages=[{"role": "user", "content": "Hey, how's it going?"}],
+        )
+    else:
+        response = await router.acompletion(
+            model="gpt-3.5-turbo-012",
+            messages=[{"role": "user", "content": "Hey, how's it going?"}],
+        )
+
+    assert response.model == "gpt-35-turbo"
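
A minimal usage sketch (not part of the patch): it exercises the new `fallbacks` entry in _super_secret_config.yaml through the proxy, assuming the proxy was started with that config (e.g. `litellm --config litellm/proxy/_super_secret_config.yaml`) and is listening on the default port 4000; the base_url and api_key values below are placeholders.

```python
# Sketch only, not part of the patch. Assumes a litellm proxy started with the
# config above and reachable at http://0.0.0.0:4000 with a placeholder key.
import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

# "gpt-3.5-turbo-012" points at http://0.0.0.0:8080, which is expected to fail
# (e.g. nothing listening / 503), so litellm_settings.fallbacks should route the
# request to the "gpt-3.5-turbo-0125-preview" Azure deployment instead.
response = client.chat.completions.create(
    model="gpt-3.5-turbo-012",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
print(response.model)
```

This mirrors what test_service_unavailable_fallbacks checks at the Router level: the response should come back from the Azure deployment (model reported as gpt-35-turbo).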