diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 869de6dde..17db8c3ab 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -2138,14 +2138,6 @@ async def async_data_generator(response, user_api_key_dict):
             except Exception as e:
                 yield f"data: {str(e)}\n\n"
 
-        ### ALERTING ###
-        end_time = time.time()
-        asyncio.create_task(
-            proxy_logging_obj.response_taking_too_long(
-                start_time=start_time, end_time=end_time, type="slow_response"
-            )
-        )
-
         # Streaming is done, yield the [DONE] chunk
         done_message = "[DONE]"
         yield f"data: {done_message}\n\n"
@@ -2494,14 +2486,6 @@ async def completion(
                 headers=custom_headers,
             )
 
-        ### ALERTING ###
-        end_time = time.time()
-        asyncio.create_task(
-            proxy_logging_obj.response_taking_too_long(
-                start_time=start_time, end_time=end_time, type="slow_response"
-            )
-        )
-
         fastapi_response.headers["x-litellm-model-id"] = model_id
         return response
     except Exception as e:
@@ -2700,14 +2684,6 @@ async def chat_completion(
                 headers=custom_headers,
             )
 
-        ### ALERTING ###
-        end_time = time.time()
-        asyncio.create_task(
-            proxy_logging_obj.response_taking_too_long(
-                start_time=start_time, end_time=end_time, type="slow_response"
-            )
-        )
-
         fastapi_response.headers["x-litellm-model-id"] = model_id
 
         ### CALL HOOKS ### - modify outgoing data
@@ -2915,12 +2891,6 @@ async def embeddings(
 
         ### ALERTING ###
         data["litellm_status"] = "success"  # used for alerting
-        end_time = time.time()
-        asyncio.create_task(
-            proxy_logging_obj.response_taking_too_long(
-                start_time=start_time, end_time=end_time, type="slow_response"
-            )
-        )
 
         return response
     except Exception as e:
@@ -3066,12 +3036,6 @@ async def image_generation(
 
         ### ALERTING ###
         data["litellm_status"] = "success"  # used for alerting
-        end_time = time.time()
-        asyncio.create_task(
-            proxy_logging_obj.response_taking_too_long(
-                start_time=start_time, end_time=end_time, type="slow_response"
-            )
-        )
 
         return response
     except Exception as e:
@@ -3225,12 +3189,6 @@ async def moderations(
 
         ### ALERTING ###
         data["litellm_status"] = "success"  # used for alerting
-        end_time = time.time()
-        asyncio.create_task(
-            proxy_logging_obj.response_taking_too_long(
-                start_time=start_time, end_time=end_time, type="slow_response"
-            )
-        )
 
         return response
     except Exception as e:
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 1cc52401a..948e686dd 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -64,6 +64,7 @@ class ProxyLogging:
         litellm.callbacks.append(self.max_parallel_request_limiter)
         litellm.callbacks.append(self.max_budget_limiter)
         litellm.callbacks.append(self.cache_control_check)
+        litellm.callbacks.append(self.response_taking_too_long_callback)
         for callback in litellm.callbacks:
             if callback not in litellm.input_callback:
                 litellm.input_callback.append(callback)
@@ -142,6 +143,30 @@ class ProxyLogging:
                 raise e
         return data
 
+    async def response_taking_too_long_callback(
+        self,
+        kwargs,  # kwargs to completion
+        completion_response,  # response from completion
+        start_time,
+        end_time,  # start/end time
+    ):
+        if self.alerting is None:
+            return
+        time_difference = end_time - start_time
+        # Convert the timedelta to float (in seconds)
+        time_difference_float = time_difference.total_seconds()
+        litellm_params = kwargs.get("litellm_params", {})
+        api_base = litellm_params.get("api_base", "")
+        model = kwargs.get("model", "")
+        messages = kwargs.get("messages", "")
+        request_info = f"\nRequest Model: `{model}`\nAPI Base: `{api_base}`\nMessages: `{messages}`"
+        slow_message = f"`Responses are slow - {round(time_difference_float,2)}s response time > Alerting threshold: {self.alerting_threshold}s`"
+        if time_difference_float > self.alerting_threshold:
+            await self.alerting_handler(
+                message=slow_message + request_info,
+                level="Low",
+            )
+
     async def response_taking_too_long(
         self,
         start_time: Optional[float] = None,
@@ -189,16 +214,6 @@ class ProxyLogging:
                     level="Medium",
                 )
 
-        elif (
-            type == "slow_response" and start_time is not None and end_time is not None
-        ):
-            slow_message = f"`Responses are slow - {round(end_time-start_time,2)}s response time > Alerting threshold: {self.alerting_threshold}s`"
-            if end_time - start_time > self.alerting_threshold:
-                await self.alerting_handler(
-                    message=slow_message + request_info,
-                    level="Low",
-                )
-
     async def budget_alerts(
         self,
         type: Literal[