diff --git a/litellm/__init__.py b/litellm/__init__.py
index fb513c133..ef9374aa1 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -595,6 +595,7 @@ from .utils import (
     _should_retry,
     get_secret,
     get_supported_openai_params,
+    get_api_base,
 )
 from .llms.huggingface_restapi import HuggingfaceConfig
 from .llms.anthropic import AnthropicConfig
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index ea96985fd..47676328c 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -3441,13 +3441,15 @@ async def chat_completion(
         )  # run the moderation check in parallel to the actual llm api call
         response = responses[1]

-        # Post Call Processing
-        data["litellm_status"] = "success"  # used for alerting
-
         hidden_params = getattr(response, "_hidden_params", {}) or {}
         model_id = hidden_params.get("model_id", None) or ""
         cache_key = hidden_params.get("cache_key", None) or ""

+        # Post Call Processing
+        if llm_router is not None:
+            data["deployment"] = llm_router.get_deployment(model_id=model_id)
+        data["litellm_status"] = "success"  # used for alerting
+
         if (
             "stream" in data and data["stream"] == True
         ):  # use generate_responses to stream responses
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 20b4f6cd8..a36124792 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -182,6 +182,25 @@ class ProxyLogging:
                 raise e
         return data

+    def _response_taking_too_long_callback(
+        self,
+        kwargs,  # kwargs to completion
+        start_time,
+        end_time,  # start/end time
+    ):
+        try:
+            time_difference = end_time - start_time
+            # Convert the timedelta to float (in seconds)
+            time_difference_float = time_difference.total_seconds()
+            litellm_params = kwargs.get("litellm_params", {})
+            api_base = litellm_params.get("api_base", "")
+            model = kwargs.get("model", "")
+            messages = kwargs.get("messages", "")
+
+            return time_difference_float, model, api_base, messages
+        except Exception as e:
+            raise e
+
     async def response_taking_too_long_callback(
         self,
         kwargs,  # kwargs to completion
@@ -191,13 +210,13 @@ class ProxyLogging:
     ):
         if self.alerting is None:
             return
-        time_difference = end_time - start_time
-        # Convert the timedelta to float (in seconds)
-        time_difference_float = time_difference.total_seconds()
-        litellm_params = kwargs.get("litellm_params", {})
-        api_base = litellm_params.get("api_base", "")
-        model = kwargs.get("model", "")
-        messages = kwargs.get("messages", "")
+        time_difference_float, model, api_base, messages = (
+            self._response_taking_too_long_callback(
+                kwargs=kwargs,
+                start_time=start_time,
+                end_time=end_time,
+            )
+        )
         request_info = f"\nRequest Model: `{model}`\nAPI Base: `{api_base}`\nMessages: `{messages}`"
         slow_message = f"`Responses are slow - {round(time_difference_float,2)}s response time > Alerting threshold: {self.alerting_threshold}s`"
         if time_difference_float > self.alerting_threshold:
@@ -244,6 +263,20 @@ class ProxyLogging:
                 request_data is not None
                 and request_data.get("litellm_status", "") != "success"
             ):
+                if request_data.get("deployment", None) is not None and isinstance(
+                    request_data["deployment"], dict
+                ):
+                    _api_base = litellm.get_api_base(
+                        model=model,
+                        optional_params=request_data["deployment"].get(
+                            "litellm_params", {}
+                        ),
+                    )
+
+                    if _api_base is None:
+                        _api_base = ""
+
+                    request_info += f"\nAPI Base: {_api_base}"
                 # only alert hanging responses if they have not been marked as success
                 alerting_message = (
                     f"`Requests are hanging - {self.alerting_threshold}s+ request time`"
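The hanging-request alert above now derives an API base from the deployment that proxy_server.py stashes on the request, using the `litellm.get_api_base` helper introduced in litellm/utils.py further down in this diff. A minimal illustrative sketch of the helper's two lookup paths (the Vertex project/location values here are made up):

```python
# Illustrative only: get_api_base prefers an explicit api_base, and otherwise
# reconstructs a Vertex AI endpoint from vertex_project / vertex_location.
import litellm

# Azure-style deployment params -> returns the configured api_base as-is
print(
    litellm.get_api_base(
        model="chatgpt-v-2",
        optional_params={
            "model": "azure/chatgpt-v-2",
            "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
        },
    )
)

# Vertex AI-style deployment params -> api_base is built from project/location
print(
    litellm.get_api_base(
        model="gemini-pro",
        optional_params={
            "model": "vertex_ai/gemini-pro",
            "vertex_project": "my-project",  # hypothetical value
            "vertex_location": "us-central1",  # hypothetical value
        },
    )
)
```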
diff --git a/litellm/router.py b/litellm/router.py
index 8c7c1be58..39053e05b 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -77,6 +77,13 @@ class LiteLLM_Params(BaseModel):
     )
     max_retries: int = 2  # follows openai default of 2
     organization: Optional[str] = None  # for openai orgs
+    ## VERTEX AI ##
+    vertex_project: Optional[str] = None
+    vertex_location: Optional[str] = None
+    ## AWS BEDROCK / SAGEMAKER ##
+    aws_access_key_id: Optional[str] = None
+    aws_secret_access_key: Optional[str] = None
+    aws_region_name: Optional[str] = None

     def __init__(self, max_retries: Optional[Union[int, str]] = None, **params):
         if max_retries is None:
@@ -2263,6 +2270,13 @@ class Router:
             self.model_names.append(deployment.model_name)
         return

+    def get_deployment(self, model_id: str):
+        for model in self.model_list:
+            if "model_info" in model and "id" in model["model_info"]:
+                if model_id == model["model_info"]["id"]:
+                    return model
+        return None
+
     def get_model_ids(self):
         ids = []
         for model in self.model_list:
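`Router.get_deployment` is a linear scan over `model_list` keyed on `model_info["id"]`, returning the full deployment dict or `None`. A rough usage sketch; the model list entry below is invented for illustration:

```python
# Rough sketch, assuming a single OpenAI-style deployment with a known model_info id.
from litellm import Router

router = Router(
    model_list=[
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "sk-dummy"},
            "model_info": {"id": "model-id-1"},
        }
    ]
)

deployment = router.get_deployment(model_id="model-id-1")
assert deployment is not None and deployment["model_info"]["id"] == "model-id-1"
assert router.get_deployment(model_id="does-not-exist") is None
```

This is the lookup the proxy uses to map a response's hidden `model_id` back to a deployment, so the hanging-request alert can report which API base the request was routed to.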
diff --git a/litellm/tests/test_alerting.py b/litellm/tests/test_alerting.py
new file mode 100644
index 000000000..225982bf3
--- /dev/null
+++ b/litellm/tests/test_alerting.py
@@ -0,0 +1,86 @@
+# What is this?
+## Tests slack alerting on proxy logging object
+
+import sys
+import os
+import io, asyncio
+from datetime import datetime
+
+# import logging
+# logging.basicConfig(level=logging.DEBUG)
+sys.path.insert(0, os.path.abspath("../.."))
+from litellm.proxy.utils import ProxyLogging
+from litellm.caching import DualCache
+import litellm
+import pytest
+
+
+@pytest.mark.asyncio
+async def test_get_api_base():
+    _pl = ProxyLogging(user_api_key_cache=DualCache())
+    _pl.update_values(alerting=["slack"], alerting_threshold=100, redis_cache=None)
+    model = "chatgpt-v-2"
+    messages = [{"role": "user", "content": "Hey how's it going?"}]
+    litellm_params = {
+        "acompletion": True,
+        "api_key": None,
+        "api_base": "https://openai-gpt-4-test-v-1.openai.azure.com/",
+        "force_timeout": 600,
+        "logger_fn": None,
+        "verbose": False,
+        "custom_llm_provider": "azure",
+        "litellm_call_id": "68f46d2d-714d-4ad8-8137-69600ec8755c",
+        "model_alias_map": {},
+        "completion_call_id": None,
+        "metadata": None,
+        "model_info": None,
+        "proxy_server_request": None,
+        "preset_cache_key": None,
+        "no-log": False,
+        "stream_response": {},
+    }
+    start_time = datetime.now()
+    end_time = datetime.now()
+
+    time_difference_float, model, api_base, messages = (
+        _pl._response_taking_too_long_callback(
+            kwargs={
+                "model": model,
+                "messages": messages,
+                "litellm_params": litellm_params,
+            },
+            start_time=start_time,
+            end_time=end_time,
+        )
+    )
+
+    assert api_base is not None
+    assert isinstance(api_base, str)
+    assert len(api_base) > 0
+    request_info = (
+        f"\nRequest Model: `{model}`\nAPI Base: `{api_base}`\nMessages: `{messages}`"
+    )
+    slow_message = f"`Responses are slow - {round(time_difference_float,2)}s response time > Alerting threshold: {100}s`"
+    await _pl.alerting_handler(
+        message=slow_message + request_info,
+        level="Low",
+    )
+
+
+@pytest.mark.asyncio
+async def test_request_taking_too_long():
+    """
+    - attach request_taking_too_long as a success callback to litellm
+    - unit test kwargs for azure call vs. vertex ai call -> ensure api base constructed correctly for both
+    """
+    import time
+
+    _pl = ProxyLogging(user_api_key_cache=DualCache())
+    litellm.success_callback = [_pl.response_taking_too_long_callback]
+
+    response = await litellm.acompletion(
+        model="azure/chatgpt-v-2",
+        messages=[{"role": "user", "content": "Hey, how's it going?"}],
+    )
+
+    raise Exception("it worked!")
diff --git a/litellm/tests/test_custom_callback_input.py b/litellm/tests/test_custom_callback_input.py
index 3b3eae1cc..7175f5bc4 100644
--- a/litellm/tests/test_custom_callback_input.py
+++ b/litellm/tests/test_custom_callback_input.py
@@ -75,7 +75,6 @@ class CompletionCustomHandler(

     def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
         try:
-            print(f"kwargs: {kwargs}")
             self.states.append("post_api_call")
             ## START TIME
             assert isinstance(start_time, datetime)
@@ -167,6 +166,8 @@ class CompletionCustomHandler(
             )
             assert isinstance(kwargs["optional_params"], dict)
             assert isinstance(kwargs["litellm_params"], dict)
+            assert isinstance(kwargs["litellm_params"]["api_base"], str)
+            assert isinstance(kwargs["cache_hit"], Optional[bool])
             assert isinstance(kwargs["start_time"], (datetime, type(None)))
             assert isinstance(kwargs["stream"], bool)
             assert isinstance(kwargs["user"], (str, type(None)))
@@ -265,8 +266,10 @@ class CompletionCustomHandler(
             assert isinstance(kwargs["messages"], list)
             assert isinstance(kwargs["optional_params"], dict)
             assert isinstance(kwargs["litellm_params"], dict)
+            assert isinstance(kwargs["litellm_params"]["api_base"], str)
             assert isinstance(kwargs["start_time"], (datetime, type(None)))
             assert isinstance(kwargs["stream"], bool)
+            assert isinstance(kwargs["cache_hit"], Optional[bool])
             assert isinstance(kwargs["user"], (str, type(None)))
             assert isinstance(kwargs["input"], (list, dict, str))
             assert isinstance(kwargs["api_key"], (str, type(None)))
@@ -651,8 +654,8 @@ def load_vertex_ai_credentials():
     os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.path.abspath(temp_file.name)


+@pytest.mark.skip(reason="Vertex AI Hanging")
 @pytest.mark.asyncio
-@pytest.mark.skip(reason="Skipping on this PR to test other stuff")
 async def test_async_chat_vertex_ai_stream():
     try:
         load_vertex_ai_credentials()
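The new api_base / cache_hit assertions in CompletionCustomHandler depend on the Logging change in litellm/utils.py below, which now stamps `litellm_params["api_base"]` (possibly as an empty string) before the provider call. A rough sketch of a user-side callback reading that field; the handler name is arbitrary:

```python
# Sketch only: a custom logger that reports which API base a call actually used.
import litellm
from litellm.integrations.custom_logger import CustomLogger


class ApiBaseLogger(CustomLogger):
    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        # With this change, litellm_params["api_base"] is always present as a string.
        print("api_base used:", kwargs["litellm_params"]["api_base"])


litellm.callbacks = [ApiBaseLogger()]
```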
diff --git a/litellm/utils.py b/litellm/utils.py
index 5153a414b..e3823e446 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -75,6 +75,7 @@ from .proxy._types import KeyManagementSystem
 from openai import OpenAIError as OriginalError
 from openai._models import BaseModel as OpenAIObject
 from .caching import S3Cache, RedisSemanticCache, RedisCache
+from .router import LiteLLM_Params
 from .exceptions import (
     AuthenticationError,
     BadRequestError,
@@ -1075,6 +1076,9 @@ class Logging:
                 headers = {}
             data = additional_args.get("complete_input_dict", {})
             api_base = additional_args.get("api_base", "")
+            self.model_call_details["litellm_params"]["api_base"] = str(
+                api_base
+            )  # used for alerting
             masked_headers = {
                 k: (v[:-20] + "*" * 20) if (isinstance(v, str) and len(v) > 20) else v
                 for k, v in headers.items()
             }
@@ -1203,7 +1207,6 @@ class Logging:
         self.model_call_details["original_response"] = original_response
         self.model_call_details["additional_args"] = additional_args
         self.model_call_details["log_event_type"] = "post_api_call"
-
         # User Logging -> if you pass in a custom logging function
         print_verbose(
             f"RAW RESPONSE:\n{self.model_call_details.get('original_response', self.model_call_details)}\n\n",
@@ -2546,7 +2549,7 @@ def client(original_function):
                 langfuse_secret=kwargs.pop("langfuse_secret", None),
             )
             ## check if metadata is passed in
-            litellm_params = {}
+            litellm_params = {"api_base": ""}
             if "metadata" in kwargs:
                 litellm_params["metadata"] = kwargs["metadata"]
             logging_obj.update_environment_variables(
@@ -3033,7 +3036,7 @@ def client(original_function):
                        cached_result = await litellm.cache.async_get_cache(
                            *args, **kwargs
                        )
-                    else:
+                    else:  # for s3 caching. [NOT RECOMMENDED IN PROD - this will slow down responses since boto3 is sync]
                        preset_cache_key = litellm.cache.get_cache_key(*args, **kwargs)
                        kwargs["preset_cache_key"] = (
                            preset_cache_key  # for streaming calls, we need to pass the preset_cache_key
@@ -3076,6 +3079,7 @@ def client(original_function):
                            "preset_cache_key", None
                        ),
                        "stream_response": kwargs.get("stream_response", {}),
+                       "api_base": kwargs.get("api_base", ""),
                    },
                    input=kwargs.get("messages", ""),
                    api_key=kwargs.get("api_key", None),
@@ -3209,6 +3213,7 @@ def client(original_function):
                            "stream_response": kwargs.get(
                                "stream_response", {}
                            ),
+                           "api_base": "",
                        },
                        input=kwargs.get("messages", ""),
                        api_key=kwargs.get("api_key", None),
@@ -5305,6 +5310,27 @@ def get_optional_params(
     return optional_params


+def get_api_base(model: str, optional_params: dict) -> Optional[str]:
+    _optional_params = LiteLLM_Params(**optional_params)  # convert to pydantic object
+
+    if _optional_params.api_base is not None:
+        return _optional_params.api_base
+
+    if (
+        _optional_params.vertex_location is not None
+        and _optional_params.vertex_project is not None
+    ):
+        _api_base = "{}-aiplatform.googleapis.com/v1/projects/{}/locations/{}/publishers/google/models/{}:streamGenerateContent".format(
+            _optional_params.vertex_location,
+            _optional_params.vertex_project,
+            _optional_params.vertex_location,
+            model,
+        )
+        return _api_base
+
+    return None
+
+
 def get_supported_openai_params(model: str, custom_llm_provider: str):
     """
     Returns the supported openai params for a given model + provider