diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py
index 05eece834..1d79653ea 100644
--- a/litellm/tests/test_amazing_vertex_completion.py
+++ b/litellm/tests/test_amazing_vertex_completion.py
@@ -548,42 +548,6 @@ def test_gemini_pro_vision_base64():
 
 
 def test_gemini_pro_function_calling():
-    load_vertex_ai_credentials()
-    tools = [
-        {
-            "type": "function",
-            "function": {
-                "name": "get_current_weather",
-                "description": "Get the current weather in a given location",
-                "parameters": {
-                    "type": "object",
-                    "properties": {
-                        "location": {
-                            "type": "string",
-                            "description": "The city and state, e.g. San Francisco, CA",
-                        },
-                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
-                    },
-                    "required": ["location"],
-                },
-            },
-        }
-    ]
-
-    messages = [
-        {
-            "role": "user",
-            "content": "What's the weather like in Boston today in fahrenheit?",
-        }
-    ]
-    completion = litellm.completion(
-        model="gemini-pro", messages=messages, tools=tools, tool_choice="auto"
-    )
-    print(f"completion: {completion}")
-    if hasattr(completion.choices[0].message, "tool_calls") and isinstance(
-        completion.choices[0].message.tool_calls, list
-    ):
-        assert len(completion.choices[0].message.tool_calls) == 1
     try:
         load_vertex_ai_credentials()
         tools = [
diff --git a/litellm/utils.py b/litellm/utils.py
index ec296e9dc..80d26f58b 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -3974,12 +3974,10 @@ def calculage_img_tokens(
     tile_tokens = (base_tokens * 2) * tiles_needed_high_res
     total_tokens = base_tokens + tile_tokens
     return total_tokens
-    
+
 
 def create_pretrained_tokenizer(
-    identifier: str,
-    revision="main",
-    auth_token: Optional[str] = None
+    identifier: str, revision="main", auth_token: Optional[str] = None
 ):
     """
     Creates a tokenizer from an existing file on a HuggingFace repository to be used with `token_counter`.
@@ -3993,7 +3991,9 @@ def create_pretrained_tokenizer(
     dict: A dictionary with the tokenizer and its type.
     """
 
-    tokenizer = Tokenizer.from_pretrained(identifier, revision=revision, auth_token=auth_token)
+    tokenizer = Tokenizer.from_pretrained(
+        identifier, revision=revision, auth_token=auth_token
+    )
 
     return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
 
@@ -9001,7 +9001,16 @@ def exception_type(
                        request=original_exception.request,
                    )
                elif custom_llm_provider == "azure":
-                    if "This model's maximum context length is" in error_str:
+                    if "Internal server error" in error_str:
+                        exception_mapping_worked = True
+                        raise APIError(
+                            status_code=500,
+                            message=f"AzureException - {original_exception.message}",
+                            llm_provider="azure",
+                            model=model,
+                            request=httpx.Request(method="POST", url="https://openai.com/"),
+                        )
+                    elif "This model's maximum context length is" in error_str:
                        exception_mapping_worked = True
                        raise ContextWindowExceededError(
                            message=f"AzureException - {original_exception.message}",
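Context for the `create_pretrained_tokenizer` hunk above: the change is purely a reformat, but for readers unfamiliar with the helper, below is a minimal usage sketch. It assumes litellm exposes `create_pretrained_tokenizer` and `token_counter` at the package top level and that `token_counter` accepts the returned dict via a `custom_tokenizer` argument, as the docstring ("to be used with `token_counter`") suggests; the HuggingFace repo id is illustrative, not part of this diff.

import litellm

# Wraps tokenizers.Tokenizer.from_pretrained and tags the result as a
# HuggingFace tokenizer so token_counter knows how to invoke it.
# "bigcode/starcoder" is an illustrative repo id.
tokenizer = litellm.create_pretrained_tokenizer("bigcode/starcoder")

# Count tokens with the custom tokenizer instead of a model's default one
# (custom_tokenizer is assumed here based on the docstring above).
num_tokens = litellm.token_counter(
    custom_tokenizer=tokenizer,
    text="What's the weather like in Boston today in fahrenheit?",
)
print(num_tokens)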