diff --git a/docs/my-website/docs/providers/azure_ai.md b/docs/my-website/docs/providers/azure_ai.md
index 71776e0fb7..4b7a9a783e 100644
--- a/docs/my-website/docs/providers/azure_ai.md
+++ b/docs/my-website/docs/providers/azure_ai.md
@@ -8,7 +8,7 @@ Set `MISTRAL_AZURE_API_KEY` and `MISTRAL_AZURE_API_BASE` in your env
 
 ```shell
 MISTRAL_AZURE_API_KEY = "zE************""
-MISTRAL_AZURE_API_BASE = "https://Mistral-large-nmefg-serverless.eastus2.inference.ai.azure.com"
+MISTRAL_AZURE_API_BASE = "https://Mistral-large-nmefg-serverless.eastus2.inference.ai.azure.com/v1"
 ```
 
 ```python
diff --git a/litellm/_logging.py b/litellm/_logging.py
index 26693c15ec..4f7e464468 100644
--- a/litellm/_logging.py
+++ b/litellm/_logging.py
@@ -8,7 +8,7 @@ handler.setLevel(logging.DEBUG)
 
 # Create a formatter and set it for the handler
 formatter = logging.Formatter(
-    "\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(message)s",
+    "\033[92m%(asctime)s - %(name)s:%(levelname)s\033[0m: %(filename)s:%(lineno)s - %(message)s",
     datefmt="%H:%M:%S",
 )
 
diff --git a/litellm/main.py b/litellm/main.py
index 8326e03f69..8ccde52e6a 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -3682,11 +3682,12 @@ async def ahealth_check(
                 response = {}  # args like remaining ratelimit etc.
         return response
     except Exception as e:
+        traceback.print_exc()
         if model not in litellm.model_cost and mode is None:
             raise Exception(
                 "Missing `mode`. Set the `mode` for the model - https://docs.litellm.ai/docs/proxy/health#embedding-models"
             )
-        return {"error": str(e)}
+        return {"error": f"{str(e)}"}
 
 
 ####### HELPER FUNCTIONS ################
diff --git a/litellm/proxy/hooks/parallel_request_limiter.py b/litellm/proxy/hooks/parallel_request_limiter.py
index 8982e4e2bf..af59869c29 100644
--- a/litellm/proxy/hooks/parallel_request_limiter.py
+++ b/litellm/proxy/hooks/parallel_request_limiter.py
@@ -324,7 +324,10 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
     async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
         try:
             self.print_verbose(f"Inside Max Parallel Request Failure Hook")
-            user_api_key = kwargs["litellm_params"]["metadata"]["user_api_key"]
+            user_api_key = (
+                kwargs["litellm_params"].get("metadata", {}).get("user_api_key", None)
+            )
+            self.print_verbose(f"user_api_key: {user_api_key}")
             if user_api_key is None:
                 return
 
@@ -355,7 +358,6 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
                 # ------------
                 # Update usage
                 # ------------
-
                 current = self.user_api_key_cache.get_cache(
                     key=request_count_api_key
                 ) or {
@@ -375,4 +377,6 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
                     request_count_api_key, new_val, ttl=60
                 )  # save in cache for up to 1 min.
         except Exception as e:
-            print(f"An exception occurred - {str(e)}")  # noqa
+            verbose_proxy_logger.info(
+                f"Inside Parallel Request Limiter: An exception occurred - {str(e)}."
+            )