From e617ef981d44dd1841d6ab77c24df1c406f188f1 Mon Sep 17 00:00:00 2001
From: Krrish Dholakia <krrishdholakia@gmail.com>
Date: Sat, 3 Feb 2024 12:58:16 -0800
Subject: [PATCH] fix(vertex_ai.py): treat vertex ai high-traffic error as a
 rate limit error - allows backoff logic to work here

---
 litellm/tests/test_amazing_vertex_completion.py |  5 ++---
 litellm/utils.py                                | 17 +++++++++++++++++
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py
index 62e6bc657..0188d2358 100644
--- a/litellm/tests/test_amazing_vertex_completion.py
+++ b/litellm/tests/test_amazing_vertex_completion.py
@@ -99,8 +99,7 @@ def test_vertex_ai():
     # litellm.vertex_project = "reliablekeys"
 
     test_models = random.sample(test_models, 1)
-    # test_models += litellm.vertex_language_models  # always test gemini-pro
-    test_models = litellm.vertex_language_models  # always test gemini-pro
+    test_models += litellm.vertex_language_models  # always test gemini-pro
     for model in test_models:
         try:
             if model in [
@@ -374,7 +373,7 @@ async def gemini_pro_async_function_calling():
     print(f"completion: {completion}")
 
 
-asyncio.run(gemini_pro_async_function_calling())
+# asyncio.run(gemini_pro_async_function_calling())
 
 # Extra gemini Vision tests for completion + stream, async, async + stream
 # if we run into issues with gemini, we will also add these to our ci/cd pipeline
diff --git a/litellm/utils.py b/litellm/utils.py
index 2bab08876..ec288cfb8 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -6447,6 +6447,23 @@ def exception_type(
                         llm_provider="vertex_ai",
                         response=original_exception.response,
                     )
+                elif (  # quota / high-traffic errors -> RateLimitError so callers can back off
+                    "429 Quota exceeded" in error_str
+                    or "IndexError: list index out of range" in error_str
+                ):
+                    exception_mapping_worked = True
+                    raise RateLimitError(
+                        message=f"VertexAIException - {error_str}",
+                        model=model,
+                        llm_provider="vertex_ai",
+                        response=httpx.Response(  # hand-built 429 response for this error
+                            status_code=429,
+                            request=httpx.Request(
+                                method="POST",
+                                url="https://cloud.google.com/vertex-ai/",
+                            ),
+                        ),
+                    )
                 if hasattr(original_exception, "status_code"):
                     if original_exception.status_code == 400:
                         exception_mapping_worked = True