Merge pull request #1802 from BerriAI/litellm_vertex_ai_high_traffic_fix

fix(vertex_ai.py): treat vertex ai high-traffic error as a rate limit error - allows user-controlled backoff logic to work here
This commit is contained in:
Krish Dholakia 2024-02-03 15:37:05 -08:00 committed by GitHub
commit 1d241b4001
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 19 additions and 3 deletions

View file

@@ -6468,6 +6468,23 @@ def exception_type(
llm_provider="vertex_ai",
response=original_exception.response,
)
elif (
"429 Quota exceeded" in error_str
or "IndexError: list index out of range"
):
exception_mapping_worked = True
raise RateLimitError(
message=f"VertexAIException - {error_str}",
model=model,
llm_provider="vertex_ai",
response=httpx.Response(
status_code=429,
request=httpx.Request(
method="POST",
url=" https://cloud.google.com/vertex-ai/",
),
),
)
if hasattr(original_exception, "status_code"):
if original_exception.status_code == 400:
exception_mapping_worked = True