From e617ef981d44dd1841d6ab77c24df1c406f188f1 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 3 Feb 2024 12:58:16 -0800 Subject: [PATCH] fix(vertex_ai.py): treat vertex ai high-traffic error as a rate limit error - allows backoff logic to work here --- litellm/tests/test_amazing_vertex_completion.py | 5 ++--- litellm/utils.py | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py index 62e6bc657..0188d2358 100644 --- a/litellm/tests/test_amazing_vertex_completion.py +++ b/litellm/tests/test_amazing_vertex_completion.py @@ -99,8 +99,7 @@ def test_vertex_ai(): # litellm.vertex_project = "reliablekeys" test_models = random.sample(test_models, 1) - # test_models += litellm.vertex_language_models # always test gemini-pro - test_models = litellm.vertex_language_models # always test gemini-pro + test_models += litellm.vertex_language_models # always test gemini-pro for model in test_models: try: if model in [ @@ -374,7 +373,7 @@ async def gemini_pro_async_function_calling(): print(f"completion: {completion}") -asyncio.run(gemini_pro_async_function_calling()) +# asyncio.run(gemini_pro_async_function_calling()) # Extra gemini Vision tests for completion + stream, async, async + stream # if we run into issues with gemini, we will also add these to our ci/cd pipeline diff --git a/litellm/utils.py b/litellm/utils.py index 2bab08876..ec288cfb8 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -6447,6 +6447,23 @@ def exception_type( llm_provider="vertex_ai", response=original_exception.response, ) + elif ( + "429 Quota exceeded" in error_str + or "IndexError: list index out of range" in error_str + ): + exception_mapping_worked = True + raise RateLimitError( + message=f"VertexAIException - {error_str}", + model=model, + llm_provider="vertex_ai", + response=httpx.Response( + status_code=429, + request=httpx.Request( + method="POST", + url=" 
https://cloud.google.com/vertex-ai/", + ), + ), + ) if hasattr(original_exception, "status_code"): if original_exception.status_code == 400: exception_mapping_worked = True