fix(vertex_ai.py): treat the Vertex AI high-traffic error as a rate limit error, so the existing backoff logic applies to it

This commit is contained in:
Krrish Dholakia 2024-02-03 12:58:16 -08:00
parent 5bf51a6058
commit e617ef981d
2 changed files with 19 additions and 3 deletions

View file

@@ -6447,6 +6447,23 @@ def exception_type(
llm_provider="vertex_ai",
response=original_exception.response,
)
elif (
"429 Quota exceeded" in error_str
or "IndexError: list index out of range"
):
exception_mapping_worked = True
raise RateLimitError(
message=f"VertexAIException - {error_str}",
model=model,
llm_provider="vertex_ai",
response=httpx.Response(
status_code=429,
request=httpx.Request(
method="POST",
url=" https://cloud.google.com/vertex-ai/",
),
),
)
if hasattr(original_exception, "status_code"):
if original_exception.status_code == 400:
exception_mapping_worked = True