forked from phoenix/litellm-mirror
Merge pull request #1802 from BerriAI/litellm_vertex_ai_high_traffic_fix
fix(vertex_ai.py): treat vertex ai high-traffic error as a rate limit error - allows user-controlled backoff logic to work here
This commit is contained in:
commit
1d241b4001
2 changed files with 19 additions and 3 deletions
|
@@ -99,8 +99,7 @@ def test_vertex_ai():
|
|||
# litellm.vertex_project = "reliablekeys"
|
||||
|
||||
test_models = random.sample(test_models, 1)
|
||||
# test_models += litellm.vertex_language_models # always test gemini-pro
|
||||
test_models = litellm.vertex_language_models # always test gemini-pro
|
||||
test_models += litellm.vertex_language_models # always test gemini-pro
|
||||
for model in test_models:
|
||||
try:
|
||||
if model in [
|
||||
|
@@ -374,7 +373,7 @@ async def gemini_pro_async_function_calling():
|
|||
print(f"completion: {completion}")
|
||||
|
||||
|
||||
asyncio.run(gemini_pro_async_function_calling())
|
||||
# asyncio.run(gemini_pro_async_function_calling())
|
||||
|
||||
# Extra gemini Vision tests for completion + stream, async, async + stream
|
||||
# if we run into issues with gemini, we will also add these to our ci/cd pipeline
|
||||
|
|
|
@@ -6468,6 +6468,23 @@ def exception_type(
|
|||
llm_provider="vertex_ai",
|
||||
response=original_exception.response,
|
||||
)
|
||||
elif (
|
||||
"429 Quota exceeded" in error_str
|
||||
or "IndexError: list index out of range"
|
||||
):
|
||||
exception_mapping_worked = True
|
||||
raise RateLimitError(
|
||||
message=f"VertexAIException - {error_str}",
|
||||
model=model,
|
||||
llm_provider="vertex_ai",
|
||||
response=httpx.Response(
|
||||
status_code=429,
|
||||
request=httpx.Request(
|
||||
method="POST",
|
||||
url=" https://cloud.google.com/vertex-ai/",
|
||||
),
|
||||
),
|
||||
)
|
||||
if hasattr(original_exception, "status_code"):
|
||||
if original_exception.status_code == 400:
|
||||
exception_mapping_worked = True
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue