Merge pull request #1802 from BerriAI/litellm_vertex_ai_high_traffic_fix
fix(vertex_ai.py): treat vertex ai high-traffic error as a rate limit error - allows user-controlled backoff logic to work here
commit 1d241b4001
2 changed files with 19 additions and 3 deletions
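Why map this to a rate-limit error: once Vertex AI's high-traffic / quota responses surface as RateLimitError, caller-side backoff can key off the exception type. A minimal sketch of such caller logic, assuming litellm's top-level completion() and its exported RateLimitError; the retry helper and its parameters are illustrative, not part of this commit:

    import time

    import litellm

    def completion_with_backoff(max_retries=5, **kwargs):
        """Retry litellm.completion with exponential backoff on rate limits."""
        for attempt in range(max_retries):
            try:
                return litellm.completion(**kwargs)
            except litellm.RateLimitError:
                # This commit maps Vertex AI "429 Quota exceeded" / high-traffic
                # failures to RateLimitError, so they land in this branch.
                if attempt == max_retries - 1:
                    raise
                time.sleep(2**attempt)  # back off 1s, 2s, 4s, ...

    # Hypothetical usage:
    # completion_with_backoff(model="gemini-pro", messages=[{"role": "user", "content": "hi"}])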
@@ -99,8 +99,7 @@ def test_vertex_ai():
     # litellm.vertex_project = "reliablekeys"
 
     test_models = random.sample(test_models, 1)
-    # test_models += litellm.vertex_language_models # always test gemini-pro
-    test_models = litellm.vertex_language_models # always test gemini-pro
+    test_models += litellm.vertex_language_models # always test gemini-pro
     for model in test_models:
         try:
             if model in [
@@ -374,7 +373,7 @@ async def gemini_pro_async_function_calling():
     print(f"completion: {completion}")
 
 
-asyncio.run(gemini_pro_async_function_calling())
+# asyncio.run(gemini_pro_async_function_calling())
 
 # Extra gemini Vision tests for completion + stream, async, async + stream
 # if we run into issues with gemini, we will also add these to our ci/cd pipeline
@@ -6468,6 +6468,23 @@ def exception_type(
                     llm_provider="vertex_ai",
                     response=original_exception.response,
                 )
+            elif (
+                "429 Quota exceeded" in error_str
+                or "IndexError: list index out of range" in error_str
+            ):
+                exception_mapping_worked = True
+                raise RateLimitError(
+                    message=f"VertexAIException - {error_str}",
+                    model=model,
+                    llm_provider="vertex_ai",
+                    response=httpx.Response(
+                        status_code=429,
+                        request=httpx.Request(
+                            method="POST",
+                            url=" https://cloud.google.com/vertex-ai/",
+                        ),
+                    ),
+                )
             if hasattr(original_exception, "status_code"):
                 if original_exception.status_code == 400:
                     exception_mapping_worked = True
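The added branch raises RateLimitError with a synthetic httpx.Response, so the exception carries a well-formed 429 even when the underlying Vertex AI error has no response attached, and downstream status-code checks behave as they would for a genuine rate-limit reply. A standalone sketch of that construction, using only httpx with values mirroring the diff:

    import httpx

    # Build a placeholder 429 response; httpx.Response takes the status code
    # and an httpx.Request giving the request context.
    response = httpx.Response(
        status_code=429,
        request=httpx.Request(method="POST", url="https://cloud.google.com/vertex-ai/"),
    )
    assert response.status_code == 429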