fix(router.py): fix exponential backoff to use retry-after if present in headers

This commit is contained in:
Krrish Dholakia 2023-11-28 17:24:49 -08:00
parent 7f34298ef8
commit 60d6b6bc37
7 changed files with 154 additions and 67 deletions

View file

@ -227,10 +227,10 @@ async def asynctest_completion_azure_exception():
print("exception", e)
pytest.fail(f"Error occurred: {e}")
import asyncio
asyncio.run(
asynctest_completion_azure_exception()
)
# import asyncio
# asyncio.run(
# asynctest_completion_azure_exception()
# )
def test_completion_openai_exception():
@ -265,39 +265,40 @@ def test_completion_openai_exception():
# test_invalid_request_error(model="command-nightly")
# Test 3: Rate Limit Errors
# def test_model_call(model):
# try:
# sample_text = "how does a court case get to the Supreme Court?"
# messages = [{ "content": sample_text,"role": "user"}]
# print(f"model: {model}")
# response = completion(model=model, messages=messages)
# except RateLimitError:
# return True
# # except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server
# # return True
# except Exception as e:
# print(f"Uncaught Exception {model}: {type(e).__name__} - {e}")
# traceback.print_exc()
# pass
# return False
# # Repeat each model 500 times
# # extended_models = [model for model in models for _ in range(250)]
# extended_models = ["gpt-3.5-turbo-instruct" for _ in range(250)]
def test_model_call(model):
    """Probe *model* with one completion call and report rate limiting.

    Returns True when the call raises RateLimitError (the condition this
    stress test is counting), False on a successful completion or on any
    other exception (which is logged but swallowed so the thread pool
    keeps running).
    """
    try:
        sample_text = "how does a court case get to the Supreme Court?"
        messages = [{"content": sample_text, "role": "user"}]
        print(f"model: {model}")
        completion(model=model, messages=messages)
    except RateLimitError as e:
        # Surface the response headers so retry-after handling can be inspected.
        print(f"headers: {e.response.headers}")
        return True
    # except OpenAIError: # is at least an openai error -> in case of random model errors - e.g. overloaded server
    #     return True
    except Exception as e:
        print(f"Uncaught Exception {model}: {type(e).__name__} - {e}")
        traceback.print_exc()
    return False
# Repeat the target deployment 250 times so the pool can trip the rate limiter.
# extended_models = [model for model in models for _ in range(250)]
extended_models = ["azure/chatgpt-v-2"] * 250
def worker(model):
    """Thread-pool entry point: run one rate-limit probe for *model*."""
    return test_model_call(model)
# Tally of probe outcomes: True = rate-limited, False = anything else.
counts = dict.fromkeys((True, False), 0)

# Fan the probes out across the thread pool and fold the lazy map() results
# into the tally while the executor is still alive.
with ThreadPoolExecutor(max_workers=500) as executor:
    results = executor.map(worker, extended_models)
    for result in results:
        counts[result] += 1

# Fraction of calls that were rate-limited.
accuracy_score = counts[True] / (counts[True] + counts[False])
print(f"accuracy_score: {accuracy_score}")