fix(router.py): fix least-busy routing

2023-12-08 20:29:37 -08:00 · 2023-12-08 20:29:37 -08:00 · 4bf875d3ed
commit 4bf875d3ed
parent d9b115b8fb
8 changed files with 292 additions and 31 deletions
--- a/litellm/tests/test_least_busy_routing.py
+++ b/litellm/tests/test_least_busy_routing.py
@@ -0,0 +1,79 @@
+# #### What this tests ####
+# #    This tests the router's ability to identify the least busy deployment
+
+# #
+# # How is this achieved? 
+# # - Before each call, have the router print the state of requests: {"deployment": "requests_in_flight"}
+# # - Use litellm.input_callbacks to log when a request is just about to be made to a model: {"deployment-id": traffic}
+# # - Use the litellm success and failure callbacks to log when a request has completed
+# # - In get_available_deployment, for a given model group name, pick the deployment based on traffic
+
+# import sys, os, asyncio, time
+# import traceback
+# from dotenv import load_dotenv
+
+# load_dotenv()
+# import os
+
+# sys.path.insert(
+#     0, os.path.abspath("../..")
+# )  # Adds the parent directory to the system path
+# import pytest
+# from litellm import Router
+# import litellm
+
+# async def test_least_busy_routing():
+#     model_list = [{
+#         "model_name": "azure-model",
+#         "litellm_params": {
+#             "model": "azure/gpt-turbo",
+#             "api_key": "os.environ/AZURE_FRANCE_API_KEY",
+#             "api_base": "https://openai-france-1234.openai.azure.com",
+#             "rpm": 1440,
+#         }
+#     }, {
+#         "model_name": "azure-model",
+#         "litellm_params": {
+#             "model": "azure/gpt-35-turbo",
+#             "api_key": "os.environ/AZURE_EUROPE_API_KEY",
+#             "api_base": "https://my-endpoint-europe-berri-992.openai.azure.com",
+#             "rpm": 6
+#         }
+#     }, {
+#         "model_name": "azure-model",
+#         "litellm_params": {
+#             "model": "azure/gpt-35-turbo",
+#             "api_key": "os.environ/AZURE_CANADA_API_KEY",
+#             "api_base": "https://my-endpoint-canada-berri992.openai.azure.com",
+#             "rpm": 6
+#         }
+#     }]
+#     router = Router(model_list=model_list, 
+# 					routing_strategy="least-busy",
+# 					set_verbose=False,
+#                     num_retries=3) # type: ignore
+    
+#     async def call_azure_completion():
+#         try: 
+#             response = await router.acompletion(
+#                 model="azure-model",
+#                 messages=[
+#                     {
+#                         "role": "user",
+#                         "content": "hello this request will pass"
+#                     }
+#                 ]
+#             )
+#             print("\n response", response)
+#             return response
+#         except: 
+#             return None
+    
+#     n = 1000
+#     start_time = time.time()
+#     tasks = [call_azure_completion() for _ in range(n)]
+#     chat_completions = await asyncio.gather(*tasks)
+#     successful_completions = [c for c in chat_completions if c is not None]
+#     print(n, time.time() - start_time, len(successful_completions))
+
+# asyncio.run(test_least_busy_routing())