# litellm-mirror/litellm/tests/test_least_busy_routing.py

# #### What this tests ####
# # This tests the router's ability to identify the least busy deployment
# #
# # How is this achieved?
# # - Before each call, have the router print the state of requests {"deployment": "requests_in_flight"}
# # - use litellm.input_callbacks to log when a request is just about to be made to a model - {"deployment-id": traffic}
# # - use litellm.success + failure callbacks to log when a request completed
# # - in get_available_deployment, for a given model group name -> pick based on traffic
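# #
# # A minimal sketch of the idea (not litellm's actual implementation; the class and
# # method names below are hypothetical): keep a per-deployment in-flight counter,
# # bump it just before a request goes out, decrement it when the request completes,
# # and have get_available_deployment return the deployment with the smallest count.
#
# class LeastBusySketch:
#     def __init__(self, deployment_ids):
#         # requests currently in flight, keyed by deployment id
#         self.in_flight = {d: 0 for d in deployment_ids}
#
#     def log_pre_call(self, deployment_id):
#         # analogous to an input callback firing just before the request is sent
#         self.in_flight[deployment_id] += 1
#
#     def log_post_call(self, deployment_id):
#         # analogous to success/failure callbacks firing after the request returns
#         self.in_flight[deployment_id] -= 1
#
#     def get_available_deployment(self):
#         # pick the deployment with the fewest requests in flight
#         return min(self.in_flight, key=self.in_flight.get)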
# import sys, os, asyncio, time
# import traceback
# from dotenv import load_dotenv
# load_dotenv()
# sys.path.insert(
#     0, os.path.abspath("../..")
# )  # Adds the parent directory to the system path
# import pytest
# from litellm import Router
# import litellm
# async def test_least_busy_routing():
#     model_list = [{
#         "model_name": "azure-model",
#         "litellm_params": {
#             "model": "azure/gpt-turbo",
#             "api_key": "os.environ/AZURE_FRANCE_API_KEY",
#             "api_base": "https://openai-france-1234.openai.azure.com",
#             "rpm": 1440,
#         }
#     }, {
#         "model_name": "azure-model",
#         "litellm_params": {
#             "model": "azure/gpt-35-turbo",
#             "api_key": "os.environ/AZURE_EUROPE_API_KEY",
#             "api_base": "https://my-endpoint-europe-berri-992.openai.azure.com",
#             "rpm": 6
#         }
#     }, {
#         "model_name": "azure-model",
#         "litellm_params": {
#             "model": "azure/gpt-35-turbo",
#             "api_key": "os.environ/AZURE_CANADA_API_KEY",
#             "api_base": "https://my-endpoint-canada-berri992.openai.azure.com",
#             "rpm": 6
#         }
#     }]
#     router = Router(
#         model_list=model_list,
#         routing_strategy="least-busy",
#         set_verbose=False,
#         num_retries=3,
#     )  # type: ignore
#     async def call_azure_completion():
#         try:
#             response = await router.acompletion(
#                 model="azure-model",
#                 messages=[
#                     {
#                         "role": "user",
#                         "content": "hello this request will pass"
#                     }
#                 ]
#             )
#             print("\n response", response)
#             return response
#         except Exception:
#             # swallow errors so gather() can count only successful completions
#             return None
#     n = 1000
#     start_time = time.time()
#     tasks = [call_azure_completion() for _ in range(n)]
#     chat_completions = await asyncio.gather(*tasks)
#     successful_completions = [c for c in chat_completions if c is not None]
#     print(n, time.time() - start_time, len(successful_completions))
# asyncio.run(test_least_busy_routing())