# #### What this tests ####
# # This tests the router's ability to identify the least busy deployment
# #
# # How is this achieved?
# # - Before each call, have the router print the state of requests {"deployment": "requests_in_flight"}
# # - use litellm.input_callbacks to log when a request is just about to be made to a model - {"deployment-id": traffic}
# # - use litellm.success + failure callbacks to log when a request has completed
# # - in get_available_deployment, for a given model group name -> pick based on traffic (see the sketch below)
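
# The comments above describe the least-busy mechanism in prose; below is a
# minimal, self-contained sketch of it. This is NOT litellm's internal
# implementation: `InFlightTracker` and `pick_least_busy` are hypothetical
# names. The callbacks maintain a {"deployment-id": traffic} map, and the
# selection step simply returns the deployment with the least traffic.

from collections import defaultdict


class InFlightTracker:
    """Tracks requests in flight per deployment, as the callbacks above would."""

    def __init__(self):
        self.in_flight = defaultdict(int)  # {"deployment-id": traffic}

    def on_request_start(self, deployment_id: str) -> None:
        # analogous to an input callback firing just before a request is made
        self.in_flight[deployment_id] += 1

    def on_request_end(self, deployment_id: str) -> None:
        # analogous to a success/failure callback firing on completion
        self.in_flight[deployment_id] -= 1


def pick_least_busy(in_flight: dict) -> str:
    """The selection step: return the deployment with the fewest in-flight requests."""
    return min(in_flight, key=in_flight.get)


# usage: "azure-europe" has no traffic, so it is picked
assert pick_least_busy({"azure-france": 3, "azure-europe": 0, "azure-canada": 1}) == "azure-europe"
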
# import sys, os, asyncio, time
# import traceback
# from dotenv import load_dotenv

# load_dotenv()

# sys.path.insert(
#     0, os.path.abspath("../..")
# )  # Adds the parent directory to the system path
# import pytest
# from litellm import Router
# import litellm

# async def test_least_busy_routing():
#     model_list = [{
#         "model_name": "azure-model",
#         "litellm_params": {
#             "model": "azure/gpt-turbo",
#             "api_key": "os.environ/AZURE_FRANCE_API_KEY",
#             "api_base": "https://openai-france-1234.openai.azure.com",
#             "rpm": 1440,
#         }
#     }, {
#         "model_name": "azure-model",
#         "litellm_params": {
#             "model": "azure/gpt-35-turbo",
#             "api_key": "os.environ/AZURE_EUROPE_API_KEY",
#             "api_base": "https://my-endpoint-europe-berri-992.openai.azure.com",
#             "rpm": 6,
#         }
#     }, {
#         "model_name": "azure-model",
#         "litellm_params": {
#             "model": "azure/gpt-35-turbo",
#             "api_key": "os.environ/AZURE_CANADA_API_KEY",
#             "api_base": "https://my-endpoint-canada-berri992.openai.azure.com",
#             "rpm": 6,
#         }
#     }]
#     router = Router(
#         model_list=model_list,
#         routing_strategy="least-busy",
#         set_verbose=False,
#         num_retries=3,
#     )  # type: ignore

#     async def call_azure_completion():
#         try:
#             response = await router.acompletion(
#                 model="azure-model",
#                 messages=[
#                     {
#                         "role": "user",
#                         "content": "hello this request will pass",
#                     }
#                 ],
#             )
#             print("\n response", response)
#             return response
#         except Exception:
#             return None

#     n = 1000
#     start_time = time.time()
#     tasks = [call_azure_completion() for _ in range(n)]
#     chat_completions = await asyncio.gather(*tasks)
#     successful_completions = [c for c in chat_completions if c is not None]
#     print(n, time.time() - start_time, len(successful_completions))


# asyncio.run(test_least_busy_routing())
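
# Toy driver for the sketch above: dispatch a burst of simulated requests,
# always routing to the least busy deployment, and print the resulting
# in-flight counts. Purely illustrative; it never calls litellm, and the
# deployment names are stand-ins for the three Azure endpoints above.

if __name__ == "__main__":
    tracker = InFlightTracker()
    for deployment_id in ("azure-france", "azure-europe", "azure-canada"):
        tracker.in_flight[deployment_id] += 0  # register each deployment at 0
    for _ in range(9):
        # each simulated request goes to whichever deployment is least busy
        tracker.on_request_start(pick_least_busy(tracker.in_flight))
    print(dict(tracker.in_flight))  # expect an even spread: 3 requests each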