forked from phoenix/litellm-mirror
fix(router.py): fix least-busy routing
This commit is contained in:
parent
d9b115b8fb
commit
4bf875d3ed
8 changed files with 292 additions and 31 deletions
79
litellm/tests/test_least_busy_routing.py
Normal file
79
litellm/tests/test_least_busy_routing.py
Normal file
|
@@ -0,0 +1,79 @@
|
|||
# #### What this tests ####
|
||||
# # This tests the router's ability to identify the least busy deployment
|
||||
|
||||
# #
|
||||
# # How is this achieved?
|
||||
# # - Before each call, have the router print the state of requests {"deployment": "requests_in_flight"}
|
||||
# # - use litellm.input_callbacks to log when a request is just about to be made to a model - {"deployment-id": traffic}
|
||||
# # - use litellm.success + failure callbacks to log when a request has completed
|
||||
# # - in get_available_deployment, for a given model group name -> pick based on traffic
|
||||
|
||||
# import sys, os, asyncio, time
|
||||
# import traceback
|
||||
# from dotenv import load_dotenv
|
||||
|
||||
# load_dotenv()
|
||||
# import os
|
||||
|
||||
# sys.path.insert(
|
||||
# 0, os.path.abspath("../..")
|
||||
# ) # Adds the directory two levels up to the system path
|
||||
# import pytest
|
||||
# from litellm import Router
|
||||
# import litellm
|
||||
|
||||
# async def test_least_busy_routing():
|
||||
# model_list = [{
|
||||
# "model_name": "azure-model",
|
||||
# "litellm_params": {
|
||||
# "model": "azure/gpt-turbo",
|
||||
# "api_key": "os.environ/AZURE_FRANCE_API_KEY",
|
||||
# "api_base": "https://openai-france-1234.openai.azure.com",
|
||||
# "rpm": 1440,
|
||||
# }
|
||||
# }, {
|
||||
# "model_name": "azure-model",
|
||||
# "litellm_params": {
|
||||
# "model": "azure/gpt-35-turbo",
|
||||
# "api_key": "os.environ/AZURE_EUROPE_API_KEY",
|
||||
# "api_base": "https://my-endpoint-europe-berri-992.openai.azure.com",
|
||||
# "rpm": 6
|
||||
# }
|
||||
# }, {
|
||||
# "model_name": "azure-model",
|
||||
# "litellm_params": {
|
||||
# "model": "azure/gpt-35-turbo",
|
||||
# "api_key": "os.environ/AZURE_CANADA_API_KEY",
|
||||
# "api_base": "https://my-endpoint-canada-berri992.openai.azure.com",
|
||||
# "rpm": 6
|
||||
# }
|
||||
# }]
|
||||
# router = Router(model_list=model_list,
|
||||
# routing_strategy="least-busy",
|
||||
# set_verbose=False,
|
||||
# num_retries=3) # type: ignore
|
||||
|
||||
# async def call_azure_completion():
|
||||
# try:
|
||||
# response = await router.acompletion(
|
||||
# model="azure-model",
|
||||
# messages=[
|
||||
# {
|
||||
# "role": "user",
|
||||
# "content": "hello this request will pass"
|
||||
# }
|
||||
# ]
|
||||
# )
|
||||
# print("\n response", response)
|
||||
# return response
|
||||
# except:
|
||||
# return None
|
||||
|
||||
# n = 1000
|
||||
# start_time = time.time()
|
||||
# tasks = [call_azure_completion() for _ in range(n)]
|
||||
# chat_completions = await asyncio.gather(*tasks)
|
||||
# successful_completions = [c for c in chat_completions if c is not None]
|
||||
# print(n, time.time() - start_time, len(successful_completions))
|
||||
|
||||
# asyncio.run(test_least_busy_routing())
|
Loading…
Add table
Add a link
Reference in a new issue