refactor(lowest_tpm_rpm.py): move TPM/RPM-based routing to a separate file for better testing

This commit is contained in:
Krrish Dholakia 2023-12-29 18:33:43 +05:30
parent 3fa1bb9f08
commit a30f00276b
4 changed files with 410 additions and 154 deletions

View file

@ -1,13 +1,6 @@
#### What this tests ####
# This tests the router's ability to identify the least busy deployment
#
# How is this achieved?
# - Before each call, have the router print the state of requests: {"deployment": "requests_in_flight"}
# - Use litellm.input_callbacks to log when a request is about to be made to a model: {"deployment-id": traffic}
# - Use litellm.success + failure callbacks to log when a request has completed
# - In get_available_deployment, for a given model group name, pick a deployment based on traffic
import sys, os, asyncio, time
import traceback
from dotenv import load_dotenv
@ -137,4 +130,4 @@ def test_router_get_available_deployments():
assert return_dict[3] == 100
test_router_get_available_deployments()
# test_router_get_available_deployments()