Merge pull request #3358 from sumanth13131/usage-based-routing-RPM-fix

usage based routing RPM count fix
This commit is contained in:
Krish Dholakia 2024-04-29 16:45:25 -07:00 committed by GitHub
commit 32534b5e91
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 31 additions and 2 deletions

View file

@ -206,7 +206,7 @@ class LowestTPMLoggingHandler(CustomLogger):
if item_tpm + input_tokens > _deployment_tpm:
continue
elif (rpm_dict is not None and item in rpm_dict) and (
rpm_dict[item] + 1 > _deployment_rpm
rpm_dict[item] + 1 >= _deployment_rpm
):
continue
elif item_tpm < lowest_tpm:

View file

@ -366,7 +366,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
if item_tpm + input_tokens > _deployment_tpm:
continue
elif (rpm_dict is not None and item in rpm_dict) and (
rpm_dict[item] + 1 > _deployment_rpm
rpm_dict[item] + 1 >= _deployment_rpm
):
continue
elif item_tpm == lowest_tpm:

View file

@ -264,3 +264,32 @@ async def test_acompletion_caching_on_router_caching_groups():
except Exception as e:
traceback.print_exc()
pytest.fail(f"Error occurred: {e}")
def test_rpm_limiting():
    """Verify that usage-based routing enforces the per-deployment RPM cap.

    A single deployment is registered with ``rpm: 3``; of 10 sequential
    completion calls the router should allow 3 and reject the remaining 7
    with a ``ValueError`` (no deployment available under the RPM limit).

    Requires a valid ``OPENAI_API_KEY`` in the environment (live calls).
    """
    try:
        litellm.set_verbose = True
        model_list = [
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "api_key": os.getenv("OPENAI_API_KEY"),
                },
                "tpm": 10000,
                "rpm": 3,
            },
        ]
        router = Router(
            model_list=model_list,
            routing_strategy="usage-based-routing",
        )
        failed_count = 0
        for _ in range(10):
            try:
                router.completion(
                    model="gpt-3.5-turbo",
                    messages=[{"role": "user", "content": ""}],
                )
            except ValueError:
                # Router raises ValueError when no deployment is available
                # under the configured RPM limit.
                failed_count += 1
        # rpm=3 admits exactly 3 of the 10 requests; the other 7 must fail.
        assert failed_count == 7
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")