From 89e655c79ec7436406c71f011be3513e48ec7501 Mon Sep 17 00:00:00 2001 From: sumanth Date: Tue, 30 Apr 2024 00:29:38 +0530 Subject: [PATCH] usage based routing RPM count fix --- litellm/router_strategy/lowest_tpm_rpm.py | 2 +- litellm/router_strategy/lowest_tpm_rpm_v2.py | 2 +- litellm/tests/test_router_caching.py | 29 ++++++++++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/litellm/router_strategy/lowest_tpm_rpm.py b/litellm/router_strategy/lowest_tpm_rpm.py index 0437c2aff..0a7773a84 100644 --- a/litellm/router_strategy/lowest_tpm_rpm.py +++ b/litellm/router_strategy/lowest_tpm_rpm.py @@ -206,7 +206,7 @@ class LowestTPMLoggingHandler(CustomLogger): if item_tpm + input_tokens > _deployment_tpm: continue elif (rpm_dict is not None and item in rpm_dict) and ( - rpm_dict[item] + 1 > _deployment_rpm + rpm_dict[item] + 1 >= _deployment_rpm ): continue elif item_tpm < lowest_tpm: diff --git a/litellm/router_strategy/lowest_tpm_rpm_v2.py b/litellm/router_strategy/lowest_tpm_rpm_v2.py index 39dbcd9d0..f61484e08 100644 --- a/litellm/router_strategy/lowest_tpm_rpm_v2.py +++ b/litellm/router_strategy/lowest_tpm_rpm_v2.py @@ -366,7 +366,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger): if item_tpm + input_tokens > _deployment_tpm: continue elif (rpm_dict is not None and item in rpm_dict) and ( - rpm_dict[item] + 1 > _deployment_rpm + rpm_dict[item] + 1 >= _deployment_rpm ): continue elif item_tpm < lowest_tpm: diff --git a/litellm/tests/test_router_caching.py b/litellm/tests/test_router_caching.py index ebace161c..ce03498f9 100644 --- a/litellm/tests/test_router_caching.py +++ b/litellm/tests/test_router_caching.py @@ -264,3 +264,32 @@ async def test_acompletion_caching_on_router_caching_groups(): except Exception as e: traceback.print_exc() pytest.fail(f"Error occurred: {e}") + +def test_rpm_limiting(): + try: + litellm.set_verbose = True + model_list = [ + { + "model_name": "gpt-3.5-turbo", + "litellm_params": { + "model": "gpt-3.5-turbo", + "api_key": os.getenv("OPENAI_API_KEY"), + }, + "tpm": 10000, + "rpm": 3, + }, + ] + + router = Router( + model_list = model_list, + routing_strategy = "usage-based-routing", + ) + failedCount = 0 + for i in range(10): + try: + response = router.completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": ""}]) + except ValueError as e: + failedCount += 1 + assert failedCount == 7 + except Exception as e: + pytest.fail(f"An exception occurred - {str(e)}") \ No newline at end of file