Merge pull request #3358 from sumanth13131/usage-based-routing-RPM-fix

usage based routing RPM count fix
This commit is contained in:
Krish Dholakia 2024-04-29 16:45:25 -07:00 committed by GitHub
commit 32534b5e91
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 31 additions and 2 deletions

View file

@ -206,7 +206,7 @@ class LowestTPMLoggingHandler(CustomLogger):
if item_tpm + input_tokens > _deployment_tpm:
continue
elif (rpm_dict is not None and item in rpm_dict) and (
rpm_dict[item] + 1 > _deployment_rpm
rpm_dict[item] + 1 >= _deployment_rpm
):
continue
elif item_tpm < lowest_tpm:

View file

@ -366,7 +366,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
if item_tpm + input_tokens > _deployment_tpm:
continue
elif (rpm_dict is not None and item in rpm_dict) and (
rpm_dict[item] + 1 > _deployment_rpm
rpm_dict[item] + 1 >= _deployment_rpm
):
continue
elif item_tpm == lowest_tpm:

View file

@ -264,3 +264,32 @@ async def test_acompletion_caching_on_router_caching_groups():
except Exception as e:
traceback.print_exc()
pytest.fail(f"Error occurred: {e}")
def test_rpm_limiting():
    """Verify that usage-based routing enforces the per-deployment RPM cap.

    A single deployment is registered with ``rpm: 3``; of 10 sequential
    completion calls the router should allow 3 and reject the remaining 7
    with a ``ValueError`` (no deployment available under the RPM limit).

    Requires a valid ``OPENAI_API_KEY`` in the environment (live calls).
    """
    try:
        litellm.set_verbose = True
        model_list = [
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                    "api_key": os.getenv("OPENAI_API_KEY"),
                },
                "tpm": 10000,
                "rpm": 3,
            },
        ]
        router = Router(
            model_list=model_list,
            routing_strategy="usage-based-routing",
        )
        failed_count = 0
        for _ in range(10):
            try:
                router.completion(
                    model="gpt-3.5-turbo",
                    messages=[{"role": "user", "content": ""}],
                )
            except ValueError:
                # Router raises ValueError when no deployment is available
                # under the configured RPM limit.
                failed_count += 1
        # rpm=3 admits exactly 3 of the 10 requests; the other 7 must fail.
        assert failed_count == 7
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")