forked from phoenix/litellm-mirror
Merge pull request #1182 from sumanth13131/usage-based-routing-fix
usage_based_routing_fix
This commit is contained in: commit 2df5ce4b7c
2 changed files with 53 additions and 2 deletions
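
In outline, the fix changes the key used for usage lookups: the router previously queried `_get_deployment_usage` with the bare `litellm_params["model"]` string, and now qualifies the deployment name with its provider (the explicit `custom_llm_provider` if set, otherwise the `litellm_provider` recorded in `litellm.model_cost`). A minimal sketch of that key construction, with `resolve_usage_key` as a hypothetical helper name (the committed logic is inline in `Router`, as the first hunk below shows):

    import litellm

    def resolve_usage_key(litellm_params: dict) -> str:
        # Hypothetical helper mirroring the inline logic in this PR's first hunk.
        deployment_name = litellm_params["model"]  # e.g. "gpt-3.5-turbo-0301"
        custom_llm_provider = litellm_params.get("custom_llm_provider", None)
        if custom_llm_provider is not None:
            return f"{custom_llm_provider}/{deployment_name}"  # e.g. "Custom-LLM/gpt-3.5-turbo-0301"
        litellm_provider = litellm.model_cost.get(deployment_name, {}).get("litellm_provider", None)
        if litellm_provider is not None:
            return f"{litellm_provider}/{deployment_name}"  # e.g. "openai/gpt-3.5-turbo-0301"
        return deployment_name  # no provider found: fall back to the bare name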
@@ -860,10 +860,22 @@ class Router:
         # ----------------------
         lowest_tpm = float("inf")
         deployment = None
 
+        # load model context map
+        models_context_map = litellm.model_cost
+
+
         # return deployment with lowest tpm usage
         for item in potential_deployments:
-            item_tpm, item_rpm = self._get_deployment_usage(deployment_name=item["litellm_params"]["model"])
+            deployment_name = item["litellm_params"]["model"]
+            custom_llm_provider = item["litellm_params"].get("custom_llm_provider", None)
+            if custom_llm_provider is not None:
+                deployment_name = f"{custom_llm_provider}/{deployment_name}"
+            else:
+                litellm_provider = models_context_map.get(deployment_name, {}).get("litellm_provider", None)
+                if litellm_provider is not None:
+                    deployment_name = f"{litellm_provider}/{deployment_name}"
+            item_tpm, item_rpm = self._get_deployment_usage(deployment_name=deployment_name)
 
             if item_tpm == 0:
                 return item
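
The hunk ends at the early return for an unused deployment; the continuation that compares `item_tpm` against `lowest_tpm` is elided from this diff. A sketch of how such a lowest-usage selection loop typically completes, consistent with the `lowest_tpm` / `deployment` variables initialized above (an assumption, not the committed code):

    def pick_lowest_tpm(potential_deployments, get_usage):
        # Assumed continuation of the selection loop; the diff cuts off
        # after the `if item_tpm == 0` early return.
        lowest_tpm = float("inf")
        deployment = None
        for item in potential_deployments:
            item_tpm, _item_rpm = get_usage(item)
            if item_tpm == 0:          # deployment unused so far: take it immediately
                return item
            if item_tpm < lowest_tpm:  # otherwise remember the least-used deployment
                lowest_tpm = item_tpm
                deployment = item
        return deployment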
@@ -124,4 +124,43 @@ async def test_acompletion_caching_on_router_caching_groups():
         pass
     except Exception as e:
         traceback.print_exc()
-        pytest.fail(f"Error occurred: {e}")
+        pytest.fail(f"Error occurred: {e}")
+
+def test_usage_based_routing_completion():
+    model_list = [
+        {
+            "model_name": "gpt-3.5-turbo",
+            "litellm_params": {
+                "model": "gpt-3.5-turbo-0301",
+                "api_key": os.getenv("OPENAI_API_KEY"),
+                "custom_llm_provider": "Custom-LLM"
+            },
+            "tpm": 10000,
+            "rpm": 5
+        },
+        {
+            "model_name": "gpt-3.5-turbo",
+            "litellm_params": {
+                "model": "gpt-3.5-turbo-0301",
+                "api_key": os.getenv("OPENAI_API_KEY"),
+            },
+            "tpm": 10000,
+            "rpm": 5
+        }
+    ]
+    router = Router(model_list=model_list,
+                    routing_strategy="usage-based-routing",
+                    set_verbose=False)
+    max_requests = 5
+    while max_requests > 0:
+        try:
+            router.completion(
+                model="gpt-3.5-turbo",
+                messages=[{"content": "write a one sentence poem.", "role": "user"}],
+            )
+        except ValueError as e:
+            traceback.print_exc()
+            pytest.fail(f"Error occurred: {e}")
+        finally:
+            max_requests -= 1
+    router.reset()
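
Outside pytest, the new test's routing behavior can be exercised directly; a minimal sketch assuming a valid OPENAI_API_KEY in the environment (model list abbreviated from the test above):

    import os
    from litellm import Router

    model_list = [
        {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo-0301",
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
            "tpm": 10000,
            "rpm": 5,
        },
    ]

    router = Router(
        model_list=model_list,
        routing_strategy="usage-based-routing",
        set_verbose=False,
    )

    # Each completion updates the tracked TPM/RPM for the chosen deployment,
    # so repeated calls steer traffic toward the least-used deployment.
    response = router.completion(
        model="gpt-3.5-turbo",
        messages=[{"content": "write a one sentence poem.", "role": "user"}],
    )
    print(response)

    router.reset()  # clear usage state between runs, as the test does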