From 96e23daf4dd914c290d6d9ceff71d9213910f2a2 Mon Sep 17 00:00:00 2001
From: sumanth
Date: Tue, 19 Dec 2023 13:15:48 +0530
Subject: [PATCH] fix: qualify deployment usage keys with the LLM provider in
 usage-based routing

---
 litellm/router.py                    | 14 +++++++++-
 litellm/tests/test_router_caching.py | 41 +++++++++++++++++++++++++++-
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/litellm/router.py b/litellm/router.py
index 410d4964e..aa0966f45 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -770,10 +770,22 @@ class Router:
         # ----------------------
         lowest_tpm = float("inf")
         deployment = None
+
+        # load litellm's model cost map, which records each model's provider
+        models_context_map = litellm.model_cost
+
         # return deployment with lowest tpm usage
         for item in potential_deployments:
-            item_tpm, item_rpm = self._get_deployment_usage(deployment_name=item["litellm_params"]["model"])
+            deployment_name = item["litellm_params"]["model"]
+            custom_llm_provider = item["litellm_params"].get("custom_llm_provider", None)
+            if custom_llm_provider is not None:
+                deployment_name = f"{custom_llm_provider}/{deployment_name}"
+            else:
+                litellm_provider = models_context_map.get(deployment_name, {}).get("litellm_provider", None)
+                if litellm_provider is not None:
+                    deployment_name = f"{litellm_provider}/{deployment_name}"
+            item_tpm, item_rpm = self._get_deployment_usage(deployment_name=deployment_name)
 
             if item_tpm == 0:
                 return item
diff --git a/litellm/tests/test_router_caching.py b/litellm/tests/test_router_caching.py
index 27191c8d2..006a5b50c 100644
--- a/litellm/tests/test_router_caching.py
+++ b/litellm/tests/test_router_caching.py
@@ -124,4 +124,43 @@ async def test_acompletion_caching_on_router_caching_groups():
         pass
     except Exception as e:
         traceback.print_exc()
-        pytest.fail(f"Error occurred: {e}")
\ No newline at end of file
+        pytest.fail(f"Error occurred: {e}")
+
+def test_usage_based_routing_completion():
+    model_list = [
+        {
+            "model_name": "gpt-3.5-turbo",
+            "litellm_params": {
+                "model": "gpt-3.5-turbo-0301",
+                "api_key": os.getenv("OPENAI_API_KEY"),
+                "custom_llm_provider": "Custom-LLM",
+            },
+            "tpm": 10000,
+            "rpm": 5,
+        },
+        {
+            "model_name": "gpt-3.5-turbo",
+            "litellm_params": {
+                "model": "gpt-3.5-turbo-0301",
+                "api_key": os.getenv("OPENAI_API_KEY"),
+            },
+            "tpm": 10000,
+            "rpm": 5,
+        },
+    ]
+    router = Router(model_list=model_list,
+                    routing_strategy="usage-based-routing",
+                    set_verbose=False)
+    max_requests = 5
+    while max_requests > 0:
+        try:
+            router.completion(
+                model="gpt-3.5-turbo",
+                messages=[{"content": "write a one sentence poem.", "role": "user"}],
+            )
+        except ValueError as e:
+            traceback.print_exc()
+            pytest.fail(f"Error occurred: {e}")
+        finally:
+            max_requests -= 1
+    router.reset()
\ No newline at end of file
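
Reviewer note (not part of the patch): the core of this change is the provider-qualified key used for TPM/RPM usage lookups. Below is a minimal standalone sketch of that resolution logic; resolve_deployment_key is a hypothetical name used only for illustration, not a helper introduced by this patch, and it assumes entries in litellm.model_cost carry a "litellm_provider" field.

    import litellm

    def resolve_deployment_key(litellm_params: dict) -> str:
        """Prefix the model name with its provider so TPM/RPM usage is
        tracked per provider rather than per bare model name."""
        deployment_name = litellm_params["model"]
        # an explicitly configured provider wins
        custom_llm_provider = litellm_params.get("custom_llm_provider")
        if custom_llm_provider is not None:
            return f"{custom_llm_provider}/{deployment_name}"
        # otherwise fall back to the provider recorded in litellm's cost map
        litellm_provider = litellm.model_cost.get(deployment_name, {}).get("litellm_provider")
        if litellm_provider is not None:
            return f"{litellm_provider}/{deployment_name}"
        return deployment_name

    # The two deployments in the new test resolve to distinct usage keys:
    print(resolve_deployment_key({"model": "gpt-3.5-turbo-0301",
                                  "custom_llm_provider": "Custom-LLM"}))
    # -> "Custom-LLM/gpt-3.5-turbo-0301"
    print(resolve_deployment_key({"model": "gpt-3.5-turbo-0301"}))
    # -> "openai/gpt-3.5-turbo-0301" (given the current cost map entry)

With this keying, two deployments of the same underlying model served by different providers no longer share a usage counter, which is exactly what the new test exercises via its "Custom-LLM" deployment.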