diff --git a/litellm/router.py b/litellm/router.py
index 512a47a349..ebe0e877fd 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -1546,9 +1546,11 @@ class Router:
             ############## Available Deployments passed, we find the relevant item #################
             else:
                 ## check if min deployment is a string, if so, cast it to int
-                if isinstance(min_deployment, str):
-                    min_deployment = int(min_deployment)
                 for m in healthy_deployments:
+                    if isinstance(min_deployment, str) and isinstance(
+                        m["model_info"]["id"], int
+                    ):
+                        min_deployment = int(min_deployment)
                     if m["model_info"]["id"] == min_deployment:
                         return m
                 self.print_verbose(f"no healthy deployment with that id found!")
diff --git a/litellm/router_strategy/lowest_tpm_rpm.py b/litellm/router_strategy/lowest_tpm_rpm.py
index 2e53aae88e..9217383bb5 100644
--- a/litellm/router_strategy/lowest_tpm_rpm.py
+++ b/litellm/router_strategy/lowest_tpm_rpm.py
@@ -66,6 +66,7 @@ class LowestTPMLoggingHandler(CustomLogger):
             if self.test_flag:
                 self.logged_success += 1
         except Exception as e:
+            traceback.print_exc()
             pass
 
     async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
@@ -114,6 +115,7 @@ class LowestTPMLoggingHandler(CustomLogger):
             if self.test_flag:
                 self.logged_success += 1
         except Exception as e:
+            traceback.print_exc()
             pass
 
     def get_available_deployments(self, model_group: str):
diff --git a/litellm/tests/test_router_caching.py b/litellm/tests/test_router_caching.py
index 155c703ca7..d93288fcea 100644
--- a/litellm/tests/test_router_caching.py
+++ b/litellm/tests/test_router_caching.py
@@ -149,7 +149,7 @@ async def test_acompletion_caching_with_ttl_on_router():
 async def test_acompletion_caching_on_router_caching_groups():
     # tests acompletion + caching on router
     try:
-        litellm.set_verbose = True
+        # litellm.set_verbose = True
         model_list = [
             {
                 "model_name": "openai-gpt-3.5-turbo",
@@ -212,6 +212,7 @@ async def test_acompletion_caching_on_router_caching_groups():
 
 
 def test_usage_based_routing_completion():
+    litellm.set_verbose = True
     model_list = [
         {
             "model_name": "gpt-3.5-turbo",
@@ -249,3 +250,6 @@ def test_usage_based_routing_completion():
         finally:
             max_requests -= 1
     router.reset()
+
+
+test_usage_based_routing_completion()
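
Note on the litellm/router.py hunk: deployment ids in model_info may be stored as strings (e.g. uuid-style ids) rather than ints, so the old unconditional int(min_deployment) cast raised ValueError whenever the id was non-numeric. The patch defers the cast and applies it only when the candidate deployment's id is actually an int. A minimal runnable sketch of the failure mode (the example ids below are made up, not from the patch):

    # Assumed example values, not from the patch: ids stored as strings.
    healthy_deployments = [
        {"model_info": {"id": "6b6f4f0e-usage-test"}},
        {"model_info": {"id": "91c08f1a-usage-test"}},
    ]
    min_deployment = "91c08f1a-usage-test"

    # Old behavior: int(min_deployment) -> ValueError: invalid literal for int()

    # Patched behavior: cast only when the candidate's id is actually an int,
    # so uuid-style string ids are compared as-is and the lookup succeeds.
    for m in healthy_deployments:
        if isinstance(min_deployment, str) and isinstance(m["model_info"]["id"], int):
            min_deployment = int(min_deployment)
        if m["model_info"]["id"] == min_deployment:
            print("matched deployment:", m["model_info"]["id"])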