diff --git a/litellm/tests/test_lowest_latency_routing.py b/litellm/tests/test_lowest_latency_routing.py index 13627b7136..e7be2050d5 100644 --- a/litellm/tests/test_lowest_latency_routing.py +++ b/litellm/tests/test_lowest_latency_routing.py @@ -9,7 +9,9 @@ from dotenv import load_dotenv load_dotenv() import os -sys.path.insert(0, os.path.abspath("../..")) # Adds the parent directory to the system path +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path import pytest from litellm import Router from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler @@ -47,7 +49,8 @@ def test_latency_updated(): ) latency_key = f"{model_group}_map" assert ( - end_time - start_time == test_cache.get_cache(key=latency_key)[deployment_id]["latency"][0] + end_time - start_time + == test_cache.get_cache(key=latency_key)[deployment_id]["latency"][0] ) @@ -195,7 +198,9 @@ async def _gather_deploy(all_deploys): return await asyncio.gather(*[_deploy(*t) for t in all_deploys]) -@pytest.mark.parametrize("ans_rpm", [1, 5]) # 1 should produce nothing, 10 should select first +@pytest.mark.parametrize( + "ans_rpm", [1, 5] +) # 1 should produce nothing, 10 should select first def test_get_available_endpoints_tpm_rpm_check_async(ans_rpm): """ Pass in list of 2 valid models @@ -240,7 +245,9 @@ def test_get_available_endpoints_tpm_rpm_check_async(ans_rpm): # test_get_available_endpoints_tpm_rpm_check_async() -@pytest.mark.parametrize("ans_rpm", [1, 5]) # 1 should produce nothing, 10 should select first +@pytest.mark.parametrize( + "ans_rpm", [1, 5] +) # 1 should produce nothing, 10 should select first def test_get_available_endpoints_tpm_rpm_check(ans_rpm): """ Pass in list of 2 valid models @@ -409,7 +416,9 @@ def test_router_get_available_deployments(): @pytest.mark.asyncio async def test_router_completion_streaming(): - messages = [{"role": "user", "content": "Hello, can you generate a 500 words poem?"}] + messages = [ + {"role": "user", "content": "Hello, can you generate a 500 words poem?"} + ] model = "azure-model" model_list = [ { @@ -459,8 +468,10 @@ async def test_router_completion_streaming(): final_response = await router.acompletion(model=model, messages=messages) print(f"min deployment id: {picked_deployment}") print(f"model id: {final_response._hidden_params['model_id']}") - assert final_response._hidden_params["model_id"] == picked_deployment["model_info"]["id"] + assert ( + final_response._hidden_params["model_id"] + == picked_deployment["model_info"]["id"] + ) # asyncio.run(test_router_completion_streaming()) -# %% diff --git a/tests/test_ratelimit.py b/tests/test_ratelimit.py index 488d7c3995..565f4c3d3d 100644 --- a/tests/test_ratelimit.py +++ b/tests/test_ratelimit.py @@ -56,17 +56,23 @@ def calculate_limits(list_of_messages): Return the min rpm and tpm level that would let all messages in list_of_messages be sent this minute """ rpm = len(list_of_messages) - tpm = sum((utils.token_counter(messages=m) + COMPLETION_TOKENS for m in list_of_messages)) + tpm = sum( + (utils.token_counter(messages=m) + COMPLETION_TOKENS for m in list_of_messages) + ) return rpm, tpm async def async_call(router: Router, list_of_messages) -> Any: - tasks = [router.acompletion(model="gpt-3.5-turbo", messages=m) for m in list_of_messages] + tasks = [ + router.acompletion(model="gpt-3.5-turbo", messages=m) for m in list_of_messages + ] return await asyncio.gather(*tasks) def sync_call(router: Router, list_of_messages) -> Any: - return [router.completion(model="gpt-3.5-turbo", messages=m) for m in list_of_messages] + return [ + router.completion(model="gpt-3.5-turbo", messages=m) for m in list_of_messages + ] class ExpectNoException(Exception): @@ -77,22 +83,26 @@ class ExpectNoException(Exception): "num_try_send, num_allowed_send", [ (2, 2), # sending as many as allowed, ExpectNoException - (10, 10), # sending as many as allowed, ExpectNoException + # (10, 10), # sending as many as allowed, ExpectNoException (3, 2), # Sending more than allowed, ValueError - (10, 9), # Sending more than allowed, ValueError + # (10, 9), # Sending more than allowed, ValueError ], ) -@pytest.mark.parametrize("sync_mode", [True, False]) # Use parametrization for sync/async +@pytest.mark.parametrize( + "sync_mode", [True, False] +) # Use parametrization for sync/async @pytest.mark.parametrize( "routing_strategy", [ "usage-based-routing", # "simple-shuffle", # dont expect to rate limit # "least-busy", # dont expect to rate limit - "latency-based-routing", + # "latency-based-routing", ], ) -def test_rate_limit(router_factory, num_try_send, num_allowed_send, sync_mode, routing_strategy): +def test_rate_limit( + router_factory, num_try_send, num_allowed_send, sync_mode, routing_strategy +): """ Check if router.completion and router.acompletion can send more messages than they've been limited to. Args: @@ -105,7 +115,9 @@ def test_rate_limit(router_factory, num_try_send, num_allowed_send, sync_mode, r ExpectNoException: Signfies that no other error has happened. A NOP """ # Can send more messages then we're going to; so don't expect a rate limit error - expected_exception = ExpectNoException if num_try_send <= num_allowed_send else ValueError + expected_exception = ( + ExpectNoException if num_try_send <= num_allowed_send else ValueError + ) list_of_messages = generate_list_of_messages(max(num_try_send, num_allowed_send)) rpm, tpm = calculate_limits(list_of_messages[:num_allowed_send])