diff --git a/litellm/tests/test_lowest_latency_routing.py b/litellm/tests/test_lowest_latency_routing.py index 6af690a0d..4b93853f4 100644 --- a/litellm/tests/test_lowest_latency_routing.py +++ b/litellm/tests/test_lowest_latency_routing.py @@ -477,3 +477,81 @@ async def test_router_completion_streaming(): # asyncio.run(test_router_completion_streaming()) + + +@pytest.mark.asyncio +async def test_lowest_latency_routing_with_timeouts(): + """ + PROD Test: + - Endpoint 1: triggers timeout errors (it takes 10+ seconds to respond) + - Endpoint 2: Responds in under 1s + - Run 5 requests to collect data on latency + - Run Wait till cache is filled with data + - Run 10 more requests + - All requests should have been routed to endpoint 2 + """ + import litellm + + litellm.set_verbose = True + + router = Router( + model_list=[ + { + "model_name": "azure-model", + "litellm_params": { + "model": "openai/slow-endpoint", + "api_base": "https://exampleopenaiendpoint-production-c715.up.railway.app/", # If you are Krrish, this is OpenAI Endpoint3 on our Railway endpoint :) + "api_key": "fake-key", + }, + "model_info": {"id": "slow-endpoint"}, + }, + { + "model_name": "azure-model", + "litellm_params": { + "model": "openai/fast-endpoint", + "api_base": "https://exampleopenaiendpoint-production.up.railway.app/", + "api_key": "fake-key", + }, + "model_info": {"id": "fast-endpoint"}, + }, + ], + routing_strategy="latency-based-routing", + set_verbose=True, + debug_level="DEBUG", + timeout=1, + ) # type: ignore + + # make 4 requests + for _ in range(4): + try: + response = await router.acompletion( + model="azure-model", messages=[{"role": "user", "content": "hello"}] + ) + print(response) + except Exception as e: + print("got exception", e) + + await asyncio.sleep(1) + print("done sending initial requests to collect latency") + """ + Note: for debugging + - By this point: slow-endpoint should have timed out 3-4 times and should be heavily penalized :) + - The next 10 requests should all be routed to the fast-endpoint + """ + + deployments = {} + # make 10 requests + for _ in range(10): + response = await router.acompletion( + model="azure-model", messages=[{"role": "user", "content": "hello"}] + ) + print(response) + _picked_model_id = response._hidden_params["model_id"] + if _picked_model_id not in deployments: + deployments[_picked_model_id] = 1 + else: + deployments[_picked_model_id] += 1 + print("deployments", deployments) + + # ALL the Requests should have been routed to the fast-endpoint + assert deployments["fast-endpoint"] == 10