forked from phoenix/litellm-mirror
test - lowest latency logger
parent bf6abed808
commit 2e6fc91a75
1 changed file with 78 additions and 0 deletions
@@ -477,3 +477,81 @@ async def test_router_completion_streaming():
# asyncio.run(test_router_completion_streaming())


@pytest.mark.asyncio
async def test_lowest_latency_routing_with_timeouts():
    """
    PROD Test:
    - Endpoint 1: triggers timeout errors (it takes 10+ seconds to respond)
    - Endpoint 2: responds in under 1s
    - Run 4 requests to collect latency data
    - Wait till the cache is filled with that data
    - Run 10 more requests
    - All 10 requests should have been routed to endpoint 2
    """
    import litellm

    litellm.set_verbose = True

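    # NOTE: both api_base URLs below appear to point at mock OpenAI-compatible
    # test servers (hence api_key="fake-key"): one artificially slow, one fast.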
    router = Router(
        model_list=[
            {
                "model_name": "azure-model",
                "litellm_params": {
                    "model": "openai/slow-endpoint",
                    "api_base": "https://exampleopenaiendpoint-production-c715.up.railway.app/",  # If you are Krrish, this is OpenAI Endpoint3 on our Railway endpoint :)
                    "api_key": "fake-key",
                },
                "model_info": {"id": "slow-endpoint"},
            },
            {
                "model_name": "azure-model",
                "litellm_params": {
                    "model": "openai/fast-endpoint",
                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
                    "api_key": "fake-key",
                },
                "model_info": {"id": "fast-endpoint"},
            },
        ],
        routing_strategy="latency-based-routing",
        set_verbose=True,
        debug_level="DEBUG",
        timeout=1,
    )  # type: ignore
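
    # With timeout=1 and a slow endpoint that takes 10+ seconds to respond,
    # every request routed to "slow-endpoint" should end in a timeout error,
    # which the latency-based strategy records against that deployment.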

    # make 4 requests
    for _ in range(4):
        try:
            response = await router.acompletion(
                model="azure-model", messages=[{"role": "user", "content": "hello"}]
            )
            print(response)
        except Exception as e:
            print("got exception", e)

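    # Give the router a moment to flush the collected latency data into its
    # cache (per the docstring: wait till the cache is filled with data).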
    await asyncio.sleep(1)
    print("done sending initial requests to collect latency")
    """
    Note: for debugging
    - By this point: slow-endpoint should have timed out 3-4 times and should be heavily penalized :)
    - The next 10 requests should all be routed to the fast-endpoint
    """
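
    # Tally which deployment served each request, using the model_id that the
    # router attaches to response._hidden_params.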
    deployments = {}
    # make 10 requests
    for _ in range(10):
        response = await router.acompletion(
            model="azure-model", messages=[{"role": "user", "content": "hello"}]
        )
        print(response)
        _picked_model_id = response._hidden_params["model_id"]
        if _picked_model_id not in deployments:
            deployments[_picked_model_id] = 1
        else:
            deployments[_picked_model_id] += 1
    print("deployments", deployments)

    # ALL the requests should have been routed to the fast-endpoint
    assert deployments["fast-endpoint"] == 10
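

# For intuition: a minimal, self-contained sketch of the idea this test
# exercises (not litellm's actual implementation). A latency-based picker
# keeps per-deployment latency samples, records timed-out calls at a
# worst-case value, and routes each request to the deployment with the
# lowest average latency. All names below are illustrative.
def pick_lowest_latency_deployment(latency_samples: dict) -> str:
    """Return the deployment id with the lowest mean recorded latency."""
    return min(
        latency_samples,
        key=lambda deployment: sum(latency_samples[deployment])
        / len(latency_samples[deployment]),
    )


def test_pick_lowest_latency_deployment_sketch():
    samples = {
        "slow-endpoint": [10.0, 10.0, 10.0],  # timed-out calls, logged at the cap
        "fast-endpoint": [0.4, 0.5, 0.3],
    }
    assert pick_lowest_latency_deployment(samples) == "fast-endpoint"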