test(test_least_busy_routing.py): avoid deployments with low rate limits

2024-05-09 13:54:24 -07:00 · 2024-05-09 13:54:24 -07:00 · c4295e1667
commit c4295e1667
parent 927d36148f
1 changed file with 7 additions and 7 deletions
--- a/litellm/tests/test_least_busy_routing.py
+++ b/litellm/tests/test_least_busy_routing.py
@@ -150,9 +150,9 @@ async def test_router_atext_completion_streaming():
        {
            "model_name": "azure-model",
            "litellm_params": {
-                "model": "azure/gpt-35-turbo",
-                "api_key": "os.environ/AZURE_EUROPE_API_KEY",
-                "api_base": "https://my-endpoint-europe-berri-992.openai.azure.com",
+                "model": "azure/gpt-turbo",
+                "api_key": "os.environ/AZURE_FRANCE_API_KEY",
+                "api_base": "https://openai-france-1234.openai.azure.com",
                "rpm": 6,
            },
            "model_info": {"id": 2},
@@ -160,9 +160,9 @@ async def test_router_atext_completion_streaming():
        {
            "model_name": "azure-model",
            "litellm_params": {
-                "model": "azure/gpt-35-turbo",
-                "api_key": "os.environ/AZURE_CANADA_API_KEY",
-                "api_base": "https://my-endpoint-canada-berri992.openai.azure.com",
+                "model": "azure/gpt-turbo",
+                "api_key": "os.environ/AZURE_FRANCE_API_KEY",
+                "api_base": "https://openai-france-1234.openai.azure.com",
                "rpm": 6,
            },
            "model_info": {"id": 3},
@@ -193,7 +193,7 @@ async def test_router_atext_completion_streaming():
    ## check if calls equally distributed
    cache_dict = router.cache.get_cache(key=cache_key)
    for k, v in cache_dict.items():
-        assert v == 1
+        assert v == 1, f"Failed. K={k} called v={v} times, cache_dict={cache_dict}"


 # asyncio.run(test_router_atext_completion_streaming())