test(test_least_busy_routing.py): avoid deployments with low rate limits

This commit is contained in:
Krrish Dholakia 2024-05-09 13:54:24 -07:00
parent 927d36148f
commit c4295e1667

View file

@ -150,9 +150,9 @@ async def test_router_atext_completion_streaming():
{ {
"model_name": "azure-model", "model_name": "azure-model",
"litellm_params": { "litellm_params": {
"model": "azure/gpt-35-turbo", "model": "azure/gpt-turbo",
"api_key": "os.environ/AZURE_EUROPE_API_KEY", "api_key": "os.environ/AZURE_FRANCE_API_KEY",
"api_base": "https://my-endpoint-europe-berri-992.openai.azure.com", "api_base": "https://openai-france-1234.openai.azure.com",
"rpm": 6, "rpm": 6,
}, },
"model_info": {"id": 2}, "model_info": {"id": 2},
@ -160,9 +160,9 @@ async def test_router_atext_completion_streaming():
{ {
"model_name": "azure-model", "model_name": "azure-model",
"litellm_params": { "litellm_params": {
"model": "azure/gpt-35-turbo", "model": "azure/gpt-turbo",
"api_key": "os.environ/AZURE_CANADA_API_KEY", "api_key": "os.environ/AZURE_FRANCE_API_KEY",
"api_base": "https://my-endpoint-canada-berri992.openai.azure.com", "api_base": "https://openai-france-1234.openai.azure.com",
"rpm": 6, "rpm": 6,
}, },
"model_info": {"id": 3}, "model_info": {"id": 3},
@ -193,7 +193,7 @@ async def test_router_atext_completion_streaming():
## check if calls equally distributed ## check if calls equally distributed
cache_dict = router.cache.get_cache(key=cache_key) cache_dict = router.cache.get_cache(key=cache_key)
for k, v in cache_dict.items(): for k, v in cache_dict.items():
assert v == 1 assert v == 1, f"Failed. K={k} called v={v} times, cache_dict={cache_dict}"
# asyncio.run(test_router_atext_completion_streaming()) # asyncio.run(test_router_atext_completion_streaming())