Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 11:43:54 +00:00)
[Fix-Router] Don't cooldown when only 1 deployment exists (#5673)
* fix get model list
* fix test custom callback router
* fix embedding fallback test
* fix router retry policy on AuthErrors
* fix router test
* add test for single deployments no cooldown test prod
* add test test_single_deployment_no_cooldowns_test_prod_mock_completion_calls
Parent: 40c52f9263
Commit: e7c22f63e7
4 changed files with 128 additions and 17 deletions
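The router-side guard itself is not part of this hunk; the diff below only adds tests. Conceptually, the change amounts to skipping cooldown when a model group has a single deployment. A minimal sketch of that check is shown here, using a hypothetical helper name and signature rather than litellm's actual internals:

    # Hypothetical sketch only -- the helper name and signature are assumptions,
    # not litellm's actual implementation.
    from typing import Dict, List


    def should_cooldown_deployment(
        deployment_id: str, deployments_for_model: List[Dict]
    ) -> bool:
        """Skip cooldown when a deployment is the only one serving its model group."""
        if len(deployments_for_model) == 1:
            # Cooling down the only deployment would leave the model group with
            # nothing to route to, so keep it in rotation and let the request
            # fail (or be retried) instead.
            return False
        return True

The tests below exercise exactly this behavior: a single-deployment model group is hammered with rate-limit errors and the cooldown cache is expected to stay empty.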
@@ -150,3 +150,100 @@ def test_single_deployment_no_cooldowns(num_deployments):
            mock_client.assert_not_called()
        else:
            mock_client.assert_called_once()


@pytest.mark.asyncio
async def test_single_deployment_no_cooldowns_test_prod():
    """
    Do not cooldown on single deployment.

    """
    router = Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                },
            },
            {
                "model_name": "gpt-5",
                "litellm_params": {
                    "model": "openai/gpt-5",
                },
            },
            {
                "model_name": "gpt-12",
                "litellm_params": {
                    "model": "openai/gpt-12",
                },
            },
        ],
        allowed_fails=0,
        num_retries=0,
    )

    with patch.object(
        router.cooldown_cache, "add_deployment_to_cooldown", new=MagicMock()
    ) as mock_client:
        try:
            await router.acompletion(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": "Hey, how's it going?"}],
                mock_response="litellm.RateLimitError",
            )
        except litellm.RateLimitError:
            pass

        await asyncio.sleep(2)

        mock_client.assert_not_called()


@pytest.mark.asyncio
async def test_single_deployment_no_cooldowns_test_prod_mock_completion_calls():
    """
    Do not cooldown on single deployment.

    """
    router = Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "gpt-3.5-turbo",
                },
            },
            {
                "model_name": "gpt-5",
                "litellm_params": {
                    "model": "openai/gpt-5",
                },
            },
            {
                "model_name": "gpt-12",
                "litellm_params": {
                    "model": "openai/gpt-12",
                },
            },
        ],
    )

    for _ in range(20):
        try:
            await router.acompletion(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": "Hey, how's it going?"}],
                mock_response="litellm.RateLimitError",
            )
        except litellm.RateLimitError:
            pass

    cooldown_list = await router._async_get_cooldown_deployments()
    assert len(cooldown_list) == 0

    healthy_deployments, _ = await router._async_get_healthy_deployments(
        model="gpt-3.5-turbo"
    )

    print("healthy_deployments: ", healthy_deployments)
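For context, the tests above rely on names imported earlier in the test file, outside this hunk. A plausible preamble (an assumption, since the import block is not shown in the diff) would be:

    # Assumed imports for the added tests; the actual import block is not in this hunk.
    import asyncio
    from unittest.mock import MagicMock, patch

    import pytest

    import litellm
    from litellm import Router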