litellm-mirror/litellm/tests/test_router_cooldowns.py


#### What this tests ####
# This tests the router's deployment cooldown behavior
import asyncio
import os
import sys
import time
import traceback

import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

from unittest.mock import AsyncMock, MagicMock, patch

import httpx
import openai

import litellm
from litellm import Router
from litellm.integrations.custom_logger import CustomLogger
from litellm.types.router import DeploymentTypedDict, LiteLLMParamsTypedDict


@pytest.mark.asyncio
async def test_cooldown_badrequest_error():
    """
    Test 1. It SHOULD NOT cooldown a deployment on a BadRequestError
    """

    router = litellm.Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "azure/chatgpt-v-2",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                },
            }
        ],
        debug_level="DEBUG",
        set_verbose=True,
        cooldown_time=300,
        num_retries=0,
        allowed_fails=0,
    )

    # Act & Assert
    try:
        response = await router.acompletion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "gm"}],
            bad_param=200,
        )
    except Exception:
        pass

    # give cooldown bookkeeping time to run; the deployment should NOT be cooled down
    await asyncio.sleep(3)

    response = await router.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "gm"}],
        mock_response="hello",
    )

    assert response is not None

    print(response)
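

# Illustrative sketch (not part of the original suite): the same "no cooldown on
# a BadRequestError" expectation as test 1, made deterministic by patching the
# cooldown cache (as in test_single_deployment_no_cooldowns below) instead of
# sleeping. Assumes litellm.BadRequestError accepts (message, model,
# llm_provider) and that a mock_response exception set in litellm_params is
# re-raised by the router, as in test_dynamic_cooldowns.
def test_cooldown_badrequest_error_sketch():
    mock_error = litellm.BadRequestError(
        message="bad request", model="gpt-1", llm_provider="openai"
    )
    router = Router(
        model_list=[
            {
                "model_name": "my-fake-model",
                "litellm_params": {
                    "model": "openai/gpt-1",
                    "api_key": "my-key",
                    "mock_response": mock_error,  # hypothetical mocked 400 error
                },
            },
            {
                "model_name": "my-fake-model",
                "litellm_params": {
                    "model": "openai/gpt-2",
                    "api_key": "my-key",
                    "mock_response": mock_error,  # both deployments fail the same way
                },
            },
        ],
        allowed_fails=0,
        num_retries=0,
    )

    with patch.object(
        router.cooldown_cache, "add_deployment_to_cooldown", new=MagicMock()
    ) as mock_cooldown:
        try:
            router.completion(
                model="my-fake-model",
                messages=[{"role": "user", "content": "gm"}],
            )
        except Exception:
            pass

        # per test 1's docstring, a BadRequestError should never trigger a cooldown
        mock_cooldown.assert_not_called()

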
@pytest.mark.asyncio
async def test_dynamic_cooldowns():
    """
    Assert kwargs for completion/embedding have 'cooldown_time' as a litellm_param
    """
    # litellm.set_verbose = True
    tmp_mock = MagicMock()

    litellm.failure_callback = [tmp_mock]

    router = Router(
        model_list=[
            {
                "model_name": "my-fake-model",
                "litellm_params": {
                    "model": "openai/gpt-1",
                    "api_key": "my-key",
                    "mock_response": Exception("this is an error"),
                },
            }
        ],
        cooldown_time=60,
    )

    try:
        _ = router.completion(
            model="my-fake-model",
            messages=[{"role": "user", "content": "Hey, how's it going?"}],
            cooldown_time=0,
            num_retries=0,
        )
    except Exception:
        pass

    tmp_mock.assert_called_once()

    print(tmp_mock.call_count)

    assert "cooldown_time" in tmp_mock.call_args[0][0]["litellm_params"]
    assert tmp_mock.call_args[0][0]["litellm_params"]["cooldown_time"] == 0
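

# Usage sketch (not part of the original suite, and not collected by pytest):
# the router-wide cooldown_time default can be overridden per request, and the
# per-call value is what lands in litellm_params on failure callbacks, as
# asserted above. Uses only Router options already exercised in this file.
def _dynamic_cooldown_usage_sketch():
    router = Router(
        model_list=[
            {
                "model_name": "my-fake-model",
                "litellm_params": {"model": "openai/gpt-1", "api_key": "my-key"},
            }
        ],
        cooldown_time=60,  # router-wide default
    )
    try:
        router.completion(
            model="my-fake-model",
            messages=[{"role": "user", "content": "hi"}],
            cooldown_time=0,  # override for this call only
            num_retries=0,
        )
    except Exception:
        pass  # the fake key fails; only the kwargs plumbing is of interest here

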
@pytest.mark.parametrize("num_deployments", [1, 2])
def test_single_deployment_no_cooldowns(num_deployments):
    """
    Do not cooldown on single deployment.
    Cooldown on multiple deployments.
    """
    model_list = []
    for i in range(num_deployments):
        model = DeploymentTypedDict(
            model_name="gpt-3.5-turbo",
            litellm_params=LiteLLMParamsTypedDict(
                model="gpt-3.5-turbo",
            ),
        )
        model_list.append(model)

    router = Router(model_list=model_list, allowed_fails=0, num_retries=0)

    with patch.object(
        router.cooldown_cache, "add_deployment_to_cooldown", new=MagicMock()
    ) as mock_client:
        try:
            router.completion(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": "Hey, how's it going?"}],
                mock_response="litellm.RateLimitError",
            )
        except litellm.RateLimitError:
            pass

        if num_deployments == 1:
            mock_client.assert_not_called()
        else:
            mock_client.assert_called_once()
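

# Illustrative sketch (not part of the original suite): with two deployments, a
# mocked RateLimitError cools one deployment down, yet the router keeps serving
# traffic from the remaining one. This only checks that the router stays usable
# after the failure, not which deployment handled the follow-up call.
def test_multiple_deployments_stay_usable_sketch():
    model_list = [
        DeploymentTypedDict(
            model_name="gpt-3.5-turbo",
            litellm_params=LiteLLMParamsTypedDict(model="gpt-3.5-turbo"),
        )
        for _ in range(2)
    ]
    router = Router(model_list=model_list, allowed_fails=0, num_retries=0)

    try:
        router.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "gm"}],
            mock_response="litellm.RateLimitError",
        )
    except litellm.RateLimitError:
        pass

    # the second deployment is still available, so this mocked call succeeds
    response = router.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "gm"}],
        mock_response="hello",
    )
    assert response is not None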