fix(route_llm_request.py): move to using common router, even for client-side credentials (#8966)

* fix(route_llm_request.py): move to using common router, even for client-side credentials

ensures fallbacks / cooldown logic still works
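To make the change concrete, here is a minimal sketch (modeled on the test added in this PR, with a placeholder key) of what routing a clientside-credential request through the common router looks like: the caller-supplied `api_key` is forwarded as a per-request override, but the call still flows through `Router.acompletion`, so fallback and cooldown handling continue to apply.

```python
import asyncio

from litellm import Router


async def main():
    router = Router(
        model_list=[
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {"model": "gpt-3.5-turbo"},
            }
        ]
    )
    # The clientside credential rides along with the request, but the call
    # still goes through the router, so fallbacks / cooldowns are enforced.
    response = await router.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        api_key="sk-client-supplied-key",  # illustrative placeholder, not a real key
    )
    print(response)


asyncio.run(main())
```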

* test(test_route_llm_request.py): add unit test for route request

* feat(router.py): generate unique model id when clientside credential passed in

Prevents cooldowns for api key 1 from impacting api key 2
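A rough sketch of the idea, using a hypothetical helper rather than the router's actual code: derive the deployment id from both the configured model id and the clientside credential, so cooldown entries recorded for one key can never match another.

```python
import hashlib


def clientside_deployment_id(base_model_id: str, api_key: str) -> str:
    # Hash the credential so the raw key never appears in the id, while
    # still yielding a distinct id per clientside api key.
    key_fingerprint = hashlib.sha256(api_key.encode()).hexdigest()[:16]
    return f"{base_model_id}-clientside-{key_fingerprint}"


# Distinct ids, so a cooldown keyed on the first never affects the second.
print(clientside_deployment_id("123", "my-bad-key-1"))
print(clientside_deployment_id("123", "my-good-key-2"))
```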

* test(test_router.py): update testing to ensure original litellm params not mutated

* fix(router.py): upsert clientside call into llm router model list

enables cooldown logic to work accurately
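The upsert can be pictured as follows (an illustrative sketch over litellm's model_list dict shape, not the router's actual method): find the deployment by its `model_info["id"]` and replace it, otherwise append it, so the cooldown handlers see the clientside deployment when they scan the model list.

```python
from typing import Any, Dict, List


def upsert_deployment(model_list: List[Dict[str, Any]], deployment: Dict[str, Any]) -> None:
    """Insert or replace a deployment, keyed on model_info["id"]."""
    new_id = deployment["model_info"]["id"]
    for i, existing in enumerate(model_list):
        if existing.get("model_info", {}).get("id") == new_id:
            model_list[i] = deployment  # refresh the existing clientside entry
            return
    model_list.append(deployment)  # first request seen for this credential
```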

* fix: fix linting error

* test(test_router_utils.py): add direct test for new util on router
Author: Krish Dholakia, 2025-03-03 22:57:08 -08:00 (committed by GitHub)
Parent: bd2231400f
Commit: ae6f91a56d
9 changed files with 273 additions and 36 deletions


@@ -692,3 +692,50 @@ def test_router_fallbacks_with_cooldowns_and_model_id():
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "hi"}],
)
@pytest.mark.asyncio()
async def test_router_fallbacks_with_cooldowns_and_dynamic_credentials():
"""
Ensure cooldown on credential 1 does not affect credential 2
"""
from litellm.router_utils.cooldown_handlers import _async_get_cooldown_deployments
litellm._turn_on_debug()
router = Router(
model_list=[
{
"model_name": "gpt-3.5-turbo",
"litellm_params": {"model": "gpt-3.5-turbo", "rpm": 1},
"model_info": {
"id": "123",
},
}
]
)
## trigger ratelimit
try:
await router.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "hi"}],
api_key="my-bad-key-1",
mock_response="litellm.RateLimitError",
)
pytest.fail("Expected RateLimitError")
except litellm.RateLimitError:
pass
await asyncio.sleep(1)
cooldown_list = await _async_get_cooldown_deployments(
litellm_router_instance=router, parent_otel_span=None
)
print("cooldown_list: ", cooldown_list)
assert len(cooldown_list) == 1
await router.acompletion(
model="gpt-3.5-turbo",
api_key=os.getenv("OPENAI_API_KEY"),
messages=[{"role": "user", "content": "hi"}],
)