Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-26 19:24:27 +00:00
fix(route_llm_request.py): move to using common router, even for client-side credentials (#8966)
* fix(route_llm_request.py): move to using common router, even for client-side credentials. Ensures fallbacks / cooldown logic still works.
* test(test_route_llm_request.py): add unit test for route request
* feat(router.py): generate unique model id when a clientside credential is passed in. Prevents cooldowns for api key 1 from impacting api key 2.
* test(test_router.py): update testing to ensure original litellm params are not mutated
* fix(router.py): upsert clientside call into the llm router model list. Enables cooldown logic to work accurately.
* fix: fix linting error
* test(test_router_utils.py): add direct test for new util on router
parent bd2231400f
commit ae6f91a56d
9 changed files with 273 additions and 36 deletions
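To make the "generate unique model id" change concrete: the sketch below is a hypothetical illustration of the idea, not litellm's actual implementation. The helper name _make_clientside_deployment_id and the hashing scheme are assumptions; only the behavior it models (per-credential deployment ids, so one credential's cooldown never touches another's) comes from the commit message above.

# Hypothetical sketch, NOT litellm's real code: derive a deployment id
# from the base model id plus a digest of the request's own credential,
# so cooldown state is tracked per credential rather than per shared model.
import hashlib

def _make_clientside_deployment_id(base_id: str, api_key: str) -> str:
    # Hash the key so the raw credential never appears in the model id.
    digest = hashlib.sha256(api_key.encode()).hexdigest()[:8]
    return f"{base_id}-clientside-{digest}"

# A cooldown recorded against key 1's deployment id can never match key 2's:
id_for_key_1 = _make_clientside_deployment_id("123", "my-bad-key-1")
id_for_key_2 = _make_clientside_deployment_id("123", "my-good-key-2")
assert id_for_key_1 != id_for_key_2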
@@ -692,3 +692,50 @@ def test_router_fallbacks_with_cooldowns_and_model_id():
         model="gpt-3.5-turbo",
         messages=[{"role": "user", "content": "hi"}],
     )
+
+
+@pytest.mark.asyncio()
+async def test_router_fallbacks_with_cooldowns_and_dynamic_credentials():
+    """
+    Ensure cooldown on credential 1 does not affect credential 2
+    """
+    from litellm.router_utils.cooldown_handlers import _async_get_cooldown_deployments
+
+    litellm._turn_on_debug()
+    router = Router(
+        model_list=[
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {"model": "gpt-3.5-turbo", "rpm": 1},
+                "model_info": {
+                    "id": "123",
+                },
+            }
+        ]
+    )
+
+    ## trigger ratelimit
+    try:
+        await router.acompletion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "hi"}],
+            api_key="my-bad-key-1",
+            mock_response="litellm.RateLimitError",
+        )
+        pytest.fail("Expected RateLimitError")
+    except litellm.RateLimitError:
+        pass
+
+    await asyncio.sleep(1)
+
+    cooldown_list = await _async_get_cooldown_deployments(
+        litellm_router_instance=router, parent_otel_span=None
+    )
+    print("cooldown_list: ", cooldown_list)
+    assert len(cooldown_list) == 1
+
+    await router.acompletion(
+        model="gpt-3.5-turbo",
+        api_key=os.getenv("OPENAI_API_KEY"),
+        messages=[{"role": "user", "content": "hi"}],
+    )
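Note on running the new test: it can be invoked as pytest test_router_utils.py::test_router_fallbacks_with_cooldowns_and_dynamic_credentials (the exact path within the repo's test tree may vary by version). The first acompletion call is mocked via mock_response, so "my-bad-key-1" is never sent to any provider; the final acompletion call has no mock and reads os.getenv("OPENAI_API_KEY"), so a valid key must be set in the environment for the test to pass end to end.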