[Fix-Router] Don't cooldown when only 1 deployment exists (#5673)

* fix get model list

* fix test custom callback router

* fix embedding fallback test

* fix router retry policy on AuthErrors

* fix router test

* add prod test: no cooldown when only a single deployment exists

* add test test_single_deployment_no_cooldowns_test_prod_mock_completion_calls
This commit is contained in:
Ishaan Jaff 2024-09-12 19:14:58 -07:00 committed by GitHub
parent 13ba22d6fd
commit 19a06d7842
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 128 additions and 17 deletions

View file

@@ -533,6 +533,7 @@ async def test_async_chat_azure_with_fallbacks():
try:
customHandler_fallbacks = CompletionCustomHandler()
litellm.callbacks = [customHandler_fallbacks]
litellm.set_verbose = True
# with fallbacks
model_list = [
{
@@ -555,7 +556,13 @@ async def test_async_chat_azure_with_fallbacks():
"rpm": 1800,
},
]
router = Router(model_list=model_list, fallbacks=[{"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}]) # type: ignore
router = Router(
model_list=model_list,
fallbacks=[{"gpt-3.5-turbo": ["gpt-3.5-turbo-16k"]}],
retry_policy=litellm.router.RetryPolicy(
AuthenticationErrorRetries=0,
),
) # type: ignore
response = await router.acompletion(
model="gpt-3.5-turbo",
messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],