fix(router.py): fix router should_retry

Krrish Dholakia 2024-04-27 15:13:20 -07:00
parent 5e0bd5982e
commit 9f24421d44
2 changed files with 38 additions and 20 deletions


@@ -1625,12 +1625,10 @@ class Router:
                     min_timeout=self.retry_after,
                 )
                 time.sleep(timeout)
-            elif (
-                hasattr(e, "status_code")
-                and hasattr(e, "response")
-                and litellm._should_retry(status_code=e.status_code)
+            elif hasattr(e, "status_code") and litellm._should_retry(
+                status_code=e.status_code
             ):
-                if hasattr(e.response, "headers"):
+                if hasattr(e, "response") and hasattr(e.response, "headers"):
                     timeout = litellm._calculate_retry_after(
                         remaining_retries=remaining_retries,
                         max_retries=num_retries,
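
The behavioral change: the old guard required the exception to carry a response attribute before litellm._should_retry was ever consulted, so a retryable error raised without an HTTP response object was never retried. The new guard decides from status_code alone, and e.response.headers is only consulted afterwards to honor Retry-After when it exists. A minimal sketch of the difference, using a simplified stand-in for litellm._should_retry (the real helper's rules are broader):

# Hypothetical stand-in for litellm._should_retry; the real helper
# covers more cases (explicit retry headers, etc.).
def _should_retry(status_code: int) -> bool:
    return status_code in (408, 409, 429) or status_code >= 500


class NoResponseError(Exception):
    """Carries a status_code but no .response attribute."""

    def __init__(self, status_code: int):
        super().__init__(f"status={status_code}")
        self.status_code = status_code


e = NoResponseError(status_code=429)

# Old guard: hasattr(e, "response") is checked up front, so this
# retryable 429 is never retried.
old_decision = (
    hasattr(e, "status_code")
    and hasattr(e, "response")
    and _should_retry(status_code=e.status_code)
)

# New guard: retry is decided from the status code alone; response
# headers are only consulted later, for Retry-After.
new_decision = hasattr(e, "status_code") and _should_retry(
    status_code=e.status_code
)

assert old_decision is False and new_decision is True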


@@ -119,7 +119,9 @@ def test_multiple_deployments_parallel():
 
 
 # test_multiple_deployments_parallel()
-def test_cooldown_same_model_name():
+@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.asyncio
+async def test_cooldown_same_model_name(sync_mode):
     # users could have the same model with different api_base
     # example
     # azure/chatgpt, api_base: 1234
@@ -161,22 +163,40 @@ def test_cooldown_same_model_name():
             num_retries=3,
         )  # type: ignore
 
-        response = router.completion(
-            model="gpt-3.5-turbo",
-            messages=[{"role": "user", "content": "hello this request will pass"}],
-        )
-        print(router.model_list)
-        model_ids = []
-        for model in router.model_list:
-            model_ids.append(model["model_info"]["id"])
-        print("\n litellm model ids ", model_ids)
-
-        # example litellm_model_names ['azure/chatgpt-v-2-ModelID-64321', 'azure/chatgpt-v-2-ModelID-63960']
-        assert (
-            model_ids[0] != model_ids[1]
-        )  # ensure both models have a uuid added, and they have different names
-
-        print("\ngot response\n", response)
+        if sync_mode:
+            response = router.completion(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": "hello this request will pass"}],
+            )
+            print(router.model_list)
+            model_ids = []
+            for model in router.model_list:
+                model_ids.append(model["model_info"]["id"])
+            print("\n litellm model ids ", model_ids)
+
+            # example litellm_model_names ['azure/chatgpt-v-2-ModelID-64321', 'azure/chatgpt-v-2-ModelID-63960']
+            assert (
+                model_ids[0] != model_ids[1]
+            )  # ensure both models have a uuid added, and they have different names
+
+            print("\ngot response\n", response)
+        else:
+            response = await router.acompletion(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": "hello this request will pass"}],
+            )
+            print(router.model_list)
+            model_ids = []
+            for model in router.model_list:
+                model_ids.append(model["model_info"]["id"])
+            print("\n litellm model ids ", model_ids)
+
+            # example litellm_model_names ['azure/chatgpt-v-2-ModelID-64321', 'azure/chatgpt-v-2-ModelID-63960']
+            assert (
+                model_ids[0] != model_ids[1]
+            )  # ensure both models have a uuid added, and they have different names
+
+            print("\ngot response\n", response)
 
     except Exception as e:
         pytest.fail(f"Got unexpected exception on router! - {e}")
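
For reference, the sync_mode pattern above is the usual pytest way to exercise a sync and an async call path from one test body: @pytest.mark.parametrize fans the test out into two cases, and @pytest.mark.asyncio (from the pytest-asyncio plugin) lets the async test function await the async variant. A minimal self-contained sketch, with a hypothetical FakeClient standing in for the real Router:

import asyncio

import pytest


class FakeClient:
    # Hypothetical stand-in for Router, exposing a sync and an async API.
    def completion(self, prompt: str) -> str:
        return f"sync: {prompt}"

    async def acompletion(self, prompt: str) -> str:
        await asyncio.sleep(0)  # yield to the event loop
        return f"async: {prompt}"


@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_both_call_paths(sync_mode):
    client = FakeClient()
    if sync_mode:
        # calling the sync method from an async test is fine; it just blocks
        response = client.completion("hello")
    else:
        response = await client.acompletion("hello")
    assert "hello" in response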