forked from phoenix/litellm-mirror
fix(router.py): fix router should_retry
parent 5e0bd5982e
commit 9f24421d44

2 changed files with 38 additions and 20 deletions
```diff
@@ -1625,12 +1625,10 @@ class Router:
                             min_timeout=self.retry_after,
                         )
                         time.sleep(timeout)
-                    elif (
-                        hasattr(e, "status_code")
-                        and hasattr(e, "response")
-                        and litellm._should_retry(status_code=e.status_code)
+                    elif hasattr(e, "status_code") and litellm._should_retry(
+                        status_code=e.status_code
                     ):
-                        if hasattr(e.response, "headers"):
+                        if hasattr(e, "response") and hasattr(e.response, "headers"):
                             timeout = litellm._calculate_retry_after(
                                 remaining_retries=remaining_retries,
                                 max_retries=num_retries,
```
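The change tightens the retry guard: previously the `elif` required the exception to carry *both* a `status_code` and a `response` attribute before `litellm._should_retry` was ever consulted, so a retryable error without a response object fell through without a retry. Now only `status_code` gates the branch, and `response` is checked just where it is actually needed, for reading rate-limit headers. A minimal standalone sketch of that control flow; `should_retry` below is a stand-in whose retryable set (408/409/429/5xx, modeled on the OpenAI SDK policy) is an assumption, not litellm's actual implementation:

```python
# Stand-in for litellm._should_retry; the retryable set below is an
# assumption modeled on the OpenAI SDK policy, not litellm's real code.
def should_retry(status_code: int) -> bool:
    if status_code in (408, 409, 429):  # timeout, conflict, rate limit
        return True
    return status_code >= 500  # server-side errors are worth retrying


class FakeError(Exception):
    """Hypothetical error carrying a status code but no .response attribute."""

    def __init__(self, status_code: int):
        self.status_code = status_code


e = FakeError(429)
# New guard order: decide retryability from status_code alone...
if hasattr(e, "status_code") and should_retry(status_code=e.status_code):
    # ...and only touch e.response.headers when a response actually exists.
    headers = (
        e.response.headers
        if hasattr(e, "response") and hasattr(e.response, "headers")
        else None
    )
    print("retrying; rate-limit headers:", headers)  # headers is None here
```

Under the old guard, this `FakeError(429)` would never have entered the branch at all.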
```diff
@@ -119,7 +119,9 @@ def test_multiple_deployments_parallel():
 
 
 # test_multiple_deployments_parallel()
-def test_cooldown_same_model_name():
+@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.asyncio
+async def test_cooldown_same_model_name(sync_mode):
     # users could have the same model with different api_base
     # example
     # azure/chatgpt, api_base: 1234
```
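The test is now parametrized over `sync_mode`, so a single test body exercises both the sync and async code paths. A self-contained sketch of the same pytest pattern, assuming `pytest-asyncio` is installed; `do_work` and `ado_work` are illustrative stand-ins for the sync and async APIs under test:

```python
import asyncio

import pytest


def do_work():
    return "sync result"


async def ado_work():
    await asyncio.sleep(0)
    return "async result"


@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_both_paths(sync_mode):
    # pytest-asyncio runs the coroutine once per parameter value;
    # the sync branch simply calls the blocking API inside it.
    if sync_mode:
        result = do_work()
    else:
        result = await ado_work()
    assert result.endswith("result")
```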
```diff
@@ -161,22 +163,40 @@ def test_cooldown_same_model_name():
             num_retries=3,
         )  # type: ignore
 
-        response = router.completion(
-            model="gpt-3.5-turbo",
-            messages=[{"role": "user", "content": "hello this request will pass"}],
-        )
-        print(router.model_list)
-        model_ids = []
-        for model in router.model_list:
-            model_ids.append(model["model_info"]["id"])
-        print("\n litellm model ids ", model_ids)
+        if sync_mode:
+            response = router.completion(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": "hello this request will pass"}],
+            )
+            print(router.model_list)
+            model_ids = []
+            for model in router.model_list:
+                model_ids.append(model["model_info"]["id"])
+            print("\n litellm model ids ", model_ids)
 
-        # example litellm_model_names ['azure/chatgpt-v-2-ModelID-64321', 'azure/chatgpt-v-2-ModelID-63960']
-        assert (
-            model_ids[0] != model_ids[1]
-        )  # ensure both models have a uuid added, and they have different names
+            # example litellm_model_names ['azure/chatgpt-v-2-ModelID-64321', 'azure/chatgpt-v-2-ModelID-63960']
+            assert (
+                model_ids[0] != model_ids[1]
+            )  # ensure both models have a uuid added, and they have different names
 
-        print("\ngot response\n", response)
+            print("\ngot response\n", response)
+        else:
+            response = await router.acompletion(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": "hello this request will pass"}],
+            )
+            print(router.model_list)
+            model_ids = []
+            for model in router.model_list:
+                model_ids.append(model["model_info"]["id"])
+            print("\n litellm model ids ", model_ids)
+
+            # example litellm_model_names ['azure/chatgpt-v-2-ModelID-64321', 'azure/chatgpt-v-2-ModelID-63960']
+            assert (
+                model_ids[0] != model_ids[1]
+            )  # ensure both models have a uuid added, and they have different names
+
+            print("\ngot response\n", response)
     except Exception as e:
         pytest.fail(f"Got unexpected exception on router! - {e}")
 
```
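For context, the test's comments describe two deployments that share one public model name but differ in `api_base`. A sketch of the kind of `model_list` that implies, following litellm's `model_name`/`litellm_params` Router conventions; the Azure endpoints and keys below are placeholders, not values from the actual test:

```python
from litellm import Router

# Two deployments behind one public name; endpoints and keys are made up.
model_list = [
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {
            "model": "azure/chatgpt-v-2",
            "api_key": "bad-key",  # placeholder: this deployment would fail
            "api_base": "https://example-1.openai.azure.com",
        },
    },
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {
            "model": "azure/chatgpt-v-2",
            "api_key": "good-key",  # placeholder
            "api_base": "https://example-2.openai.azure.com",
        },
    },
]

router = Router(model_list=model_list, num_retries=3)

# The Router stamps each deployment with its own model_info["id"] (a uuid),
# which is exactly what the test's assertion checks.
ids = [m["model_info"]["id"] for m in router.model_list]
assert ids[0] != ids[1]
```

Distinct `model_info` ids are what let the cooldown logic quarantine one failing deployment without hiding its healthy twin behind the shared name.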