forked from phoenix/litellm-mirror
fix(test_router_fallbacks.py): fix tests
parent 1baad80c7d
commit bc5c9d7da9
2 changed files with 24 additions and 45 deletions
@@ -1512,31 +1512,6 @@ class Router:
                     ## LOGGING
                     kwargs = self.log_retry(kwargs=kwargs, e=e)
                     remaining_retries = num_retries - current_attempt
-                    # if "No models available" in str(e):
-                    #     timeout = litellm._calculate_retry_after(
-                    #         remaining_retries=remaining_retries,
-                    #         max_retries=num_retries,
-                    #         min_timeout=self.retry_after,
-                    #     )
-                    #     await asyncio.sleep(timeout)
-                    # elif (
-                    #     hasattr(e, "status_code")
-                    #     and hasattr(e, "response")
-                    #     and litellm._should_retry(status_code=e.status_code)
-                    # ):
-                    #     if hasattr(e.response, "headers"):
-                    #         timeout = litellm._calculate_retry_after(
-                    #             remaining_retries=remaining_retries,
-                    #             max_retries=num_retries,
-                    #             response_headers=e.response.headers,
-                    #             min_timeout=self.retry_after,
-                    #         )
-                    #     else:
-                    #         timeout = litellm._calculate_retry_after(
-                    #             remaining_retries=remaining_retries,
-                    #             max_retries=num_retries,
-                    #             min_timeout=self.retry_after,
-                    #         )
                     _timeout = self._router_should_retry(
                         e=original_exception,
                         remaining_retries=remaining_retries,
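The block removed above was already commented out: it duplicated the backoff logic that the `_router_should_retry` helper (called in the surviving context lines) now owns. Below is a minimal sketch of what that consolidated logic could look like, reusing the same `litellm._calculate_retry_after` call the dead code referenced; the helper's real body and full signature are not shown in this diff, so treat the function name and parameters as assumptions.

import litellm

# Hypothetical consolidation of the deleted backoff logic -- the real
# Router._router_should_retry body is not part of this diff.
def compute_retry_timeout(e: Exception, remaining_retries: int, num_retries: int,
                          min_timeout: float) -> float:
    """Return how many seconds to sleep before the next retry attempt."""
    response_headers = getattr(getattr(e, "response", None), "headers", None)
    if response_headers is not None:
        # honor provider rate-limit headers (e.g. Retry-After) when available
        return litellm._calculate_retry_after(
            remaining_retries=remaining_retries,
            max_retries=num_retries,
            response_headers=response_headers,
            min_timeout=min_timeout,
        )
    return litellm._calculate_retry_after(
        remaining_retries=remaining_retries,
        max_retries=num_retries,
        min_timeout=min_timeout,
    )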
@@ -127,7 +127,7 @@ def test_sync_fallbacks():
         response = router.completion(**kwargs)
         print(f"response: {response}")
         time.sleep(0.05)  # allow a delay as success_callbacks are on a separate thread
-        assert customHandler.previous_models == 1  # 0 retries, 1 fallback
+        assert customHandler.previous_models == 4

         print("Passed ! Test router_fallbacks: test_sync_fallbacks()")
         router.reset()
@@ -209,12 +209,13 @@ async def test_async_fallbacks():
     user_message = "Hello, how are you?"
     messages = [{"content": user_message, "role": "user"}]
     try:
+        kwargs["model"] = "azure/gpt-3.5-turbo"
         response = await router.acompletion(**kwargs)
         print(f"customHandler.previous_models: {customHandler.previous_models}")
         await asyncio.sleep(
             0.05
         )  # allow a delay as success_callbacks are on a separate thread
-        assert customHandler.previous_models == 1  # 0 retries, 1 fallback
+        assert customHandler.previous_models == 4  # 1 init call, 2 retries, 1 fallback
         router.reset()
     except litellm.Timeout as e:
         pass
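The expected count moves from 1 to 4 because the handler now observes every attempt on the failing deployment: the initial call, two retries, and finally the fallback ("1 init call, 2 retries, 1 fallback" in the new comment). Below is a minimal sketch of a callback that would arrive at that count, assuming a handler built on litellm's CustomLogger that simply counts failure and success events; the real MyCustomHandler in the test file may compute previous_models differently (e.g. from the router's call metadata), and the async tests would use the async_log_* variants.

from litellm.integrations.custom_logger import CustomLogger

class AttemptCounter(CustomLogger):
    """Hypothetical stand-in for MyCustomHandler."""

    def __init__(self):
        self.previous_models = 0

    def log_failure_event(self, kwargs, response_obj, start_time, end_time):
        # each failed deployment attempt (the init call and every retry) counts
        self.previous_models += 1

    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        # the successful fallback attempt is the fourth and final one
        self.previous_models += 1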
@@ -258,7 +259,6 @@ def test_sync_fallbacks_embeddings():
         model_list=model_list,
         fallbacks=[{"bad-azure-embedding-model": ["good-azure-embedding-model"]}],
         set_verbose=False,
-        num_retries=0,
     )
     customHandler = MyCustomHandler()
     litellm.callbacks = [customHandler]
@@ -269,7 +269,7 @@ def test_sync_fallbacks_embeddings():
         response = router.embedding(**kwargs)
         print(f"customHandler.previous_models: {customHandler.previous_models}")
         time.sleep(0.05)  # allow a delay as success_callbacks are on a separate thread
-        assert customHandler.previous_models == 1  # 0 retries, 1 fallback
+        assert customHandler.previous_models == 4  # 1 init call, 2 retries, 1 fallback
         router.reset()
     except litellm.Timeout as e:
         pass
@@ -323,7 +323,7 @@ async def test_async_fallbacks_embeddings():
         await asyncio.sleep(
             0.05
         )  # allow a delay as success_callbacks are on a separate thread
-        assert customHandler.previous_models == 1  # 0 retries, 1 fallback
+        assert customHandler.previous_models == 4  # 1 init call, 2 retries, 1 fallback
         router.reset()
     except litellm.Timeout as e:
         pass
@@ -394,7 +394,7 @@ def test_dynamic_fallbacks_sync():
         },
     ]

-    router = Router(model_list=model_list, set_verbose=True, num_retries=0)
+    router = Router(model_list=model_list, set_verbose=True)
     kwargs = {}
     kwargs["model"] = "azure/gpt-3.5-turbo"
     kwargs["messages"] = [{"role": "user", "content": "Hey, how's it going?"}]
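Dropping num_retries=0 lets the router's default retry behaviour apply again, which is why the assertion in the next hunk expects 4 attempts (1 init call, 2 retries, 1 fallback) instead of a single fallback. A rough usage sketch under that assumption follows; the deployments, credentials, and the shape of the per-request ("dynamic") fallbacks argument are placeholders modeled on the fallbacks format used elsewhere in this file, not copied from the test.

from litellm import Router

# Sketch only: placeholder deployments and keys.
model_list = [
    {
        "model_name": "azure/gpt-3.5-turbo",
        "litellm_params": {
            "model": "azure/chatgpt-v-2",
            "api_key": "bad-key",  # placeholder, forces a failure
            "api_base": "https://example.openai.azure.com",
            "api_version": "2023-07-01-preview",
        },
    },
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "sk-..."},
    },
]
router = Router(model_list=model_list, set_verbose=True)  # num_retries left at its default

kwargs = {
    "model": "azure/gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Hey, how's it going?"}],
    "fallbacks": [{"azure/gpt-3.5-turbo": ["gpt-3.5-turbo"]}],  # per-request fallbacks (assumed shape)
}
response = router.completion(**kwargs)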
@@ -402,7 +402,7 @@ def test_dynamic_fallbacks_sync():
         response = router.completion(**kwargs)
         print(f"response: {response}")
         time.sleep(0.05)  # allow a delay as success_callbacks are on a separate thread
-        assert customHandler.previous_models == 1  # 0 retries, 1 fallback
+        assert customHandler.previous_models == 4  # 1 init call, 2 retries, 1 fallback
         router.reset()
     except Exception as e:
         pytest.fail(f"An exception occurred - {e}")
@@ -488,7 +488,7 @@ async def test_dynamic_fallbacks_async():
         await asyncio.sleep(
             0.05
         )  # allow a delay as success_callbacks are on a separate thread
-        assert customHandler.previous_models == 1  # 0 retries, 1 fallback
+        assert customHandler.previous_models == 4  # 1 init call, 2 retries, 1 fallback
         router.reset()
     except Exception as e:
         pytest.fail(f"An exception occurred - {e}")
@@ -573,7 +573,7 @@ async def test_async_fallbacks_streaming():
         await asyncio.sleep(
             0.05
         )  # allow a delay as success_callbacks are on a separate thread
-        assert customHandler.previous_models == 1  # 0 retries, 1 fallback
+        assert customHandler.previous_models == 4  # 1 init call, 2 retries, 1 fallback
         router.reset()
     except litellm.Timeout as e:
         pass
@@ -766,10 +766,10 @@ def test_usage_based_routing_fallbacks():
     load_dotenv()

     # Constants for TPM and RPM allocation
-    AZURE_FAST_TPM = 3
-    AZURE_BASIC_TPM = 4
-    OPENAI_TPM = 400
-    ANTHROPIC_TPM = 100000
+    AZURE_FAST_RPM = 3
+    AZURE_BASIC_RPM = 4
+    OPENAI_RPM = 10
+    ANTHROPIC_RPM = 100000

     def get_azure_params(deployment_name: str):
         params = {
@@ -798,22 +798,26 @@ def test_usage_based_routing_fallbacks():
         {
             "model_name": "azure/gpt-4-fast",
             "litellm_params": get_azure_params("chatgpt-v-2"),
-            "tpm": AZURE_FAST_TPM,
+            "model_info": {"id": 1},
+            "rpm": AZURE_FAST_RPM,
         },
         {
             "model_name": "azure/gpt-4-basic",
             "litellm_params": get_azure_params("chatgpt-v-2"),
-            "tpm": AZURE_BASIC_TPM,
+            "model_info": {"id": 2},
+            "rpm": AZURE_BASIC_RPM,
         },
         {
             "model_name": "openai-gpt-4",
             "litellm_params": get_openai_params("gpt-3.5-turbo"),
-            "tpm": OPENAI_TPM,
+            "model_info": {"id": 3},
+            "rpm": OPENAI_RPM,
         },
         {
             "model_name": "anthropic-claude-instant-1.2",
             "litellm_params": get_anthropic_params("claude-instant-1.2"),
-            "tpm": ANTHROPIC_TPM,
+            "model_info": {"id": 4},
+            "rpm": ANTHROPIC_RPM,
         },
     ]
     # litellm.set_verbose=True
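The deployments now carry per-deployment rpm limits plus stable model_info ids, so the assertions further down can check exactly which deployment served a request via response._hidden_params["model_id"]. Below is a sketch of how such a model_list is typically wired into a usage-based router with a fallback chain; the trimmed two-deployment list, credentials, routing_strategy value, and fallback mapping are assumptions, not copied from the test file.

from litellm import Router

# Sketch only: a trimmed-down version of the model_list above with placeholder keys.
model_list = [
    {
        "model_name": "openai-gpt-4",
        "litellm_params": {"model": "gpt-3.5-turbo", "api_key": "sk-..."},
        "model_info": {"id": 3},
        "rpm": 10,
    },
    {
        "model_name": "anthropic-claude-instant-1.2",
        "litellm_params": {"model": "claude-instant-1.2", "api_key": "sk-ant-..."},
        "model_info": {"id": 4},
        "rpm": 100000,
    },
]

router = Router(
    model_list=model_list,
    routing_strategy="usage-based-routing",  # pick deployments by remaining tpm/rpm
    fallbacks=[{"openai-gpt-4": ["anthropic-claude-instant-1.2"]}],  # assumed chain
    set_verbose=True,
)

response = router.completion(
    model="openai-gpt-4",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
# the deployment that actually served the request is exposed via its model_info id
print(response._hidden_params["model_id"])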
@@ -844,10 +848,10 @@ def test_usage_based_routing_fallbacks():
             mock_response="very nice to meet you",
         )
         print("response: ", response)
-        print("response._hidden_params: ", response._hidden_params)
+        print(f"response._hidden_params: {response._hidden_params}")
         # in this test, we expect azure/gpt-4 fast to fail, then azure-gpt-4 basic to fail and then openai-gpt-4 to pass
         # the token count of this message is > AZURE_FAST_TPM, > AZURE_BASIC_TPM
-        assert response._hidden_params["custom_llm_provider"] == "openai"
+        assert response._hidden_params["model_id"] == "1"

         # now make 100 mock requests to OpenAI - expect it to fallback to anthropic-claude-instant-1.2
         for i in range(20):
|
||||||
print("response._hidden_params: ", response._hidden_params)
|
print("response._hidden_params: ", response._hidden_params)
|
||||||
if i == 19:
|
if i == 19:
|
||||||
# by the 19th call we should have hit TPM LIMIT for OpenAI, it should fallback to anthropic-claude-instant-1.2
|
# by the 19th call we should have hit TPM LIMIT for OpenAI, it should fallback to anthropic-claude-instant-1.2
|
||||||
assert response._hidden_params["custom_llm_provider"] == "anthropic"
|
assert response._hidden_params["model_id"] == "4"
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pytest.fail(f"An exception occurred {e}")
|
pytest.fail(f"An exception occurred {e}")
|
||||||
|
|