forked from phoenix/litellm-mirror
fix(test_router_fallbacks.py): fix tests
This commit is contained in:
parent 1baad80c7d
commit bc5c9d7da9
2 changed files with 24 additions and 45 deletions
@@ -1512,31 +1512,6 @@ class Router:
## LOGGING
kwargs = self.log_retry(kwargs=kwargs, e=e)
remaining_retries = num_retries - current_attempt
# if "No models available" in str(e):
#     timeout = litellm._calculate_retry_after(
#         remaining_retries=remaining_retries,
#         max_retries=num_retries,
#         min_timeout=self.retry_after,
#     )
#     await asyncio.sleep(timeout)
# elif (
#     hasattr(e, "status_code")
#     and hasattr(e, "response")
#     and litellm._should_retry(status_code=e.status_code)
# ):
#     if hasattr(e.response, "headers"):
#         timeout = litellm._calculate_retry_after(
#             remaining_retries=remaining_retries,
#             max_retries=num_retries,
#             response_headers=e.response.headers,
#             min_timeout=self.retry_after,
#         )
#     else:
#         timeout = litellm._calculate_retry_after(
#             remaining_retries=remaining_retries,
#             max_retries=num_retries,
#             min_timeout=self.retry_after,
#         )
_timeout = self._router_should_retry(
    e=original_exception,
    remaining_retries=remaining_retries,
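The hunk above deletes the commented-out inline backoff math and leaves the existing _timeout = self._router_should_retry(...) call as the single place that decides how long to wait. The helper's body is not part of this diff; below is a minimal sketch of the kind of calculation the removed comments described (Retry-After header if present, otherwise capped exponential backoff). The function name, defaults, and cap are assumptions for illustration, not litellm's actual implementation.

import random
from typing import Optional


def sketch_retry_timeout(
    remaining_retries: int,
    max_retries: int,
    min_timeout: float = 0.0,
    response_headers: Optional[dict] = None,
) -> float:
    # hypothetical backoff mirroring the removed commented-out logic
    if response_headers and "retry-after" in response_headers:
        try:
            # honor an explicit Retry-After header from the provider
            return max(float(response_headers["retry-after"]), min_timeout)
        except ValueError:
            pass
    attempts_used = max_retries - remaining_retries
    backoff = min(2 ** attempts_used, 8)  # cap exponential growth at 8s
    jitter = random.uniform(0, 0.1)  # small jitter to avoid synchronized retries
    return max(backoff + jitter, min_timeout)

Presumably the caller then sleeps on whatever self._router_should_retry(...) returns, though that line falls outside this hunk.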
@@ -127,7 +127,7 @@ def test_sync_fallbacks():
response = router.completion(**kwargs)
print(f"response: {response}")
time.sleep(0.05) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4

print("Passed ! Test router_fallbacks: test_sync_fallbacks()")
router.reset()
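The assertion in test_sync_fallbacks moves from 1 ("0 retries, 1 fallback") to 4 because the router in these tests is no longer pinned to num_retries=0 and now walks through 1 init call, 2 retries, and 1 fallback before succeeding. MyCustomHandler's implementation is not shown in this diff; the sketch below is a rough stand-in for a callback that tracks how many deployments were already attempted, assuming litellm's CustomLogger hook interface and that the router records prior attempts under litellm_params["metadata"]["previous_models"] (that key is an assumption here).

from litellm.integrations.custom_logger import CustomLogger


class SketchFallbackHandler(CustomLogger):
    # hypothetical stand-in for the test suite's MyCustomHandler
    def __init__(self):
        self.previous_models = 0

    def log_pre_api_call(self, model, messages, kwargs):
        # assumption: the router lists already-attempted deployments in metadata
        metadata = kwargs.get("litellm_params", {}).get("metadata", {}) or {}
        self.previous_models += len(metadata.get("previous_models", []) or [])

Registering it with litellm.callbacks = [SketchFallbackHandler()] would let the test read previous_models back after router.completion(**kwargs), which is what the asserts above check.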
@@ -209,12 +209,13 @@ async def test_async_fallbacks():
user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]
try:
    kwargs["model"] = "azure/gpt-3.5-turbo"
    response = await router.acompletion(**kwargs)
    print(f"customHandler.previous_models: {customHandler.previous_models}")
    await asyncio.sleep(
        0.05
    ) # allow a delay as success_callbacks are on a separate thread
    assert customHandler.previous_models == 1 # 0 retries, 1 fallback
    assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
    router.reset()
except litellm.Timeout as e:
    pass
@@ -258,7 +259,6 @@ def test_sync_fallbacks_embeddings():
model_list=model_list,
fallbacks=[{"bad-azure-embedding-model": ["good-azure-embedding-model"]}],
set_verbose=False,
num_retries=0,
)
customHandler = MyCustomHandler()
litellm.callbacks = [customHandler]
@@ -269,7 +269,7 @@ def test_sync_fallbacks_embeddings():
response = router.embedding(**kwargs)
print(f"customHandler.previous_models: {customHandler.previous_models}")
time.sleep(0.05) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
router.reset()
except litellm.Timeout as e:
    pass
@@ -323,7 +323,7 @@ async def test_async_fallbacks_embeddings():
await asyncio.sleep(
    0.05
) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
router.reset()
except litellm.Timeout as e:
    pass
@@ -394,7 +394,7 @@ def test_dynamic_fallbacks_sync():
},
]

router = Router(model_list=model_list, set_verbose=True, num_retries=0)
router = Router(model_list=model_list, set_verbose=True)
kwargs = {}
kwargs["model"] = "azure/gpt-3.5-turbo"
kwargs["messages"] = [{"role": "user", "content": "Hey, how's it going?"}]
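With num_retries=0 gone from the Router constructor, the dynamic-fallback test now exercises the default retry path before its per-request fallback fires. The fallback list itself is not visible in this hunk; it is presumably supplied per call, roughly like the sketch below (the exact fallback mapping is an assumption for illustration):

kwargs = {}
kwargs["model"] = "azure/gpt-3.5-turbo"
kwargs["messages"] = [{"role": "user", "content": "Hey, how's it going?"}]
# dynamic fallbacks: passed on the request instead of on the Router itself
kwargs["fallbacks"] = [{"azure/gpt-3.5-turbo": ["gpt-3.5-turbo"]}]
response = router.completion(**kwargs)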
@@ -402,7 +402,7 @@ def test_dynamic_fallbacks_sync():
response = router.completion(**kwargs)
print(f"response: {response}")
time.sleep(0.05) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
router.reset()
except Exception as e:
    pytest.fail(f"An exception occurred - {e}")
@@ -488,7 +488,7 @@ async def test_dynamic_fallbacks_async():
await asyncio.sleep(
    0.05
) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
router.reset()
except Exception as e:
    pytest.fail(f"An exception occurred - {e}")
@@ -573,7 +573,7 @@ async def test_async_fallbacks_streaming():
await asyncio.sleep(
    0.05
) # allow a delay as success_callbacks are on a separate thread
assert customHandler.previous_models == 1 # 0 retries, 1 fallback
assert customHandler.previous_models == 4 # 1 init call, 2 retries, 1 fallback
router.reset()
except litellm.Timeout as e:
    pass
@@ -766,10 +766,10 @@ def test_usage_based_routing_fallbacks():
load_dotenv()

# Constants for TPM and RPM allocation
AZURE_FAST_TPM = 3
AZURE_BASIC_TPM = 4
OPENAI_TPM = 400
ANTHROPIC_TPM = 100000
AZURE_FAST_RPM = 3
AZURE_BASIC_RPM = 4
OPENAI_RPM = 10
ANTHROPIC_RPM = 100000

def get_azure_params(deployment_name: str):
    params = {
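Only the signature of get_azure_params and the opening brace of its params dict survive in this hunk. A plausible shape for the helper is sketched below; the environment-variable names are assumptions, not something this diff shows:

import os


def get_azure_params(deployment_name: str):
    # assumed: standard Azure OpenAI settings read from the environment
    params = {
        "model": f"azure/{deployment_name}",
        "api_key": os.environ.get("AZURE_API_KEY"),
        "api_version": os.environ.get("AZURE_API_VERSION"),
        "api_base": os.environ.get("AZURE_API_BASE"),
    }
    return params

get_openai_params and get_anthropic_params, referenced in the model_list below, presumably follow the same pattern for their providers.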
@@ -798,22 +798,26 @@ def test_usage_based_routing_fallbacks():
{
    "model_name": "azure/gpt-4-fast",
    "litellm_params": get_azure_params("chatgpt-v-2"),
    "tpm": AZURE_FAST_TPM,
    "model_info": {"id": 1},
    "rpm": AZURE_FAST_RPM,
},
{
    "model_name": "azure/gpt-4-basic",
    "litellm_params": get_azure_params("chatgpt-v-2"),
    "tpm": AZURE_BASIC_TPM,
    "model_info": {"id": 2},
    "rpm": AZURE_BASIC_RPM,
},
{
    "model_name": "openai-gpt-4",
    "litellm_params": get_openai_params("gpt-3.5-turbo"),
    "tpm": OPENAI_TPM,
    "model_info": {"id": 3},
    "rpm": OPENAI_RPM,
},
{
    "model_name": "anthropic-claude-instant-1.2",
    "litellm_params": get_anthropic_params("claude-instant-1.2"),
    "tpm": ANTHROPIC_TPM,
    "model_info": {"id": 4},
    "rpm": ANTHROPIC_RPM,
},
]
# litellm.set_verbose=True
@@ -844,10 +848,10 @@ def test_usage_based_routing_fallbacks():
mock_response="very nice to meet you",
)
print("response: ", response)
print("response._hidden_params: ", response._hidden_params)
print(f"response._hidden_params: {response._hidden_params}")
# in this test, we expect azure/gpt-4 fast to fail, then azure-gpt-4 basic to fail and then openai-gpt-4 to pass
# the token count of this message is > AZURE_FAST_TPM, > AZURE_BASIC_TPM
assert response._hidden_params["custom_llm_provider"] == "openai"
assert response._hidden_params["model_id"] == "1"

# now make 20 mock requests to OpenAI - expect it to fallback to anthropic-claude-instant-1.2
for i in range(20):
@@ -861,7 +865,7 @@ def test_usage_based_routing_fallbacks():
print("response._hidden_params: ", response._hidden_params)
if i == 19:
    # by the 19th call we should have hit TPM LIMIT for OpenAI, it should fallback to anthropic-claude-instant-1.2
    assert response._hidden_params["custom_llm_provider"] == "anthropic"
    assert response._hidden_params["model_id"] == "4"

except Exception as e:
    pytest.fail(f"An exception occurred {e}")
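The Router construction for this usage-based test sits outside the hunks shown above. Based on the constants and model_list entries, it presumably looks something like the sketch below; the routing_strategy string and the fallback chain are assumptions, not part of this diff:

router = Router(
    model_list=model_list,
    routing_strategy="usage-based-routing",
    # step down the chain as each deployment's TPM/RPM budget is exhausted
    fallbacks=[
        {"azure/gpt-4-fast": ["azure/gpt-4-basic"]},
        {"azure/gpt-4-basic": ["openai-gpt-4"]},
        {"openai-gpt-4": ["anthropic-claude-instant-1.2"]},
    ],
)

With OPENAI_TPM = 400 and a mocked response per call, the loop above expects the OpenAI deployment's token budget to run out by the 19th request, at which point the router should fall back to anthropic-claude-instant-1.2 (model_info id 4), which is exactly what the final asserts check.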