mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 03:34:10 +00:00
formating
This commit is contained in:
parent
1c93ebf05a
commit
006d0237e4
2 changed files with 39 additions and 16 deletions
|
@ -56,17 +56,23 @@ def calculate_limits(list_of_messages):
|
|||
Return the min rpm and tpm level that would let all messages in list_of_messages be sent this minute
|
||||
"""
|
||||
rpm = len(list_of_messages)
|
||||
tpm = sum((utils.token_counter(messages=m) + COMPLETION_TOKENS for m in list_of_messages))
|
||||
tpm = sum(
|
||||
(utils.token_counter(messages=m) + COMPLETION_TOKENS for m in list_of_messages)
|
||||
)
|
||||
return rpm, tpm
|
||||
|
||||
|
||||
async def async_call(router: Router, list_of_messages) -> Any:
|
||||
tasks = [router.acompletion(model="gpt-3.5-turbo", messages=m) for m in list_of_messages]
|
||||
tasks = [
|
||||
router.acompletion(model="gpt-3.5-turbo", messages=m) for m in list_of_messages
|
||||
]
|
||||
return await asyncio.gather(*tasks)
|
||||
|
||||
|
||||
def sync_call(router: Router, list_of_messages) -> Any:
|
||||
return [router.completion(model="gpt-3.5-turbo", messages=m) for m in list_of_messages]
|
||||
return [
|
||||
router.completion(model="gpt-3.5-turbo", messages=m) for m in list_of_messages
|
||||
]
|
||||
|
||||
|
||||
class ExpectNoException(Exception):
|
||||
|
@ -77,22 +83,26 @@ class ExpectNoException(Exception):
|
|||
"num_try_send, num_allowed_send",
|
||||
[
|
||||
(2, 2), # sending as many as allowed, ExpectNoException
|
||||
(10, 10), # sending as many as allowed, ExpectNoException
|
||||
# (10, 10), # sending as many as allowed, ExpectNoException
|
||||
(3, 2), # Sending more than allowed, ValueError
|
||||
(10, 9), # Sending more than allowed, ValueError
|
||||
# (10, 9), # Sending more than allowed, ValueError
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("sync_mode", [True, False]) # Use parametrization for sync/async
|
||||
@pytest.mark.parametrize(
|
||||
"sync_mode", [True, False]
|
||||
) # Use parametrization for sync/async
|
||||
@pytest.mark.parametrize(
|
||||
"routing_strategy",
|
||||
[
|
||||
"usage-based-routing",
|
||||
# "simple-shuffle", # dont expect to rate limit
|
||||
# "least-busy", # dont expect to rate limit
|
||||
"latency-based-routing",
|
||||
# "latency-based-routing",
|
||||
],
|
||||
)
|
||||
def test_rate_limit(router_factory, num_try_send, num_allowed_send, sync_mode, routing_strategy):
|
||||
def test_rate_limit(
|
||||
router_factory, num_try_send, num_allowed_send, sync_mode, routing_strategy
|
||||
):
|
||||
"""
|
||||
Check if router.completion and router.acompletion can send more messages than they've been limited to.
|
||||
Args:
|
||||
|
@ -105,7 +115,9 @@ def test_rate_limit(router_factory, num_try_send, num_allowed_send, sync_mode, r
|
|||
ExpectNoException: Signfies that no other error has happened. A NOP
|
||||
"""
|
||||
# Can send more messages then we're going to; so don't expect a rate limit error
|
||||
expected_exception = ExpectNoException if num_try_send <= num_allowed_send else ValueError
|
||||
expected_exception = (
|
||||
ExpectNoException if num_try_send <= num_allowed_send else ValueError
|
||||
)
|
||||
|
||||
list_of_messages = generate_list_of_messages(max(num_try_send, num_allowed_send))
|
||||
rpm, tpm = calculate_limits(list_of_messages[:num_allowed_send])
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue