mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 11:43:54 +00:00
add routing strategy
This commit is contained in:
parent
2872e43fd8
commit
80f0878b88
1 changed file with 15 additions and 6 deletions
|
@ -28,13 +28,13 @@ class RouterConfig(BaseModel):
|
||||||
|
|
||||||
@pytest.fixture(scope="function")
|
@pytest.fixture(scope="function")
|
||||||
def router_factory():
|
def router_factory():
|
||||||
def create_router(rpm, tpm):
|
def create_router(rpm, tpm, routing_strategy):
|
||||||
model_list = base_model_list.copy()
|
model_list = base_model_list.copy()
|
||||||
model_list[0]["rpm"] = rpm
|
model_list[0]["rpm"] = rpm
|
||||||
model_list[0]["tpm"] = tpm
|
model_list[0]["tpm"] = tpm
|
||||||
return Router(
|
return Router(
|
||||||
model_list=model_list,
|
model_list=model_list,
|
||||||
routing_strategy="usage-based-routing",
|
routing_strategy=routing_strategy,
|
||||||
debug_level="DEBUG",
|
debug_level="DEBUG",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -77,13 +77,22 @@ class ExpectNoException(Exception):
|
||||||
"num_try_send, num_allowed_send",
|
"num_try_send, num_allowed_send",
|
||||||
[
|
[
|
||||||
(2, 2), # sending as many as allowed, ExpectNoException
|
(2, 2), # sending as many as allowed, ExpectNoException
|
||||||
# (10, 10), # sending as many as allowed, ExpectNoException
|
(10, 10), # sending as many as allowed, ExpectNoException
|
||||||
(3, 2), # Sending more than allowed, ValueError
|
(3, 2), # Sending more than allowed, ValueError
|
||||||
# (10, 9), # Sending more than allowed, ValueError
|
(10, 9), # Sending more than allowed, ValueError
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
@pytest.mark.parametrize("sync_mode", [True, False]) # Use parametrization for sync/async
|
@pytest.mark.parametrize("sync_mode", [True, False]) # Use parametrization for sync/async
|
||||||
def test_rate_limit(router_factory, num_try_send, num_allowed_send, sync_mode):
|
@pytest.mark.parametrize(
|
||||||
|
"routing_strategy",
|
||||||
|
[
|
||||||
|
"usage-based-routing",
|
||||||
|
# "simple-shuffle", # don't expect to rate limit
|
||||||
|
# "least-busy", # don't expect to rate limit
|
||||||
|
"latency-based-routing",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
def test_rate_limit(router_factory, num_try_send, num_allowed_send, sync_mode, routing_strategy):
|
||||||
"""
|
"""
|
||||||
Check if router.completion and router.acompletion can send more messages than they've been limited to.
|
Check if router.completion and router.acompletion can send more messages than they've been limited to.
|
||||||
Args:
|
Args:
|
||||||
|
@ -101,7 +110,7 @@ def test_rate_limit(router_factory, num_try_send, num_allowed_send, sync_mode):
|
||||||
list_of_messages = generate_list_of_messages(max(num_try_send, num_allowed_send))
|
list_of_messages = generate_list_of_messages(max(num_try_send, num_allowed_send))
|
||||||
rpm, tpm = calculate_limits(list_of_messages[:num_allowed_send])
|
rpm, tpm = calculate_limits(list_of_messages[:num_allowed_send])
|
||||||
list_of_messages = list_of_messages[:num_try_send]
|
list_of_messages = list_of_messages[:num_try_send]
|
||||||
router = router_factory(rpm, tpm)
|
router = router_factory(rpm, tpm, routing_strategy)
|
||||||
|
|
||||||
with pytest.raises(expected_exception) as excinfo: # asserts correct type raised
|
with pytest.raises(expected_exception) as excinfo: # asserts correct type raised
|
||||||
if sync_mode:
|
if sync_mode:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue