forked from phoenix/litellm-mirror
Merge pull request #3358 from sumanth13131/usage-based-routing-RPM-fix
usage based routing RPM count fix
This commit is contained in:
commit
32534b5e91
3 changed files with 31 additions and 2 deletions
|
@ -206,7 +206,7 @@ class LowestTPMLoggingHandler(CustomLogger):
|
|||
if item_tpm + input_tokens > _deployment_tpm:
|
||||
continue
|
||||
elif (rpm_dict is not None and item in rpm_dict) and (
|
||||
rpm_dict[item] + 1 > _deployment_rpm
|
||||
rpm_dict[item] + 1 >= _deployment_rpm
|
||||
):
|
||||
continue
|
||||
elif item_tpm < lowest_tpm:
|
||||
|
|
|
@ -366,7 +366,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
|
|||
if item_tpm + input_tokens > _deployment_tpm:
|
||||
continue
|
||||
elif (rpm_dict is not None and item in rpm_dict) and (
|
||||
rpm_dict[item] + 1 > _deployment_rpm
|
||||
rpm_dict[item] + 1 >= _deployment_rpm
|
||||
):
|
||||
continue
|
||||
elif item_tpm == lowest_tpm:
|
||||
|
|
|
@ -264,3 +264,32 @@ async def test_acompletion_caching_on_router_caching_groups():
|
|||
except Exception as e:
|
||||
traceback.print_exc()
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
def test_rpm_limiting():
    """Check how many of ten back-to-back requests the router rejects.

    A Router is built with one ``gpt-3.5-turbo`` deployment (``tpm`` 10000,
    ``rpm`` 3) and the ``usage-based-routing`` strategy. Ten completions are
    issued in a row; every ``ValueError`` raised by ``router.completion`` is
    counted and the count is asserted to be exactly 7.

    NOTE(review): the ValueError is presumably what the router raises when no
    deployment is under its RPM limit -- confirm against the routing handler.
    """
    try:
        litellm.set_verbose = True

        # Single deployment with a deliberately small rpm value.
        deployment = {
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {
                "model": "gpt-3.5-turbo",
                "api_key": os.getenv("OPENAI_API_KEY"),
            },
            "tpm": 10000,
            "rpm": 3,
        }
        router = Router(
            model_list=[deployment],
            routing_strategy="usage-based-routing",
        )

        rejected = 0
        for _ in range(10):
            try:
                router.completion(
                    model="gpt-3.5-turbo",
                    messages=[{"role": "user", "content": ""}],
                )
            except ValueError:
                rejected += 1

        assert rejected == 7
    except Exception as e:
        # Any other error (or a failed assertion) is reported via pytest.fail.
        pytest.fail(f"An exception occurred - {str(e)}")
|
Loading…
Add table
Add a link
Reference in a new issue