Merge pull request #3358 from sumanth13131/usage-based-routing-RPM-fix
usage based routing RPM count fix
commit 32534b5e91
3 changed files with 31 additions and 2 deletions
@@ -206,7 +206,7 @@ class LowestTPMLoggingHandler(CustomLogger):
             if item_tpm + input_tokens > _deployment_tpm:
                 continue
             elif (rpm_dict is not None and item in rpm_dict) and (
-                rpm_dict[item] + 1 > _deployment_rpm
+                rpm_dict[item] + 1 >= _deployment_rpm
             ):
                 continue
             elif item_tpm < lowest_tpm:
@@ -366,7 +366,7 @@ class LowestTPMLoggingHandler_v2(CustomLogger):
             if item_tpm + input_tokens > _deployment_tpm:
                 continue
             elif (rpm_dict is not None and item in rpm_dict) and (
-                rpm_dict[item] + 1 > _deployment_rpm
+                rpm_dict[item] + 1 >= _deployment_rpm
             ):
                 continue
             elif item_tpm == lowest_tpm:
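
Both hunks make the same one-character change: a deployment is now skipped when one more request would reach its configured RPM, not only when it would exceed it. The snippet below is a standalone illustration of the boundary case the comparison change affects; `count` and `limit` are made-up names rather than the handlers' actual variables, and it assumes nothing about when litellm increments the counter, it only shows which inputs the two comparisons treat differently.

# Illustrative sketch only -- not the handlers' code.
limit = 3  # requests per minute
for count in range(5):
    old_skip = count + 1 > limit    # previous check
    new_skip = count + 1 >= limit   # check after this PR
    print(count, old_skip, new_skip)
# Only count == 2 differs: the old check still selected the deployment,
# the new check skips it.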
@@ -264,3 +264,32 @@ async def test_acompletion_caching_on_router_caching_groups():
     except Exception as e:
         traceback.print_exc()
         pytest.fail(f"Error occurred: {e}")
+
+def test_rpm_limiting():
+    try:
+        litellm.set_verbose = True
+        model_list = [
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "gpt-3.5-turbo",
+                    "api_key": os.getenv("OPENAI_API_KEY"),
+                },
+                "tpm": 10000,
+                "rpm": 3,
+            },
+        ]
+
+        router = Router(
+            model_list=model_list,
+            routing_strategy="usage-based-routing",
+        )
+        failedCount = 0
+        for i in range(10):
+            try:
+                response = router.completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": ""}])
+            except ValueError as e:
+                failedCount += 1
+        assert failedCount == 7
+    except Exception as e:
+        pytest.fail(f"An exception occurred - {str(e)}")
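
The new test exercises the fix end to end: a single gpt-3.5-turbo deployment capped at rpm=3 receives 10 sequential completions, and exactly 7 of them are expected to be rejected with a ValueError. A rough, hypothetical sanity check of that expectation (standalone, not part of the PR):

# Hypothetical arithmetic behind the assertion, not code from the PR.
total_requests = 10
rpm_limit = 3                   # "rpm": 3 in the model_list entry
served = rpm_limit              # the test expects 3 calls to be routed
failed = total_requests - served
assert failed == 7              # mirrors `assert failedCount == 7`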