Improve rpm check on keys (#8301)

* fix(parallel_request_limiter.py): initial commit that solves the rpm limit check on keys

Fixes https://github.com/BerriAI/litellm/issues/6938

* fix(parallel_request_limiter.py): simpler approach - just increment RPM in pre-call hook instead of on success

* fix(parallel_request_limiter.py): pass testing

* fix: fix linting error

* fix(parallel_request_limiter.py): fix parallel request check for keys
This commit is contained in:
Krish Dholakia 2025-02-05 20:23:08 -08:00 committed by GitHub
parent 7e873538f6
commit b4e5c0de69
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 92 additions and 140 deletions

View file

@@ -414,7 +414,9 @@ def bytes_to_mb(bytes_value: int):
# helpers used by parallel request limiter to handle model rpm/tpm limits for a given api key
def get_key_model_rpm_limit(user_api_key_dict: UserAPIKeyAuth) -> Optional[dict]:
def get_key_model_rpm_limit(
user_api_key_dict: UserAPIKeyAuth,
) -> Optional[Dict[str, int]]:
if user_api_key_dict.metadata:
if "model_rpm_limit" in user_api_key_dict.metadata:
return user_api_key_dict.metadata["model_rpm_limit"]
@@ -428,7 +430,9 @@ def get_key_model_rpm_limit(user_api_key_dict: UserAPIKeyAuth) -> Optional[dict]
return None
def get_key_model_tpm_limit(user_api_key_dict: UserAPIKeyAuth) -> Optional[dict]:
def get_key_model_tpm_limit(
user_api_key_dict: UserAPIKeyAuth,
) -> Optional[Dict[str, int]]:
if user_api_key_dict.metadata:
if "model_tpm_limit" in user_api_key_dict.metadata:
return user_api_key_dict.metadata["model_tpm_limit"]