forked from phoenix/litellm-mirror
Merge pull request #3192 from BerriAI/litellm_calculate_max_parallel_requests
fix(router.py): Make TPM limits concurrency-safe
This commit is contained in:
commit
fcde3ba213
7 changed files with 208 additions and 19 deletions
|
@ -5395,6 +5395,49 @@ def get_optional_params(
|
|||
return optional_params
|
||||
|
||||
|
||||
def calculate_max_parallel_requests(
|
||||
max_parallel_requests: Optional[int],
|
||||
rpm: Optional[int],
|
||||
tpm: Optional[int],
|
||||
default_max_parallel_requests: Optional[int],
|
||||
) -> Optional[int]:
|
||||
"""
|
||||
Returns the max parallel requests to send to a deployment.
|
||||
|
||||
Used in semaphore for async requests on router.
|
||||
|
||||
Parameters:
|
||||
- max_parallel_requests - Optional[int] - max_parallel_requests allowed for that deployment
|
||||
- rpm - Optional[int] - requests per minute allowed for that deployment
|
||||
- tpm - Optional[int] - tokens per minute allowed for that deployment
|
||||
- default_max_parallel_requests - Optional[int] - default_max_parallel_requests allowed for any deployment
|
||||
|
||||
Returns:
|
||||
- int or None (if all params are None)
|
||||
|
||||
Order:
|
||||
max_parallel_requests > rpm > tpm / 6 (azure formula) > default max_parallel_requests
|
||||
|
||||
Azure RPM formula:
|
||||
6 rpm per 1000 TPM
|
||||
https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits
|
||||
|
||||
|
||||
"""
|
||||
if max_parallel_requests is not None:
|
||||
return max_parallel_requests
|
||||
elif rpm is not None:
|
||||
return rpm
|
||||
elif tpm is not None:
|
||||
calculated_rpm = int(tpm / 1000 / 6)
|
||||
if calculated_rpm == 0:
|
||||
calculated_rpm = 1
|
||||
return calculated_rpm
|
||||
elif default_max_parallel_requests is not None:
|
||||
return default_max_parallel_requests
|
||||
return None
|
||||
|
||||
|
||||
def get_api_base(model: str, optional_params: dict) -> Optional[str]:
|
||||
"""
|
||||
Returns the api base used for calling the model.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue