fix(router.py): support pre_call_rpm_check for lowest_tpm_rpm_v2 routing

Have routing strategies expose an ‘update rpm’ function for checking and updating rpm before the call.
This commit is contained in:
Krrish Dholakia 2024-04-12 18:25:14 -07:00
parent 2267aeb803
commit c03b0bbb24
3 changed files with 75 additions and 25 deletions

View file

@@ -491,23 +491,16 @@ class Router:
deployment=deployment, kwargs=kwargs, client_type="rpm_client"
)
if rpm_semaphore is not None and isinstance(
rpm_semaphore, asyncio.Semaphore
if (
rpm_semaphore is not None
and isinstance(rpm_semaphore, asyncio.Semaphore)
and self.routing_strategy == "usage-based-routing-v2"
):
async with rpm_semaphore:
"""
- Check against in-memory tpm/rpm limits before making the call
- Check rpm limits before making the call
"""
dt = get_utc_datetime()
current_minute = dt.strftime("%H-%M")
id = kwargs["model_info"]["id"]
rpm_key = "{}:rpm:{}".format(id, current_minute)
curr_rpm = await self.cache.async_get_cache(key=rpm_key)
if (
curr_rpm is not None and curr_rpm >= data["rpm"]
): # >= b/c the initial count is 0
raise Exception("Rate Limit error")
await self.cache.async_increment_cache(key=rpm_key, value=1)
await self.lowesttpm_logger_v2.pre_call_rpm_check(deployment)
response = await _response
else:
response = await _response