fix(router.py): improve pre-call check -> get model group cache one-time

Krrish Dholakia 2024-04-06 18:24:23 -07:00
parent 4512510d37
commit 6f94f3d127
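The pre-call check previously rebuilt the per-minute RPM key and re-read the model group cache inside the loop, once per deployment; this change reads it once before the loop and reuses the result for every deployment. Below is a minimal, self-contained sketch of that pattern, assuming only a cache object exposing `get_cache(key, local_only=...)` as seen in the diff; the `SimpleCache` stub, `pre_call_rpm_check` helper, and sample deployment fields are illustrative, not litellm's actual implementation.

```python
from datetime import datetime


class SimpleCache:
    """Illustrative stand-in for the router's in-memory cache (not litellm's cache class)."""

    def __init__(self, store=None):
        self._store = store or {}

    def get_cache(self, key, local_only=True):
        # local_only mirrors the call signature used in the diff; this stub is memory-only.
        return self._store.get(key)


def pre_call_rpm_check(model, deployments, cache):
    """Read the model-group RPM bucket once, then compare each deployment against it."""
    current_minute = datetime.now().strftime("%H-%M")
    rpm_key = f"{model}:rpm:{current_minute}"
    # One-time fetch of the per-minute usage map, instead of repeating it per deployment.
    model_group_cache = cache.get_cache(key=rpm_key, local_only=True) or {}

    usable = []
    for deployment in deployments:
        model_id = deployment["model_info"]["id"]
        local_count = cache.get_cache(key=model_id, local_only=True) or 0
        group_count = model_group_cache.get(model_id, 0)
        current_request = max(local_count, group_count)
        if current_request < deployment["litellm_params"].get("rpm", float("inf")):
            usable.append(deployment)
    return usable


if __name__ == "__main__":
    minute = datetime.now().strftime("%H-%M")
    cache = SimpleCache({f"gpt-3.5-turbo:rpm:{minute}": {"id-1": 3}})
    deployments = [{"model_info": {"id": "id-1"}, "litellm_params": {"rpm": 10}}]
    print(pre_call_rpm_check("gpt-3.5-turbo", deployments, cache))
```

Reading the group bucket once per check keeps the per-deployment work down to a couple of dictionary/cache lookups and avoids re-formatting the same `rpm_key` string on every iteration.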


@@ -2380,6 +2380,13 @@ class Router:
         _context_window_error = False
         _rate_limit_error = False
+        ## get model group RPM ##
+        current_minute = datetime.now().strftime("%H-%M")
+        rpm_key = f"{model}:rpm:{current_minute}"
+        model_group_cache = (
+            self.cache.get_cache(key=rpm_key, local_only=True) or {}
+        )  # check the redis + in-memory cache used by lowest_latency and usage-based routing. Only check the local cache.
         for idx, deployment in enumerate(_returned_deployments):
             # see if we have the info for this model
             try:
@@ -2414,12 +2421,7 @@
             current_request_cache_local = (
                 self.cache.get_cache(key=model_id, local_only=True) or 0
             )
-            ### get usage-based routing cache ###
-            current_minute = datetime.now().strftime("%H-%M")
-            rpm_key = f"{deployment['model_name']}:rpm:{current_minute}"
-            model_group_cache = (
-                self.cache.get_cache(key=rpm_key, local_only=True) or {}
-            )  # check the redis + in-memory cache used by lowest_latency and usage-based routing. Only check the local cache.
+            ### get usage based cache ###
             model_group_cache[model_id] = model_group_cache.get(model_id, 0)
             current_request = max(