forked from phoenix/litellm-mirror
fix(router.py): improve pre-call check -> get model group cache one-time
This commit is contained in:
parent
4512510d37
commit
6f94f3d127
1 changed file with 8 additions and 6 deletions
|
@ -2380,6 +2380,13 @@ class Router:
|
|||
|
||||
_context_window_error = False
|
||||
_rate_limit_error = False
|
||||
|
||||
## get model group RPM ##
|
||||
current_minute = datetime.now().strftime("%H-%M")
|
||||
rpm_key = f"{model}:rpm:{current_minute}"
|
||||
model_group_cache = (
|
||||
self.cache.get_cache(key=rpm_key, local_only=True) or {}
|
||||
) # check the redis + in-memory cache used by lowest_latency and usage-based routing. Only check the local cache.
|
||||
for idx, deployment in enumerate(_returned_deployments):
|
||||
# see if we have the info for this model
|
||||
try:
|
||||
|
@ -2414,12 +2421,7 @@ class Router:
|
|||
current_request_cache_local = (
|
||||
self.cache.get_cache(key=model_id, local_only=True) or 0
|
||||
)
|
||||
### get usage-based routing cache ###
|
||||
current_minute = datetime.now().strftime("%H-%M")
|
||||
rpm_key = f"{deployment['model_name']}:rpm:{current_minute}"
|
||||
model_group_cache = (
|
||||
self.cache.get_cache(key=rpm_key, local_only=True) or {}
|
||||
) # check the redis + in-memory cache used by lowest_latency and usage-based routing. Only check the local cache.
|
||||
### get usage based cache ###
|
||||
model_group_cache[model_id] = model_group_cache.get(model_id, 0)
|
||||
|
||||
current_request = max(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue