diff --git a/litellm/router.py b/litellm/router.py
index e19afc3c0..e1f7efb43 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -2380,6 +2380,13 @@ class Router:
 
         _context_window_error = False
         _rate_limit_error = False
+
+        ## get model group RPM ##
+        current_minute = datetime.now().strftime("%H-%M")
+        rpm_key = f"{model}:rpm:{current_minute}"
+        model_group_cache = (
+            self.cache.get_cache(key=rpm_key, local_only=True) or {}
+        )  # usage cache shared with lowest_latency and usage-based routing (backed by redis + in-memory); local_only=True, so only the local cache is read here.
         for idx, deployment in enumerate(_returned_deployments):
             # see if we have the info for this model
             try:
@@ -2414,12 +2421,7 @@ class Router:
             current_request_cache_local = (
                 self.cache.get_cache(key=model_id, local_only=True) or 0
             )
-            ### get usage-based routing cache ###
-            current_minute = datetime.now().strftime("%H-%M")
-            rpm_key = f"{deployment['model_name']}:rpm:{current_minute}"
-            model_group_cache = (
-                self.cache.get_cache(key=rpm_key, local_only=True) or {}
-            )  # check the redis + in-memory cache used by lowest_latency and usage-based routing. Only check the local cache.
+            ### default this deployment's usage-based cache entry to 0 if unset ###
             model_group_cache[model_id] = model_group_cache.get(model_id, 0)
 
             current_request = max(