diff --git a/litellm/__init__.py b/litellm/__init__.py index e220c89920..4ee604aede 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -146,6 +146,9 @@ enable_caching_on_provider_specific_optional_params: bool = ( caching: bool = ( False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648 ) +always_read_redis: bool = ( + True # always use redis for rate limiting logic on litellm proxy +) caching_with_models: bool = ( False # # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648 ) diff --git a/litellm/caching.py b/litellm/caching.py index d34686c2ad..8a25230582 100644 --- a/litellm/caching.py +++ b/litellm/caching.py @@ -1777,6 +1777,7 @@ class DualCache(BaseCache): redis_cache: Optional[RedisCache] = None, default_in_memory_ttl: Optional[float] = None, default_redis_ttl: Optional[float] = None, + always_read_redis: Optional[bool] = False, ) -> None: super().__init__() # If in_memory_cache is not provided, use the default InMemoryCache @@ -1788,6 +1789,7 @@ class DualCache(BaseCache): default_in_memory_ttl or litellm.default_in_memory_ttl ) self.default_redis_ttl = default_redis_ttl or litellm.default_redis_ttl + self.always_read_redis = always_read_redis def update_cache_ttl( self, default_in_memory_ttl: Optional[float], default_redis_ttl: Optional[float] @@ -1847,8 +1849,12 @@ class DualCache(BaseCache): if in_memory_result is not None: result = in_memory_result - if result is None and self.redis_cache is not None and local_only == False: - # If not found in in-memory cache, try fetching from Redis + if ( + (self.always_read_redis is True or result is None) + and self.redis_cache is not None + and local_only == False + ): + # If not found in in-memory cache or always_read_redis is True, try fetching from Redis redis_result = self.redis_cache.get_cache(key, **kwargs) if redis_result is not None: @@ -1911,8 +1917,12 @@ class DualCache(BaseCache): if 
in_memory_result is not None: result = in_memory_result - if result is None and self.redis_cache is not None and local_only == False: - # If not found in in-memory cache, try fetching from Redis + if ( + (self.always_read_redis is True or result is None) + and self.redis_cache is not None + and local_only == False + ): + # If not found in in-memory cache or always_read_redis is True, try fetching from Redis redis_result = await self.redis_cache.async_get_cache(key, **kwargs) if redis_result is not None: diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index ea830c1363..2423fb105a 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -207,7 +207,7 @@ class ProxyLogging: self.call_details: dict = {} self.call_details["user_api_key_cache"] = user_api_key_cache self.internal_usage_cache = DualCache( - default_in_memory_ttl=1, always_read_redis=litellm.always_read_redis ) # ping redis cache every 1s self.max_parallel_request_limiter = _PROXY_MaxParallelRequestsHandler( self.internal_usage_cache