diff --git a/litellm/__init__.py b/litellm/__init__.py
index 28f7a32b83..8b28ab80c2 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -147,6 +147,9 @@ enable_caching_on_provider_specific_optional_params: bool = (
 caching: bool = (
     False  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
 )
+always_read_redis: bool = (
+    True  # always read from Redis for rate limiting logic on the litellm proxy
+)
 caching_with_models: bool = (
     False  # # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
 )
diff --git a/litellm/caching.py b/litellm/caching.py
index d34686c2ad..db2f93507d 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -1777,6 +1777,7 @@ class DualCache(BaseCache):
         redis_cache: Optional[RedisCache] = None,
         default_in_memory_ttl: Optional[float] = None,
         default_redis_ttl: Optional[float] = None,
+        always_read_redis: Optional[bool] = True,
     ) -> None:
         super().__init__()
         # If in_memory_cache is not provided, use the default InMemoryCache
@@ -1788,6 +1789,7 @@ class DualCache(BaseCache):
             default_in_memory_ttl or litellm.default_in_memory_ttl
         )
         self.default_redis_ttl = default_redis_ttl or litellm.default_redis_ttl
+        self.always_read_redis = always_read_redis
 
     def update_cache_ttl(
         self, default_in_memory_ttl: Optional[float], default_redis_ttl: Optional[float]
@@ -1847,8 +1849,12 @@
                 if in_memory_result is not None:
                     result = in_memory_result
 
-            if result is None and self.redis_cache is not None and local_only == False:
-                # If not found in in-memory cache, try fetching from Redis
+            if (
+                (result is None or self.always_read_redis is True)
+                and self.redis_cache is not None
+                and local_only == False
+            ):
+                # If not found in in-memory cache, or if always_read_redis is True, try fetching from Redis
                 redis_result = self.redis_cache.get_cache(key, **kwargs)
 
                 if redis_result is not None:
@@ -1911,8 +1917,12 @@
                 if in_memory_result is not None:
                     result = in_memory_result
 
-            if result is None and self.redis_cache is not None and local_only == False:
-                # If not found in in-memory cache, try fetching from Redis
+            if (
+                (result is None or self.always_read_redis is True)
+                and self.redis_cache is not None
+                and local_only == False
+            ):
+                # If not found in in-memory cache, or if always_read_redis is True, try fetching from Redis
                 redis_result = await self.redis_cache.async_get_cache(key, **kwargs)
 
                 if redis_result is not None:
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index ea830c1363..2423fb105a 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -207,7 +207,7 @@ class ProxyLogging:
         self.call_details: dict = {}
         self.call_details["user_api_key_cache"] = user_api_key_cache
         self.internal_usage_cache = DualCache(
-            default_in_memory_ttl=1
+            default_in_memory_ttl=1, always_read_redis=litellm.always_read_redis
         )  # ping redis cache every 1s
         self.max_parallel_request_limiter = _PROXY_MaxParallelRequestsHandler(
             self.internal_usage_cache
diff --git a/litellm/tests/test_caching.py b/litellm/tests/test_caching.py
index 9b859b451a..a272d2dcf3 100644
--- a/litellm/tests/test_caching.py
+++ b/litellm/tests/test_caching.py
@@ -1958,3 +1958,41 @@ async def test_cache_default_off_acompletion():
     )
     print(f"Response4: {response4}")
     assert response3.id == response4.id
+
+
+@pytest.mark.asyncio()
+async def test_dual_cache_uses_redis():
+    """
+    - Store different values in the redis and in-memory caches
+    - Call get cache
+    - Assert that the value from redis is used
+    """
+    litellm.set_verbose = True
+    from litellm.caching import DualCache, RedisCache
+
+    current_usage = uuid.uuid4()
+
+    _cache_obj = DualCache(redis_cache=RedisCache(), always_read_redis=True)
+
+    # set cache
+    await _cache_obj.async_set_cache(key=f"current_usage: {current_usage}", value=10)
+
+    # overwrite the in-memory value so it differs from the value in redis
+    _cache_obj.in_memory_cache.cache_dict[f"current_usage: {current_usage}"] = 1
+
+    # get cache
+    value = await _cache_obj.async_get_cache(key=f"current_usage: {current_usage}")
+    print("value from dual cache", value)
+    assert value == 10
+
+
+@pytest.mark.asyncio()
+async def test_proxy_logging_setup():
+    """
+    Assert that always_read_redis is True on the proxy's internal usage cache
+    """
+    from litellm.caching import DualCache
+    from litellm.proxy.utils import ProxyLogging
+
+    pl_obj = ProxyLogging(user_api_key_cache=DualCache())
+    assert pl_obj.internal_usage_cache.always_read_redis is True
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index f2b3257619..538212dc35 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -23,7 +23,7 @@ from litellm import RateLimitError, Timeout, completion, completion_cost, embedd
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.llms.prompt_templates.factory import anthropic_messages_pt
 
-# litellm.num_retries = 3
+# litellm.num_retries=3
 litellm.cache = None
 litellm.success_callback = []
 user_message = "Write a short poem about the sky"
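
For reviewers, a minimal sketch of how the new flag is intended to be used end to end. It assumes a reachable Redis instance (litellm's RedisCache() typically picks up connection settings such as REDIS_HOST and REDIS_PORT from the environment), and the "rate_limit:team-1" key name is purely illustrative:

    import asyncio

    import litellm
    from litellm.caching import DualCache, RedisCache


    async def main():
        # Proxy-wide default; ProxyLogging forwards this into its internal DualCache.
        litellm.always_read_redis = True

        # Per-instance override; mirrors what ProxyLogging now does.
        cache = DualCache(redis_cache=RedisCache(), always_read_redis=True)

        # Writes land in both layers ("rate_limit:team-1" is a hypothetical key).
        await cache.async_set_cache(key="rate_limit:team-1", value=10)

        # With always_read_redis=True, reads consult Redis even when the key is
        # present in the in-memory layer, so rate-limit counters shared across
        # multiple proxy instances stay consistent.
        value = await cache.async_get_cache(key="rate_limit:team-1")
        print(value)  # 10


    asyncio.run(main())

The tradeoff is one extra Redis round trip per read; with the flag set to False, the pre-existing behavior (Redis consulted only on an in-memory miss) is preserved.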