mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 11:43:54 +00:00)
Merge pull request #5484 from BerriAI/litellm_fix_redis
fix proxy server - always read redis for rate limiting logic
commit 4e392ebdb2
5 changed files with 58 additions and 6 deletions
@@ -147,6 +147,9 @@ enable_caching_on_provider_specific_optional_params: bool = (
 caching: bool = (
     False  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
 )
+always_read_redis: bool = (
+    True  # always use redis for rate limiting logic on litellm proxy
+)
 caching_with_models: bool = (
     False  # # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
 )
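
The hunk above adds a module-level switch that defaults to True. A minimal usage sketch, assuming a litellm build that contains this change; flipping the flag is illustrative only and would need to happen before the proxy constructs its caches:

    # Hedged sketch: `always_read_redis` is the module-level bool added above;
    # everything else here is illustrative only.
    import litellm

    print(litellm.always_read_redis)  # True by default after this change

    # Assumption: opting back into "only hit Redis on an in-memory miss" is a
    # matter of flipping the flag before the proxy's caches are built.
    litellm.always_read_redis = False
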
@@ -1777,6 +1777,7 @@ class DualCache(BaseCache):
         redis_cache: Optional[RedisCache] = None,
         default_in_memory_ttl: Optional[float] = None,
         default_redis_ttl: Optional[float] = None,
+        always_read_redis: Optional[bool] = True,
     ) -> None:
         super().__init__()
         # If in_memory_cache is not provided, use the default InMemoryCache

@@ -1788,6 +1789,7 @@ class DualCache(BaseCache):
             default_in_memory_ttl or litellm.default_in_memory_ttl
         )
         self.default_redis_ttl = default_redis_ttl or litellm.default_redis_ttl
+        self.always_read_redis = always_read_redis
 
     def update_cache_ttl(
         self, default_in_memory_ttl: Optional[float], default_redis_ttl: Optional[float]

@@ -1847,8 +1849,12 @@ class DualCache(BaseCache):
                 if in_memory_result is not None:
                     result = in_memory_result
 
-            if result is None and self.redis_cache is not None and local_only == False:
-                # If not found in in-memory cache, try fetching from Redis
+            if (
+                (self.always_read_redis is True)
+                and self.redis_cache is not None
+                and local_only == False
+            ):
+                # If not found in in-memory cache or always_read_redis is True, try fetching from Redis
                 redis_result = self.redis_cache.get_cache(key, **kwargs)
 
                 if redis_result is not None:

@@ -1911,8 +1917,12 @@ class DualCache(BaseCache):
                 if in_memory_result is not None:
                     result = in_memory_result
 
-            if result is None and self.redis_cache is not None and local_only == False:
-                # If not found in in-memory cache, try fetching from Redis
+            if (
+                (self.always_read_redis is True)
+                and self.redis_cache is not None
+                and local_only == False
+            ):
+                # If not found in in-memory cache or always_read_redis is True, try fetching from Redis
                 redis_result = await self.redis_cache.async_get_cache(key, **kwargs)
 
                 if redis_result is not None:

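
Taken together, the DualCache hunks mean every read consults Redis when always_read_redis is set, instead of only on an in-memory miss, so rate-limit counters written by one proxy instance are visible to all of them. A simplified, self-contained model of that read path (plain dicts stand in for the in-memory cache and Redis; this is not the litellm implementation, only an illustration of the logic):

    from typing import Any, Optional

    class ToyDualCache:
        """Toy illustration of the always_read_redis read path."""

        def __init__(self, always_read_redis: bool = True) -> None:
            self.in_memory: dict = {}
            self.redis: dict = {}  # stand-in for a shared Redis instance
            self.always_read_redis = always_read_redis

        def get_cache(self, key: str) -> Optional[Any]:
            result = self.in_memory.get(key)
            # When always_read_redis is set, the shared store is consulted even
            # though a local copy exists, so a stale local counter cannot win.
            if self.always_read_redis and key in self.redis:
                result = self.redis[key]
                self.in_memory[key] = result  # refresh the local copy
            return result

    cache = ToyDualCache(always_read_redis=True)
    cache.redis["current_usage"] = 10     # written by another proxy instance
    cache.in_memory["current_usage"] = 1  # stale local value
    assert cache.get_cache("current_usage") == 10
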
@@ -207,7 +207,7 @@ class ProxyLogging:
         self.call_details: dict = {}
         self.call_details["user_api_key_cache"] = user_api_key_cache
         self.internal_usage_cache = DualCache(
-            default_in_memory_ttl=1
+            default_in_memory_ttl=1, always_read_redis=litellm.always_read_redis
         )  # ping redis cache every 1s
         self.max_parallel_request_limiter = _PROXY_MaxParallelRequestsHandler(
             self.internal_usage_cache

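
The ProxyLogging hunk simply forwards the module flag into the DualCache used for internal usage tracking. A rough, hedged equivalent of that wiring, assuming a build containing this change and Redis connection details available to RedisCache() (for example via REDIS_HOST / REDIS_PORT / REDIS_PASSWORD):

    import litellm
    from litellm.caching import DualCache, RedisCache

    # Illustrative only: mirrors what ProxyLogging does internally.
    internal_usage_cache = DualCache(
        redis_cache=RedisCache(),
        default_in_memory_ttl=1,  # ping redis cache every 1s, as in the diff
        always_read_redis=litellm.always_read_redis,
    )
    assert internal_usage_cache.always_read_redis is True
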
@@ -1958,3 +1958,42 @@ async def test_cache_default_off_acompletion():
     )
     print(f"Response4: {response4}")
     assert response3.id == response4.id
+
+
+@pytest.mark.asyncio()
+async def test_dual_cache_uses_redis():
+    """
+
+    - Store diff values in redis and in memory cache
+    - call get cache
+    - Assert that value from redis is used
+    """
+    litellm.set_verbose = True
+    from litellm.caching import DualCache, RedisCache
+
+    current_usage = uuid.uuid4()
+
+    _cache_obj = DualCache(redis_cache=RedisCache(), always_read_redis=True)
+
+    # set cache
+    await _cache_obj.async_set_cache(key=f"current_usage: {current_usage}", value=10)
+
+    # modify value of in memory cache
+    _cache_obj.in_memory_cache.cache_dict[f"current_usage: {current_usage}"] = 1
+
+    # get cache
+    value = await _cache_obj.async_get_cache(key=f"current_usage: {current_usage}")
+    print("value from dual cache", value)
+    assert value == 10
+
+
+@pytest.mark.asyncio()
+async def test_proxy_logging_setup():
+    """
+    Assert always_read_redis is True when used by internal usage cache
+    """
+    from litellm.caching import DualCache
+    from litellm.proxy.utils import ProxyLogging
+
+    pl_obj = ProxyLogging(user_api_key_cache=DualCache())
+    assert pl_obj.internal_usage_cache.always_read_redis is True

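
The two new tests exercise both halves of the change: the first asserts that the Redis value wins over a deliberately stale in-memory entry, the second that ProxyLogging builds its internal usage cache with always_read_redis enabled. A hedged way to run just the first one locally (the test file path is a guess, and a reachable Redis configured via the usual environment variables is assumed):

    import pytest

    # Hypothetical invocation; adjust the path to wherever the caching tests
    # live in your checkout.
    pytest.main(["-xvs", "litellm/tests/test_caching.py::test_dual_cache_uses_redis"])
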
@@ -23,7 +23,7 @@ from litellm import RateLimitError, Timeout, completion, completion_cost, embedd
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.llms.prompt_templates.factory import anthropic_messages_pt
 
-# litellm.num_retries = 3
+# litellm.num_retries=3
 litellm.cache = None
 litellm.success_callback = []
 user_message = "Write a short poem about the sky"