Merge pull request #5484 from BerriAI/litellm_fix_redis

fix proxy server - always read redis for rate limiting logic
This commit is contained in:
Ishaan Jaff 2024-09-04 12:39:15 -07:00 committed by GitHub
commit 4e392ebdb2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 58 additions and 6 deletions

View file

@ -147,6 +147,9 @@ enable_caching_on_provider_specific_optional_params: bool = (
# Deprecated: completion-level caching toggle. Not used anymore, will be removed
# in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
caching: bool = False

# When True, the proxy's DualCache always consults Redis on reads for
# rate-limiting logic, instead of trusting a possibly-stale in-memory value.
always_read_redis: bool = True

# Deprecated: per-model caching toggle. Not used anymore, will be removed
# in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
caching_with_models: bool = False

View file

@ -1777,6 +1777,7 @@ class DualCache(BaseCache):
redis_cache: Optional[RedisCache] = None,
default_in_memory_ttl: Optional[float] = None,
default_redis_ttl: Optional[float] = None,
always_read_redis: Optional[bool] = True,
) -> None:
super().__init__()
# If in_memory_cache is not provided, use the default InMemoryCache
@ -1788,6 +1789,7 @@ class DualCache(BaseCache):
default_in_memory_ttl or litellm.default_in_memory_ttl
)
self.default_redis_ttl = default_redis_ttl or litellm.default_redis_ttl
self.always_read_redis = always_read_redis
def update_cache_ttl(
self, default_in_memory_ttl: Optional[float], default_redis_ttl: Optional[float]
@ -1847,8 +1849,12 @@ class DualCache(BaseCache):
if in_memory_result is not None:
result = in_memory_result
if result is None and self.redis_cache is not None and local_only == False:
# If not found in in-memory cache, try fetching from Redis
if (
(self.always_read_redis is True)
and self.redis_cache is not None
and local_only == False
):
# If not found in in-memory cache or always_read_redis is True, try fetching from Redis
redis_result = self.redis_cache.get_cache(key, **kwargs)
if redis_result is not None:
@ -1911,8 +1917,12 @@ class DualCache(BaseCache):
if in_memory_result is not None:
result = in_memory_result
if result is None and self.redis_cache is not None and local_only == False:
# If not found in in-memory cache, try fetching from Redis
if (
(self.always_read_redis is True)
and self.redis_cache is not None
and local_only == False
):
# If not found in in-memory cache or always_read_redis is True, try fetching from Redis
redis_result = await self.redis_cache.async_get_cache(key, **kwargs)
if redis_result is not None:

View file

@ -207,7 +207,7 @@ class ProxyLogging:
self.call_details: dict = {}
self.call_details["user_api_key_cache"] = user_api_key_cache
self.internal_usage_cache = DualCache(
default_in_memory_ttl=1
default_in_memory_ttl=1, always_read_redis=litellm.always_read_redis
) # ping redis cache every 1s
self.max_parallel_request_limiter = _PROXY_MaxParallelRequestsHandler(
self.internal_usage_cache

View file

@ -1958,3 +1958,42 @@ async def test_cache_default_off_acompletion():
)
print(f"Response4: {response4}")
assert response3.id == response4.id
@pytest.mark.asyncio()
async def test_dual_cache_uses_redis():
    """
    With always_read_redis=True, the Redis value must win over a stale
    in-memory entry:
    - write a value through the dual cache (lands in both layers)
    - corrupt the in-memory copy
    - read back and assert the Redis value is returned
    """
    litellm.set_verbose = True
    from litellm.caching import DualCache, RedisCache

    usage_id = uuid.uuid4()
    cache_key = f"current_usage: {usage_id}"
    dual_cache = DualCache(redis_cache=RedisCache(), always_read_redis=True)

    # Seed both layers via the normal write path.
    await dual_cache.async_set_cache(key=cache_key, value=10)

    # Overwrite only the in-memory layer with a different (stale) value.
    dual_cache.in_memory_cache.cache_dict[cache_key] = 1

    # The read must come from Redis, not the corrupted in-memory entry.
    fetched = await dual_cache.async_get_cache(key=cache_key)
    print("value from dual cache", fetched)
    assert fetched == 10
@pytest.mark.asyncio()
async def test_proxy_logging_setup():
    """
    ProxyLogging must construct its internal usage cache with
    always_read_redis enabled.
    """
    from litellm.caching import DualCache
    from litellm.proxy.utils import ProxyLogging

    proxy_logging = ProxyLogging(user_api_key_cache=DualCache())
    assert proxy_logging.internal_usage_cache.always_read_redis is True