mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-27 11:43:54 +00:00)
Merge pull request #5484 from BerriAI/litellm_fix_redis
fix proxy server - always read redis for rate limiting logic
commit 4e392ebdb2
5 changed files with 58 additions and 6 deletions
@@ -147,6 +147,9 @@ enable_caching_on_provider_specific_optional_params: bool = (
 caching: bool = (
     False  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
 )
+always_read_redis: bool = (
+    True  # always use redis for rate limiting logic on litellm proxy
+)
 caching_with_models: bool = (
     False  # # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
 )
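
The hunk above adds a module-level switch that defaults to True. A minimal usage sketch, assuming a litellm build that contains this change; flipping the flag is illustrative only and would need to happen before the proxy constructs its caches:

    # Hedged sketch: `always_read_redis` is the module-level bool added above;
    # everything else here is illustrative only.
    import litellm

    print(litellm.always_read_redis)  # True by default after this change

    # Assumption: opting back into "only hit Redis on an in-memory miss" is a
    # matter of flipping the flag before the proxy's caches are built.
    litellm.always_read_redis = False
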
@@ -1777,6 +1777,7 @@ class DualCache(BaseCache):
         redis_cache: Optional[RedisCache] = None,
         default_in_memory_ttl: Optional[float] = None,
         default_redis_ttl: Optional[float] = None,
+        always_read_redis: Optional[bool] = True,
     ) -> None:
         super().__init__()
         # If in_memory_cache is not provided, use the default InMemoryCache

@@ -1788,6 +1789,7 @@ class DualCache(BaseCache):
             default_in_memory_ttl or litellm.default_in_memory_ttl
         )
         self.default_redis_ttl = default_redis_ttl or litellm.default_redis_ttl
+        self.always_read_redis = always_read_redis
 
     def update_cache_ttl(
         self, default_in_memory_ttl: Optional[float], default_redis_ttl: Optional[float]

@@ -1847,8 +1849,12 @@ class DualCache(BaseCache):
                 if in_memory_result is not None:
                     result = in_memory_result
 
-            if result is None and self.redis_cache is not None and local_only == False:
-                # If not found in in-memory cache, try fetching from Redis
+            if (
+                (self.always_read_redis is True)
+                and self.redis_cache is not None
+                and local_only == False
+            ):
+                # If not found in in-memory cache or always_read_redis is True, try fetching from Redis
                 redis_result = self.redis_cache.get_cache(key, **kwargs)
 
                 if redis_result is not None:

@@ -1911,8 +1917,12 @@ class DualCache(BaseCache):
                 if in_memory_result is not None:
                     result = in_memory_result
 
-            if result is None and self.redis_cache is not None and local_only == False:
-                # If not found in in-memory cache, try fetching from Redis
+            if (
+                (self.always_read_redis is True)
+                and self.redis_cache is not None
+                and local_only == False
+            ):
+                # If not found in in-memory cache or always_read_redis is True, try fetching from Redis
                 redis_result = await self.redis_cache.async_get_cache(key, **kwargs)
 
                 if redis_result is not None:

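
Taken together, the DualCache hunks mean every read consults Redis when always_read_redis is set, instead of only on an in-memory miss, so rate-limit counters written by one proxy instance are visible to all of them. A simplified, self-contained model of that read path (plain dicts stand in for the in-memory cache and Redis; this is not the litellm implementation, only an illustration of the logic):

    from typing import Any, Optional

    class ToyDualCache:
        """Toy illustration of the always_read_redis read path."""

        def __init__(self, always_read_redis: bool = True) -> None:
            self.in_memory: dict = {}
            self.redis: dict = {}  # stand-in for a shared Redis instance
            self.always_read_redis = always_read_redis

        def get_cache(self, key: str) -> Optional[Any]:
            result = self.in_memory.get(key)
            # When always_read_redis is set, the shared store is consulted even
            # though a local copy exists, so a stale local counter cannot win.
            if self.always_read_redis and key in self.redis:
                result = self.redis[key]
                self.in_memory[key] = result  # refresh the local copy
            return result

    cache = ToyDualCache(always_read_redis=True)
    cache.redis["current_usage"] = 10     # written by another proxy instance
    cache.in_memory["current_usage"] = 1  # stale local value
    assert cache.get_cache("current_usage") == 10
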
@@ -207,7 +207,7 @@ class ProxyLogging:
         self.call_details: dict = {}
         self.call_details["user_api_key_cache"] = user_api_key_cache
         self.internal_usage_cache = DualCache(
-            default_in_memory_ttl=1
+            default_in_memory_ttl=1, always_read_redis=litellm.always_read_redis
         )  # ping redis cache every 1s
         self.max_parallel_request_limiter = _PROXY_MaxParallelRequestsHandler(
             self.internal_usage_cache

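
The ProxyLogging hunk simply forwards the module flag into the DualCache used for internal usage tracking. A rough, hedged equivalent of that wiring, assuming a build containing this change and Redis connection details available to RedisCache() (for example via REDIS_HOST / REDIS_PORT / REDIS_PASSWORD):

    import litellm
    from litellm.caching import DualCache, RedisCache

    # Illustrative only: mirrors what ProxyLogging does internally.
    internal_usage_cache = DualCache(
        redis_cache=RedisCache(),
        default_in_memory_ttl=1,  # ping redis cache every 1s, as in the diff
        always_read_redis=litellm.always_read_redis,
    )
    assert internal_usage_cache.always_read_redis is True
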
@@ -1958,3 +1958,42 @@ async def test_cache_default_off_acompletion():
     )
     print(f"Response4: {response4}")
     assert response3.id == response4.id
+
+
+@pytest.mark.asyncio()
+async def test_dual_cache_uses_redis():
+    """
+
+    - Store diff values in redis and in memory cache
+    - call get cache
+    - Assert that value from redis is used
+    """
+    litellm.set_verbose = True
+    from litellm.caching import DualCache, RedisCache
+
+    current_usage = uuid.uuid4()
+
+    _cache_obj = DualCache(redis_cache=RedisCache(), always_read_redis=True)
+
+    # set cache
+    await _cache_obj.async_set_cache(key=f"current_usage: {current_usage}", value=10)
+
+    # modify value of in memory cache
+    _cache_obj.in_memory_cache.cache_dict[f"current_usage: {current_usage}"] = 1
+
+    # get cache
+    value = await _cache_obj.async_get_cache(key=f"current_usage: {current_usage}")
+    print("value from dual cache", value)
+    assert value == 10
+
+
+@pytest.mark.asyncio()
+async def test_proxy_logging_setup():
+    """
+    Assert always_read_redis is True when used by internal usage cache
+    """
+    from litellm.caching import DualCache
+    from litellm.proxy.utils import ProxyLogging
+
+    pl_obj = ProxyLogging(user_api_key_cache=DualCache())
+    assert pl_obj.internal_usage_cache.always_read_redis is True

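
The two new tests exercise both halves of the change: the first asserts that the Redis value wins over a deliberately stale in-memory entry, the second that ProxyLogging builds its internal usage cache with always_read_redis enabled. A hedged way to run just the first one locally (the test file path is a guess, and a reachable Redis configured via the usual environment variables is assumed):

    import pytest

    # Hypothetical invocation; adjust the path to wherever the caching tests
    # live in your checkout.
    pytest.main(["-xvs", "litellm/tests/test_caching.py::test_dual_cache_uses_redis"])
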
@@ -23,7 +23,7 @@ from litellm import RateLimitError, Timeout, completion, completion_cost, embedd
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.llms.prompt_templates.factory import anthropic_messages_pt
 
-# litellm.num_retries = 3
+# litellm.num_retries=3
 litellm.cache = None
 litellm.success_callback = []
 user_message = "Write a short poem about the sky"