Merge pull request #4366 from BerriAI/litellm_fix_in_mem_usage

[Fix-Proxy] Fix in memory caching memory leak
Ishaan Jaff · 2024-06-27 21:12:14 -07:00 · committed by GitHub · commit f4521af1fd
3 changed files with 51 additions and 12 deletions


@@ -64,16 +64,55 @@ class BaseCache:

 class InMemoryCache(BaseCache):
-    def __init__(self):
-        # if users don't provide one, use the default litellm cache
-        self.cache_dict = {}
-        self.ttl_dict = {}
+    def __init__(
+        self,
+        max_size_in_memory: Optional[int] = 200,
+        default_ttl: Optional[
+            int
+        ] = 600,  # default ttl is 10 minutes; litellm's rate-limiting logic needs objects in memory for at most 1 minute
+    ):
+        """
+        max_size_in_memory (int): Maximum number of items in the cache; bounds memory usage to prevent leaks. Defaults to 200 items.
+        """
+        self.max_size_in_memory = (
+            max_size_in_memory or 200
+        )  # set an upper bound of 200 items in-memory
+        self.default_ttl = default_ttl or 600
+
+        # in-memory cache
+        self.cache_dict: dict = {}
+        self.ttl_dict: dict = {}
+
+    def evict_cache(self):
+        """
+        Eviction policy:
+        - check if any items in ttl_dict are expired -> remove them from ttl_dict and cache_dict
+
+        This guarantees the following:
+        - 1. When an item's ttl is not set: it remains in memory for at least default_ttl seconds (10 minutes by default)
+        - 2. When a ttl is set: the item remains in memory for at least that amount of time
+        - 3. The size of the in-memory cache is bounded
+        """
+        for key in list(self.ttl_dict.keys()):
+            if time.time() > self.ttl_dict[key]:
+                self.cache_dict.pop(key, None)
+                self.ttl_dict.pop(key, None)
+
     def set_cache(self, key, value, **kwargs):
-        print_verbose("InMemoryCache: set_cache")
+        print_verbose(
+            "InMemoryCache: set_cache. current size= {}".format(len(self.cache_dict))
+        )
+        if len(self.cache_dict) >= self.max_size_in_memory:
+            # only evict when cache is full
+            self.evict_cache()
+
         self.cache_dict[key] = value
         if "ttl" in kwargs:
             self.ttl_dict[key] = time.time() + kwargs["ttl"]
+        else:
+            self.ttl_dict[key] = time.time() + self.default_ttl

     async def async_set_cache(self, key, value, **kwargs):
         self.set_cache(key=key, value=value, **kwargs)
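
The eviction policy is simple enough to sketch in isolation. A minimal, self-contained reproduction of the bookkeeping above (the names here are illustrative, not litellm's API):

import time

cache_dict: dict = {}
ttl_dict: dict = {}

def set_item(key, value, ttl: float = 600):
    cache_dict[key] = value
    ttl_dict[key] = time.time() + ttl  # store the absolute expiry timestamp

def evict_expired():
    # iterate over a snapshot of the keys so entries can be deleted mid-loop
    for key in list(ttl_dict.keys()):
        if time.time() > ttl_dict[key]:
            cache_dict.pop(key, None)
            ttl_dict.pop(key, None)

set_item("a", 1, ttl=0.01)  # expires almost immediately
set_item("b", 2)            # keeps the 10-minute default
time.sleep(0.05)
evict_expired()
assert "a" not in cache_dict and "b" in cache_dict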
@@ -139,6 +178,7 @@ class InMemoryCache(BaseCache):
         init_value = await self.async_get_cache(key=key) or 0
         value = init_value + value
         await self.async_set_cache(key, value, **kwargs)
+        return value

     def flush_cache(self):
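
Together, the two hunks bound the cache and make async increments report their result. A hedged usage sketch, assuming the class is importable as litellm.caching.InMemoryCache and that the second hunk sits inside async_increment (the context lines suggest both, but neither is shown in this diff):

import asyncio

from litellm.caching import InMemoryCache  # import path assumed for this era of the repo

async def main():
    cache = InMemoryCache(max_size_in_memory=2, default_ttl=600)

    cache.set_cache("a", 1)          # expiry defaults to now + default_ttl
    cache.set_cache("b", 2, ttl=60)  # explicit 60-second ttl
    cache.set_cache("c", 3)          # cache is full, so evict_cache() runs first

    # with the `return value` added above, callers can read the running total directly
    total = await cache.async_increment("hits", 1)
    print(total)

asyncio.run(main())

Note that the size bound is soft: evict_cache() drops only expired entries, so a burst of writes can momentarily exceed max_size_in_memory. The guarantee the patch adds is that every entry now carries a ttl, so the cache can no longer grow without limit.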


@@ -176,7 +176,6 @@ async def add_litellm_data_to_request(
 def _add_otel_traceparent_to_data(data: dict, request: Request):
     from litellm.proxy.proxy_server import open_telemetry_logger

-
     if data is None:
         return
     if open_telemetry_logger is None:
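
The surviving context shows the early-return guard that keeps this helper safe when a request has no JSON body; a minimal sketch of the same pattern (function and field names here are hypothetical, not litellm's):

from typing import Optional

def _annotate(data: Optional[dict], traceparent: str) -> None:
    if data is None:
        return  # nothing to annotate when the request carries no body
    # the real helper propagates an OTEL traceparent into the request metadata
    data.setdefault("metadata", {})["traceparent"] = traceparent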

poetry.lock (generated)

@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.

 [[package]]
 name = "aiohttp"
@@ -343,13 +343,13 @@ uvloop = ["uvloop (>=0.15.2)"]

 [[package]]
 name = "cachetools"
-version = "5.3.3"
+version = "5.3.1"
 description = "Extensible memoizing collections and decorators"
-optional = true
+optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "cachetools-5.3.3-py3-none-any.whl", hash = "sha256:0abad1021d3f8325b2fc1d2e9c8b9c9d57b04c3932657a72465447332c24d945"},
-    {file = "cachetools-5.3.3.tar.gz", hash = "sha256:ba29e2dfa0b8b556606f097407ed1aa62080ee108ab0dc5ec9d6a723a007d105"},
+    {file = "cachetools-5.3.1-py3-none-any.whl", hash = "sha256:95ef631eeaea14ba2e36f06437f36463aac3a096799e876ee55e5cdccb102590"},
+    {file = "cachetools-5.3.1.tar.gz", hash = "sha256:dce83f2d9b4e1f732a8cd44af8e8fab2dbe46201467fc98b3ef8f269092bf62b"},
 ]

 [[package]]
@@ -3300,4 +3300,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "cryptography", "fastapi", "fastapi-

 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.1,<4.0, !=3.9.7"
-content-hash = "f400d2f686954c2b12b0ee88546f31d52ebc8e323a3ec850dc46d74748d38cdf"
+content-hash = "022481b965a1a6524cc25d52eff59592779aafdf03dc6159c834b9519079f549"