forked from phoenix/litellm-mirror
Merge pull request #4366 from BerriAI/litellm_fix_in_mem_usage
[Fix-Proxy] Fix in memory caching memory leak
This commit is contained in:
commit
f4521af1fd
3 changed files with 51 additions and 12 deletions
|
@ -64,16 +64,55 @@ class BaseCache:
|
|||
|
||||
|
||||
class InMemoryCache(BaseCache):
|
||||
def __init__(self):
|
||||
# if users don't provider one, use the default litellm cache
|
||||
self.cache_dict = {}
|
||||
self.ttl_dict = {}
|
||||
def __init__(
|
||||
self,
|
||||
max_size_in_memory: Optional[int] = 200,
|
||||
default_ttl: Optional[
|
||||
int
|
||||
] = 600, # default ttl is 10 minutes. At maximum litellm rate limiting logic requires objects to be in memory for 1 minute
|
||||
):
|
||||
"""
|
||||
max_size_in_memory [int]: Maximum number of items in cache. done to prevent memory leaks. Use 200 items as a default
|
||||
"""
|
||||
self.max_size_in_memory = (
|
||||
max_size_in_memory or 200
|
||||
) # set an upper bound of 200 items in-memory
|
||||
self.default_ttl = default_ttl or 600
|
||||
|
||||
# in-memory cache
|
||||
self.cache_dict: dict = {}
|
||||
self.ttl_dict: dict = {}
|
||||
|
||||
def evict_cache(self):
|
||||
"""
|
||||
Eviction policy:
|
||||
- check if any items in ttl_dict are expired -> remove them from ttl_dict and cache_dict
|
||||
|
||||
|
||||
This guarantees the following:
|
||||
- 1. When item ttl not set: At minimumm each item will remain in memory for 5 minutes
|
||||
- 2. When ttl is set: the item will remain in memory for at least that amount of time
|
||||
- 3. the size of in-memory cache is bounded
|
||||
|
||||
"""
|
||||
for key in list(self.ttl_dict.keys()):
|
||||
if time.time() > self.ttl_dict[key]:
|
||||
self.cache_dict.pop(key, None)
|
||||
self.ttl_dict.pop(key, None)
|
||||
|
||||
def set_cache(self, key, value, **kwargs):
|
||||
print_verbose("InMemoryCache: set_cache")
|
||||
print_verbose(
|
||||
"InMemoryCache: set_cache. current size= {}".format(len(self.cache_dict))
|
||||
)
|
||||
if len(self.cache_dict) >= self.max_size_in_memory:
|
||||
# only evict when cache is full
|
||||
self.evict_cache()
|
||||
|
||||
self.cache_dict[key] = value
|
||||
if "ttl" in kwargs:
|
||||
self.ttl_dict[key] = time.time() + kwargs["ttl"]
|
||||
else:
|
||||
self.ttl_dict[key] = time.time() + self.default_ttl
|
||||
|
||||
async def async_set_cache(self, key, value, **kwargs):
|
||||
self.set_cache(key=key, value=value, **kwargs)
|
||||
|
@ -139,6 +178,7 @@ class InMemoryCache(BaseCache):
|
|||
init_value = await self.async_get_cache(key=key) or 0
|
||||
value = init_value + value
|
||||
await self.async_set_cache(key, value, **kwargs)
|
||||
|
||||
return value
|
||||
|
||||
def flush_cache(self):
|
||||
|
|
|
@ -176,7 +176,6 @@ async def add_litellm_data_to_request(
|
|||
|
||||
def _add_otel_traceparent_to_data(data: dict, request: Request):
|
||||
from litellm.proxy.proxy_server import open_telemetry_logger
|
||||
|
||||
if data is None:
|
||||
return
|
||||
if open_telemetry_logger is None:
|
||||
|
|
12
poetry.lock
generated
12
poetry.lock
generated
|
@ -1,4 +1,4 @@
|
|||
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
|
||||
# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
|
||||
|
||||
[[package]]
|
||||
name = "aiohttp"
|
||||
|
@ -343,13 +343,13 @@ uvloop = ["uvloop (>=0.15.2)"]
|
|||
|
||||
[[package]]
|
||||
name = "cachetools"
|
||||
version = "5.3.3"
|
||||
version = "5.3.1"
|
||||
description = "Extensible memoizing collections and decorators"
|
||||
optional = true
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "cachetools-5.3.3-py3-none-any.whl", hash = "sha256:0abad1021d3f8325b2fc1d2e9c8b9c9d57b04c3932657a72465447332c24d945"},
|
||||
{file = "cachetools-5.3.3.tar.gz", hash = "sha256:ba29e2dfa0b8b556606f097407ed1aa62080ee108ab0dc5ec9d6a723a007d105"},
|
||||
{file = "cachetools-5.3.1-py3-none-any.whl", hash = "sha256:95ef631eeaea14ba2e36f06437f36463aac3a096799e876ee55e5cdccb102590"},
|
||||
{file = "cachetools-5.3.1.tar.gz", hash = "sha256:dce83f2d9b4e1f732a8cd44af8e8fab2dbe46201467fc98b3ef8f269092bf62b"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -3300,4 +3300,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "cryptography", "fastapi", "fastapi-
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.8.1,<4.0, !=3.9.7"
|
||||
content-hash = "f400d2f686954c2b12b0ee88546f31d52ebc8e323a3ec850dc46d74748d38cdf"
|
||||
content-hash = "022481b965a1a6524cc25d52eff59592779aafdf03dc6159c834b9519079f549"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue