forked from phoenix/litellm-mirror
Merge pull request #4366 from BerriAI/litellm_fix_in_mem_usage
[Fix-Proxy] Fix in memory caching memory leak
This commit is contained in:
commit
f4521af1fd
3 changed files with 51 additions and 12 deletions
|
@ -64,16 +64,55 @@ class BaseCache:
|
||||||
|
|
||||||
|
|
||||||
class InMemoryCache(BaseCache):
|
class InMemoryCache(BaseCache):
|
||||||
def __init__(self):
|
def __init__(
|
||||||
# if users don't provide one, use the default litellm cache
|
self,
|
||||||
self.cache_dict = {}
|
max_size_in_memory: Optional[int] = 200,
|
||||||
self.ttl_dict = {}
|
default_ttl: Optional[
|
||||||
|
int
|
||||||
|
] = 600, # default ttl is 10 minutes. At maximum litellm rate limiting logic requires objects to be in memory for 1 minute
|
||||||
|
):
|
||||||
|
"""
|
||||||
|
max_size_in_memory [int]: Maximum number of items kept in the cache; the limit exists to prevent memory leaks. Defaults to 200 items.
|
||||||
|
"""
|
||||||
|
self.max_size_in_memory = (
|
||||||
|
max_size_in_memory or 200
|
||||||
|
) # set an upper bound of 200 items in-memory
|
||||||
|
self.default_ttl = default_ttl or 600
|
||||||
|
|
||||||
|
# in-memory cache
|
||||||
|
self.cache_dict: dict = {}
|
||||||
|
self.ttl_dict: dict = {}
|
||||||
|
|
||||||
|
def evict_cache(self):
|
||||||
|
"""
|
||||||
|
Eviction policy:
|
||||||
|
- check if any items in ttl_dict are expired -> remove them from ttl_dict and cache_dict
|
||||||
|
|
||||||
|
|
||||||
|
This guarantees the following:
|
||||||
|
- 1. When item ttl not set: At minimum each item will remain in memory for default_ttl (10 minutes by default)
|
||||||
|
- 2. When ttl is set: the item will remain in memory for at least that amount of time
|
||||||
|
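- 3. the size of in-memory cache is bounded once entries expire (only expired items are evicted, so the cache can temporarily exceed max_size_in_memory while no item has expired)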
|
||||||
|
|
||||||
|
"""
|
||||||
|
for key in list(self.ttl_dict.keys()):
|
||||||
|
if time.time() > self.ttl_dict[key]:
|
||||||
|
self.cache_dict.pop(key, None)
|
||||||
|
self.ttl_dict.pop(key, None)
|
||||||
|
|
||||||
def set_cache(self, key, value, **kwargs):
|
def set_cache(self, key, value, **kwargs):
|
||||||
print_verbose("InMemoryCache: set_cache")
|
print_verbose(
|
||||||
|
"InMemoryCache: set_cache. current size= {}".format(len(self.cache_dict))
|
||||||
|
)
|
||||||
|
if len(self.cache_dict) >= self.max_size_in_memory:
|
||||||
|
# only evict when cache is full
|
||||||
|
self.evict_cache()
|
||||||
|
|
||||||
self.cache_dict[key] = value
|
self.cache_dict[key] = value
|
||||||
if "ttl" in kwargs:
|
if "ttl" in kwargs:
|
||||||
self.ttl_dict[key] = time.time() + kwargs["ttl"]
|
self.ttl_dict[key] = time.time() + kwargs["ttl"]
|
||||||
|
else:
|
||||||
|
self.ttl_dict[key] = time.time() + self.default_ttl
|
||||||
|
|
||||||
async def async_set_cache(self, key, value, **kwargs):
|
async def async_set_cache(self, key, value, **kwargs):
|
||||||
self.set_cache(key=key, value=value, **kwargs)
|
self.set_cache(key=key, value=value, **kwargs)
|
||||||
|
@ -139,6 +178,7 @@ class InMemoryCache(BaseCache):
|
||||||
init_value = await self.async_get_cache(key=key) or 0
|
init_value = await self.async_get_cache(key=key) or 0
|
||||||
value = init_value + value
|
value = init_value + value
|
||||||
await self.async_set_cache(key, value, **kwargs)
|
await self.async_set_cache(key, value, **kwargs)
|
||||||
|
|
||||||
return value
|
return value
|
||||||
|
|
||||||
def flush_cache(self):
|
def flush_cache(self):
|
||||||
|
|
|
@ -176,7 +176,6 @@ async def add_litellm_data_to_request(
|
||||||
|
|
||||||
def _add_otel_traceparent_to_data(data: dict, request: Request):
|
def _add_otel_traceparent_to_data(data: dict, request: Request):
|
||||||
from litellm.proxy.proxy_server import open_telemetry_logger
|
from litellm.proxy.proxy_server import open_telemetry_logger
|
||||||
|
|
||||||
if data is None:
|
if data is None:
|
||||||
return
|
return
|
||||||
if open_telemetry_logger is None:
|
if open_telemetry_logger is None:
|
||||||
|
|
12
poetry.lock
generated
12
poetry.lock
generated
|
@ -1,4 +1,4 @@
|
||||||
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
|
# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "aiohttp"
|
name = "aiohttp"
|
||||||
|
@ -343,13 +343,13 @@ uvloop = ["uvloop (>=0.15.2)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cachetools"
|
name = "cachetools"
|
||||||
version = "5.3.3"
|
version = "5.3.1"
|
||||||
description = "Extensible memoizing collections and decorators"
|
description = "Extensible memoizing collections and decorators"
|
||||||
optional = true
|
optional = false
|
||||||
python-versions = ">=3.7"
|
python-versions = ">=3.7"
|
||||||
files = [
|
files = [
|
||||||
{file = "cachetools-5.3.3-py3-none-any.whl", hash = "sha256:0abad1021d3f8325b2fc1d2e9c8b9c9d57b04c3932657a72465447332c24d945"},
|
{file = "cachetools-5.3.1-py3-none-any.whl", hash = "sha256:95ef631eeaea14ba2e36f06437f36463aac3a096799e876ee55e5cdccb102590"},
|
||||||
{file = "cachetools-5.3.3.tar.gz", hash = "sha256:ba29e2dfa0b8b556606f097407ed1aa62080ee108ab0dc5ec9d6a723a007d105"},
|
{file = "cachetools-5.3.1.tar.gz", hash = "sha256:dce83f2d9b4e1f732a8cd44af8e8fab2dbe46201467fc98b3ef8f269092bf62b"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -3300,4 +3300,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "cryptography", "fastapi", "fastapi-
|
||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = ">=3.8.1,<4.0, !=3.9.7"
|
python-versions = ">=3.8.1,<4.0, !=3.9.7"
|
||||||
content-hash = "f400d2f686954c2b12b0ee88546f31d52ebc8e323a3ec850dc46d74748d38cdf"
|
content-hash = "022481b965a1a6524cc25d52eff59592779aafdf03dc6159c834b9519079f549"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue