diff --git a/litellm/caching.py b/litellm/caching.py index 95cad01cf..64488289a 100644 --- a/litellm/caching.py +++ b/litellm/caching.py @@ -64,16 +64,55 @@ class BaseCache: class InMemoryCache(BaseCache): - def __init__(self): - # if users don't provider one, use the default litellm cache - self.cache_dict = {} - self.ttl_dict = {} + def __init__( + self, + max_size_in_memory: Optional[int] = 200, + default_ttl: Optional[ + int + ] = 600, # default ttl is 10 minutes. At maximum litellm rate limiting logic requires objects to be in memory for 1 minute + ): + """ + max_size_in_memory [int]: Maximum number of items in cache. done to prevent memory leaks. Use 200 items as a default + """ + self.max_size_in_memory = ( + max_size_in_memory or 200 + ) # set an upper bound of 200 items in-memory + self.default_ttl = default_ttl or 600 + + # in-memory cache + self.cache_dict: dict = {} + self.ttl_dict: dict = {} + + def evict_cache(self): + """ + Eviction policy: + - check if any items in ttl_dict are expired -> remove them from ttl_dict and cache_dict + + + This guarantees the following: + - 1. When item ttl not set: At minimum each item will remain in memory for default_ttl seconds (10 minutes by default) + - 2. When ttl is set: the item will remain in memory for at least that amount of time + - 3. the size of in-memory cache is bounded + + """ + for key in list(self.ttl_dict.keys()): + if time.time() > self.ttl_dict[key]: + self.cache_dict.pop(key, None) + self.ttl_dict.pop(key, None) def set_cache(self, key, value, **kwargs): - print_verbose("InMemoryCache: set_cache") + print_verbose( + "InMemoryCache: set_cache. 
current size= {}".format(len(self.cache_dict)) + ) + if len(self.cache_dict) >= self.max_size_in_memory: + # only evict when cache is full + self.evict_cache() + self.cache_dict[key] = value if "ttl" in kwargs: self.ttl_dict[key] = time.time() + kwargs["ttl"] + else: + self.ttl_dict[key] = time.time() + self.default_ttl async def async_set_cache(self, key, value, **kwargs): self.set_cache(key=key, value=value, **kwargs) @@ -139,6 +178,7 @@ class InMemoryCache(BaseCache): init_value = await self.async_get_cache(key=key) or 0 value = init_value + value await self.async_set_cache(key, value, **kwargs) + return value def flush_cache(self): diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py index 673b027ca..aec6215ce 100644 --- a/litellm/proxy/litellm_pre_call_utils.py +++ b/litellm/proxy/litellm_pre_call_utils.py @@ -176,7 +176,6 @@ async def add_litellm_data_to_request( def _add_otel_traceparent_to_data(data: dict, request: Request): from litellm.proxy.proxy_server import open_telemetry_logger - if data is None: return if open_telemetry_logger is None: diff --git a/poetry.lock b/poetry.lock index 290d19f7a..88927576c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. 
[[package]] name = "aiohttp" @@ -343,13 +343,13 @@ uvloop = ["uvloop (>=0.15.2)"] [[package]] name = "cachetools" -version = "5.3.3" +version = "5.3.1" description = "Extensible memoizing collections and decorators" -optional = true +optional = false python-versions = ">=3.7" files = [ - {file = "cachetools-5.3.3-py3-none-any.whl", hash = "sha256:0abad1021d3f8325b2fc1d2e9c8b9c9d57b04c3932657a72465447332c24d945"}, - {file = "cachetools-5.3.3.tar.gz", hash = "sha256:ba29e2dfa0b8b556606f097407ed1aa62080ee108ab0dc5ec9d6a723a007d105"}, + {file = "cachetools-5.3.1-py3-none-any.whl", hash = "sha256:95ef631eeaea14ba2e36f06437f36463aac3a096799e876ee55e5cdccb102590"}, + {file = "cachetools-5.3.1.tar.gz", hash = "sha256:dce83f2d9b4e1f732a8cd44af8e8fab2dbe46201467fc98b3ef8f269092bf62b"}, ] [[package]] @@ -3300,4 +3300,4 @@ proxy = ["PyJWT", "apscheduler", "backoff", "cryptography", "fastapi", "fastapi- [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0, !=3.9.7" -content-hash = "f400d2f686954c2b12b0ee88546f31d52ebc8e323a3ec850dc46d74748d38cdf" +content-hash = "022481b965a1a6524cc25d52eff59592779aafdf03dc6159c834b9519079f549"