mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 11:43:54 +00:00
fix(in_memory_cache.py): add max value limits to in-memory cache. Prevents OOM errors in prod
This commit is contained in:
parent
068102233c
commit
93d5c8537d
2 changed files with 66 additions and 1 deletions
|
@ -10,7 +10,7 @@ Has 4 methods:
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
from typing import List, Optional
|
from typing import Any, List, Optional
|
||||||
|
|
||||||
from .base_cache import BaseCache
|
from .base_cache import BaseCache
|
||||||
|
|
||||||
|
@ -30,11 +30,40 @@ class InMemoryCache(BaseCache):
|
||||||
max_size_in_memory or 200
|
max_size_in_memory or 200
|
||||||
) # set an upper bound of 200 items in-memory
|
) # set an upper bound of 200 items in-memory
|
||||||
self.default_ttl = default_ttl or 600
|
self.default_ttl = default_ttl or 600
|
||||||
|
self.max_size_per_item = 1024 # 1MB = 1024KB
|
||||||
|
|
||||||
# in-memory cache
|
# in-memory cache
|
||||||
self.cache_dict: dict = {}
|
self.cache_dict: dict = {}
|
||||||
self.ttl_dict: dict = {}
|
self.ttl_dict: dict = {}
|
||||||
|
|
||||||
|
def check_value_size(self, value: Any):
    """
    Verify that *value* fits under the per-item size cap
    (``self.max_size_per_item``, expressed in KB).

    The value is first coerced into a serializable form (Pydantic v2 via
    ``model_dump``, Pydantic v1 via ``dict``, datetimes via ``isoformat``),
    then JSON-encoded when it is not already a str/bytes, and finally
    measured as UTF-8 bytes.

    Returns True when the measured size is within the cap, False when it
    exceeds the cap or cannot be measured at all.
    """
    try:
        # Normalize rich objects into plain data before measuring.
        if hasattr(value, "model_dump"):
            # Pydantic v2 model
            value = value.model_dump()
        elif hasattr(value, "dict"):
            # Pydantic v1 model
            value = value.dict()
        elif hasattr(value, "isoformat"):
            # datetime-like object
            value = value.isoformat()

        # Serialize anything non-textual so every value is measured the
        # same way; default=str stringifies any remaining non-JSON types
        # (e.g. leftover datetimes nested in containers).
        if not isinstance(value, (str, bytes)):
            value = json.dumps(value, default=str)

        # Measure as UTF-8 bytes, converted to KB (1 KB = 1024 bytes).
        size_kb = len(str(value).encode("utf-8")) / 1024
        return size_kb <= self.max_size_per_item
    except Exception:
        # Unmeasurable values are treated as oversized.
        return False
|
||||||
|
|
||||||
def evict_cache(self):
|
def evict_cache(self):
|
||||||
"""
|
"""
|
||||||
Eviction policy:
|
Eviction policy:
|
||||||
|
@ -61,6 +90,8 @@ class InMemoryCache(BaseCache):
|
||||||
if len(self.cache_dict) >= self.max_size_in_memory:
|
if len(self.cache_dict) >= self.max_size_in_memory:
|
||||||
# only evict when cache is full
|
# only evict when cache is full
|
||||||
self.evict_cache()
|
self.evict_cache()
|
||||||
|
if not self.check_value_size(value):
|
||||||
|
return
|
||||||
|
|
||||||
self.cache_dict[key] = value
|
self.cache_dict[key] = value
|
||||||
if "ttl" in kwargs and kwargs["ttl"] is not None:
|
if "ttl" in kwargs and kwargs["ttl"] is not None:
|
||||||
|
|
34
tests/litellm/caching/test_in_memory_cache.py
Normal file
34
tests/litellm/caching/test_in_memory_cache.py
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import pytest
|
||||||
|
import respx
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
sys.path.insert(
|
||||||
|
0, os.path.abspath("../../..")
|
||||||
|
) # Adds the parent directory to the system path
|
||||||
|
from unittest.mock import AsyncMock
|
||||||
|
|
||||||
|
from litellm.caching.in_memory_cache import InMemoryCache
|
||||||
|
|
||||||
|
|
||||||
|
def test_in_memory_openai_obj_cache():
    """A complex SDK object (an OpenAI client) must round-trip through the cache."""
    from openai import OpenAI

    client = OpenAI(api_key="my-fake-key")
    cache = InMemoryCache()

    cache.set_cache(key="my-fake-key", value=client)
    retrieved = cache.get_cache(key="my-fake-key")

    assert retrieved is not None
    assert retrieved == client
|
Loading…
Add table
Add a link
Reference in a new issue