fix(in_memory_cache.py): add a max per-item value size limit (1MB) to the in-memory cache. Prevents OOM errors in prod

2025-04-27 11:43:54 +00:00 · 2025-03-21 14:51:12 -07:00 · 2025-03-21 14:51:12 -07:00 · 93d5c8537d
commit 93d5c8537d
parent 068102233c
2 changed files with 66 additions and 1 deletions
--- a/litellm/caching/in_memory_cache.py
+++ b/litellm/caching/in_memory_cache.py
@ -10,7 +10,7 @@ Has 4 methods:

 import json
 import time
-from typing import List, Optional
+from typing import Any, List, Optional

 from .base_cache import BaseCache

@ -30,11 +30,40 @@ class InMemoryCache(BaseCache):
            max_size_in_memory or 200
        )  # set an upper bound of 200 items in-memory
        self.default_ttl = default_ttl or 600
+        self.max_size_per_item = 1024  # 1MB = 1024KB

        # in-memory cache
        self.cache_dict: dict = {}
        self.ttl_dict: dict = {}

+    def check_value_size(self, value: Any):
+        """
+        Check whether a value is small enough to be stored in the cache.
+
+        How the size is measured:
+        - objects exposing `model_dump()`, `dict()` or `isoformat()` are first
+          converted with that method
+        - `bytes` are measured by their raw length
+        - `str` is measured by its UTF-8 encoded length
+        - anything else is measured by the UTF-8 length of
+          `json.dumps(value, default=str)`
+
+        Args:
+            value: the object about to be cached.
+
+        Returns:
+            True if the measured size is <= `self.max_size_per_item` (in KB),
+            False if it is larger or if the size could not be measured.
+        """
+        try:
+            # Handle special types
+            if hasattr(value, "model_dump"):  # Pydantic v2
+                value = value.model_dump()
+            elif hasattr(value, "dict"):  # Pydantic v1
+                value = value.dict()
+            elif hasattr(value, "isoformat"):  # datetime objects
+                value = value.isoformat()
+
+            if isinstance(value, bytes):
+                # Measure the payload itself. str(bytes) yields the "b'...'"
+                # repr, where each non-printable byte becomes "\xNN" and
+                # inflates the measured size up to 4x.
+                size_in_bytes = len(value)
+            else:
+                if not isinstance(value, str):
+                    # Convert to a JSON string for a consistent measurement;
+                    # default=str covers objects json cannot encode natively.
+                    value = json.dumps(value, default=str)
+                size_in_bytes = len(value.encode("utf-8"))
+
+            # max_size_per_item is expressed in KB (1KB = 1024 bytes)
+            return size_in_bytes / 1024 <= self.max_size_per_item
+        except Exception:
+            # If we can't measure the size, assume it's too large
+            return False
+
    def evict_cache(self):
        """
        Eviction policy:
@ -61,6 +90,8 @@ class InMemoryCache(BaseCache):
        if len(self.cache_dict) >= self.max_size_in_memory:
            # only evict when cache is full
            self.evict_cache()
+        if not self.check_value_size(value):
+            return

        self.cache_dict[key] = value
        if "ttl" in kwargs and kwargs["ttl"] is not None:
--- a/tests/litellm/caching/test_in_memory_cache.py
+++ b/tests/litellm/caching/test_in_memory_cache.py
@ -0,0 +1,34 @@
+import asyncio
+import json
+import os
+import sys
+import time
+from unittest.mock import MagicMock, patch
+
+import httpx
+import pytest
+import respx
+from fastapi.testclient import TestClient
+
+sys.path.insert(
+    0, os.path.abspath("../../..")
+)  # Adds the parent directory to the system path
+from unittest.mock import AsyncMock
+
+from litellm.caching.in_memory_cache import InMemoryCache
+
+
+def test_in_memory_openai_obj_cache():
+    """An OpenAI client stored with set_cache is returned by get_cache."""
+    from openai import OpenAI
+
+    cache_key = "my-fake-key"
+    client = OpenAI(api_key="my-fake-key")
+    cache = InMemoryCache()
+
+    cache.set_cache(key=cache_key, value=client)
+    fetched = cache.get_cache(key=cache_key)
+
+    # The entry must exist and must compare equal to what was stored.
+    assert fetched is not None
+    assert fetched == client