Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 10:44:24 +00:00)
test: add unit testing
This commit is contained in:
parent dfea55a1e7
commit c7b17495a1
3 changed files with 44 additions and 14 deletions
@@ -9,9 +9,13 @@ Has 4 methods:
 """
 
 import json
+import sys
 import time
 from typing import Any, List, Optional
 
+from pydantic import BaseModel
+
+from ..constants import MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB
 from .base_cache import BaseCache
 
 
@@ -22,6 +26,7 @@ class InMemoryCache(BaseCache):
         default_ttl: Optional[
             int
         ] = 600,  # default ttl is 10 minutes. At maximum litellm rate limiting logic requires objects to be in memory for 1 minute
+        max_size_per_item: Optional[int] = 1024,  # 1MB = 1024KB
     ):
         """
         max_size_in_memory [int]: Maximum number of items in cache. done to prevent memory leaks. Use 200 items as a default
@@ -30,7 +35,9 @@ class InMemoryCache(BaseCache):
             max_size_in_memory or 200
         )  # set an upper bound of 200 items in-memory
         self.default_ttl = default_ttl or 600
-        self.max_size_per_item = 1024  # 1MB = 1024KB
+        self.max_size_per_item = (
+            max_size_per_item or MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB
+        )  # 1MB = 1024KB
 
         # in-memory cache
         self.cache_dict: dict = {}
@@ -42,26 +49,37 @@ class InMemoryCache(BaseCache):
         Returns True if value size is acceptable, False otherwise
         """
         try:
-            # Handle special types
-            if hasattr(value, "model_dump"):  # Pydantic v2
+            # Fast path for common primitive types that are typically small
+            if (
+                isinstance(value, (bool, int, float, str))
+                and len(str(value)) < self.max_size_per_item * 512
+            ):  # Conservative estimate
+                return True
+
+            # Direct size check for bytes objects
+            if isinstance(value, bytes):
+                return sys.getsizeof(value) / 1024 <= self.max_size_per_item
+
+            # Handle special types without full conversion when possible
+            if hasattr(value, "__sizeof__"):  # Use __sizeof__ if available
+                size = value.__sizeof__() / 1024
+                return size <= self.max_size_per_item
+
+            # Fallback for complex types
+            if isinstance(value, BaseModel) and hasattr(
+                value, "model_dump"
+            ):  # Pydantic v2
                 value = value.model_dump()
-            elif hasattr(value, "dict"):  # Pydantic v1
-                value = value.dict()
             elif hasattr(value, "isoformat"):  # datetime objects
-                value = value.isoformat()
+                return True  # datetime strings are always small
 
-            # Convert value to JSON string to get a consistent size measurement
+            # Only convert to JSON if absolutely necessary
             if not isinstance(value, (str, bytes)):
-                value = json.dumps(
-                    value, default=str
-                )  # default=str handles any remaining datetime objects
+                value = json.dumps(value, default=str)
 
-            # Get size in KB (1KB = 1024 bytes)
-            value_size = len(str(value).encode("utf-8")) / 1024
+            return sys.getsizeof(value) / 1024 <= self.max_size_per_item
 
-            return value_size <= self.max_size_per_item
         except Exception:
-            # If we can't measure the size, assume it's too large
             return False
 
     def evict_cache(self):
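For orientation, the new fast path accepts any bool/int/float/str whose string form is shorter than max_size_per_item * 512 characters; with the 1024 KB default that is 524,288 characters, roughly 512 KB of ASCII and about half the 1 MB budget, which appears to be the intent behind the "Conservative estimate" comment. Below is a condensed, standalone sketch of that logic for illustration only, not the library code itself; the MAX_KB constant and the trimmed branch order are assumptions made to keep the example short.

import json
import sys
from typing import Any

MAX_KB = 1024  # stands in for MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB


def check_value_size(value: Any, max_size_per_item: int = MAX_KB) -> bool:
    """Condensed reproduction of the size check, for illustration only."""
    try:
        # Fast path: primitives whose string form is under max_size_per_item * 512 chars.
        if (
            isinstance(value, (bool, int, float, str))
            and len(str(value)) < max_size_per_item * 512
        ):
            return True
        # Bytes are measured directly in KB.
        if isinstance(value, bytes):
            return sys.getsizeof(value) / 1024 <= max_size_per_item
        # Anything else is JSON-dumped, then measured the same way.
        if not isinstance(value, (str, bytes)):
            value = json.dumps(value, default=str)
        return sys.getsizeof(value) / 1024 <= max_size_per_item
    except Exception:
        return False


print(check_value_size("a" * 1_000))       # True: far below the 512 KB fast-path cutoff
print(check_value_size(b"x" * 2_000_000))  # False: ~1953 KB of bytes exceeds 1024 KB
print(check_value_size({"k": "v" * 10}))   # True: tiny dict after json.dumps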
@@ -14,6 +14,7 @@ DEFAULT_REPLICATE_POLLING_DELAY_SECONDS = 1
 DEFAULT_IMAGE_TOKEN_COUNT = 250
 DEFAULT_IMAGE_WIDTH = 300
 DEFAULT_IMAGE_HEIGHT = 300
+MAX_SIZE_PER_ITEM_IN_MEMORY_CACHE_IN_KB = 1024  # 1MB = 1024KB
 SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000  # Minimum number of requests to consider "reasonable traffic". Used for single-deployment cooldown logic.
 #### RELIABILITY ####
 REPEATED_STREAMING_CHUNK_LIMIT = 100  # catch if model starts looping the same chunk while streaming. Uses high default to prevent false positives.
@@ -32,3 +32,14 @@ def test_in_memory_openai_obj_cache():
     assert cached_obj is not None
 
     assert cached_obj == openai_obj
+
+
+def test_in_memory_cache_max_size_per_item():
+    """
+    Test that the cache will not store items larger than the max size per item
+    """
+    in_memory_cache = InMemoryCache(max_size_per_item=100)
+
+    result = in_memory_cache.check_value_size("a" * 100000000)
+
+    assert result is False
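A complementary assertion, sketched here and not part of the commit, would exercise the accept side of the same check. It assumes the test module already imports InMemoryCache (as the existing test implies) and relies on a 1,000-character string staying under the 100 KB * 512-character fast-path cutoff.

def test_in_memory_cache_small_item_allowed():
    """
    Hypothetical companion test: a value well below the per-item limit
    should pass the size check.
    """
    in_memory_cache = InMemoryCache(max_size_per_item=100)  # 100 KB limit

    # 1,000 characters is far below the fast-path cutoff of 100 * 512 = 51,200 chars
    result = in_memory_cache.check_value_size("a" * 1000)

    assert result is True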