* fix(caching): convert arg to equivalent kwargs in llm caching handler
  prevent unexpected errors
* fix(caching_handler.py): don't pass args to caching
* fix(caching): remove all *args from caching.py
* fix(caching): consistent function signatures + abc method
* test(caching_unit_tests.py): add unit tests for llm caching
  ensures coverage for common caching scenarios across different implementations
* refactor(litellm_logging.py): move to using cache key from hidden params instead of regenerating one
* fix(router.py): drop redis password requirement
* fix(proxy_server.py): fix faulty slack alerting check
* fix(langfuse.py): avoid copying functions/thread lock objects in metadata
  fixes metadata copy error when parent otel span in metadata
* test: update test
import os
import sys
import time
import traceback
import uuid

from dotenv import load_dotenv

from test_rerank import assert_response_shape

load_dotenv()
import os

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import asyncio
import hashlib
import random

import pytest

import litellm
from litellm import aembedding, completion, embedding
from litellm.caching.caching import Cache

from unittest.mock import AsyncMock, patch, MagicMock, call
import datetime
from datetime import timedelta
from litellm.caching import *


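# NOTE: hedged sketch, not part of the original suite. Every test below builds the
# same InMemoryCache/RedisCache/DualCache trio; a pytest fixture like this could
# consolidate that setup. The fixture name is hypothetical, and it assumes the same
# constructor signatures and REDIS_HOST/REDIS_PORT environment variables used in
# the tests below.
@pytest.fixture
def dual_cache_setup():
    """Return (in_memory, redis_cache, dual_cache) wired the same way as the tests below."""
    in_memory = InMemoryCache()
    redis_cache = RedisCache(
        host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT")
    )
    dual_cache = DualCache(in_memory_cache=in_memory, redis_cache=redis_cache)
    return in_memory, redis_cache, dual_cache

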
@pytest.mark.parametrize("is_async", [True, False])
@pytest.mark.asyncio
async def test_dual_cache_get_set(is_async):
    """Test that DualCache reads from in-memory cache first for both sync and async operations"""
    in_memory = InMemoryCache()
    redis_cache = RedisCache(host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"))
    dual_cache = DualCache(in_memory_cache=in_memory, redis_cache=redis_cache)

    # Test basic set/get
    test_key = f"test_key_{str(uuid.uuid4())}"
    test_value = {"test": "value"}

    if is_async:
        await dual_cache.async_set_cache(test_key, test_value)
        mock_method = "async_get_cache"
    else:
        dual_cache.set_cache(test_key, test_value)
        mock_method = "get_cache"

    # Mock Redis get to ensure we're not calling it
    # this should only read in memory since we just set test_key
    with patch.object(redis_cache, mock_method) as mock_redis_get:
        if is_async:
            result = await dual_cache.async_get_cache(test_key)
        else:
            result = dual_cache.get_cache(test_key)

        assert result == test_value
        mock_redis_get.assert_not_called()  # Verify Redis wasn't accessed


@pytest.mark.parametrize("is_async", [True, False])
@pytest.mark.asyncio
async def test_dual_cache_local_only(is_async):
    """Test that when local_only=True, only in-memory cache is used"""
    in_memory = InMemoryCache()
    redis_cache = RedisCache(host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"))
    dual_cache = DualCache(in_memory_cache=in_memory, redis_cache=redis_cache)

    test_key = f"test_key_{str(uuid.uuid4())}"
    test_value = {"test": "value"}

    # Mock Redis methods to ensure they're not called
    redis_set_method = "async_set_cache" if is_async else "set_cache"
    redis_get_method = "async_get_cache" if is_async else "get_cache"

    with patch.object(redis_cache, redis_set_method) as mock_redis_set, patch.object(
        redis_cache, redis_get_method
    ) as mock_redis_get:

        # Set value with local_only=True
        if is_async:
            await dual_cache.async_set_cache(test_key, test_value, local_only=True)
            result = await dual_cache.async_get_cache(test_key, local_only=True)
        else:
            dual_cache.set_cache(test_key, test_value, local_only=True)
            result = dual_cache.get_cache(test_key, local_only=True)

        assert result == test_value
        mock_redis_set.assert_not_called()  # Verify Redis set wasn't called
        mock_redis_get.assert_not_called()  # Verify Redis get wasn't called


@pytest.mark.parametrize("is_async", [True, False])
@pytest.mark.asyncio
async def test_dual_cache_value_not_in_memory(is_async):
    """Test that DualCache falls back to Redis when value isn't in memory,
    and subsequent requests use in-memory cache"""

    in_memory = InMemoryCache()
    redis_cache = RedisCache(host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"))
    dual_cache = DualCache(in_memory_cache=in_memory, redis_cache=redis_cache)

    test_key = f"test_key_{str(uuid.uuid4())}"
    test_value = {"test": "value"}

    # First, set value only in Redis
    if is_async:
        await redis_cache.async_set_cache(test_key, test_value)
    else:
        redis_cache.set_cache(test_key, test_value)

    # First request - should fall back to Redis and populate in-memory
    if is_async:
        result = await dual_cache.async_get_cache(test_key)
    else:
        result = dual_cache.get_cache(test_key)

    assert result == test_value

    # Second request - should now use in-memory cache
    with patch.object(
        redis_cache, "async_get_cache" if is_async else "get_cache"
    ) as mock_redis_get:
        if is_async:
            result = await dual_cache.async_get_cache(test_key)
        else:
            result = dual_cache.get_cache(test_key)

        assert result == test_value
        mock_redis_get.assert_not_called()  # Verify Redis wasn't accessed second time


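# Hedged sketch, not part of the original suite: a direct check that the Redis
# fallback exercised above also warms the in-memory layer. It assumes
# InMemoryCache exposes the same get_cache(key) interface that DualCache
# delegates to (the method name patched on RedisCache above).
@pytest.mark.asyncio
async def test_dual_cache_redis_fallback_warms_in_memory_sketch():
    in_memory = InMemoryCache()
    redis_cache = RedisCache(host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"))
    dual_cache = DualCache(in_memory_cache=in_memory, redis_cache=redis_cache)

    test_key = f"test_key_{str(uuid.uuid4())}"
    test_value = {"test": "value"}

    # Seed Redis only, then read through the DualCache once.
    await redis_cache.async_set_cache(test_key, test_value)
    assert await dual_cache.async_get_cache(test_key) == test_value

    # Assumption: the in-memory layer now holds the value directly.
    assert in_memory.get_cache(test_key) == test_value

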
@pytest.mark.parametrize("is_async", [True, False])
@pytest.mark.asyncio
async def test_dual_cache_batch_operations(is_async):
    """Test batch get/set operations use in-memory cache correctly"""
    in_memory = InMemoryCache()
    redis_cache = RedisCache(host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"))
    dual_cache = DualCache(in_memory_cache=in_memory, redis_cache=redis_cache)

    test_keys = [f"test_key_{str(uuid.uuid4())}" for _ in range(3)]
    test_values = [{"test": f"value_{i}"} for i in range(3)]
    cache_list = list(zip(test_keys, test_values))

    # Set values
    if is_async:
        await dual_cache.async_set_cache_pipeline(cache_list)
    else:
        for key, value in cache_list:
            dual_cache.set_cache(key, value)

    # Verify in-memory cache is used for subsequent reads
    with patch.object(
        redis_cache, "async_batch_get_cache" if is_async else "batch_get_cache"
    ) as mock_redis_get:
        if is_async:
            results = await dual_cache.async_batch_get_cache(test_keys)
        else:
            results = dual_cache.batch_get_cache(test_keys, parent_otel_span=None)

        assert results == test_values
        mock_redis_get.assert_not_called()


@pytest.mark.parametrize("is_async", [True, False])
@pytest.mark.asyncio
async def test_dual_cache_increment(is_async):
    """Test increment operations only use the in-memory cache when local_only=True"""
    in_memory = InMemoryCache()
    redis_cache = RedisCache(host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"))
    dual_cache = DualCache(in_memory_cache=in_memory, redis_cache=redis_cache)

    test_key = f"counter_{str(uuid.uuid4())}"
    increment_value = 1

    # increment should use in-memory cache
    with patch.object(
        redis_cache, "async_increment" if is_async else "increment_cache"
    ) as mock_redis_increment:
        if is_async:
            result = await dual_cache.async_increment_cache(
                test_key,
                increment_value,
                local_only=True,
                parent_otel_span=None,
            )
        else:
            result = dual_cache.increment_cache(
                test_key, increment_value, local_only=True
            )

        assert result == increment_value
        mock_redis_increment.assert_not_called()


@pytest.mark.asyncio
async def test_dual_cache_sadd():
    """Test set add operations use in-memory cache for reads"""
    in_memory = InMemoryCache()
    redis_cache = RedisCache(host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"))
    dual_cache = DualCache(in_memory_cache=in_memory, redis_cache=redis_cache)

    test_key = f"set_{str(uuid.uuid4())}"
    test_values = ["value1", "value2", "value3"]

    # Add values to set
    await dual_cache.async_set_cache_sadd(test_key, test_values)

    # Verify in-memory cache is used for subsequent operations
    with patch.object(redis_cache, "async_get_cache") as mock_redis_get:
        result = await dual_cache.async_get_cache(test_key)
        assert set(result) == set(test_values)
        mock_redis_get.assert_not_called()


@pytest.mark.parametrize("is_async", [True, False])
@pytest.mark.asyncio
async def test_dual_cache_delete(is_async):
    """Test delete operations remove from both caches"""
    in_memory = InMemoryCache()
    redis_cache = RedisCache(host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"))
    dual_cache = DualCache(in_memory_cache=in_memory, redis_cache=redis_cache)

    test_key = f"test_key_{str(uuid.uuid4())}"
    test_value = {"test": "value"}

    # Set value
    if is_async:
        await dual_cache.async_set_cache(test_key, test_value)
    else:
        dual_cache.set_cache(test_key, test_value)

    # Delete value
    if is_async:
        await dual_cache.async_delete_cache(test_key)
    else:
        dual_cache.delete_cache(test_key)

    # Verify value is deleted from both caches
    if is_async:
        result = await dual_cache.async_get_cache(test_key)
    else:
        result = dual_cache.get_cache(test_key)

    assert result is None


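# Hedged usage sketch for the dual_cache_setup fixture defined near the imports:
# the same read-from-memory-first assertion as test_dual_cache_get_set, without
# repeating the per-test cache construction. The test name and fixture are
# illustrative, not part of the original suite.
@pytest.mark.asyncio
async def test_dual_cache_get_set_via_fixture_sketch(dual_cache_setup):
    in_memory, redis_cache, dual_cache = dual_cache_setup

    test_key = f"test_key_{str(uuid.uuid4())}"
    test_value = {"test": "value"}

    await dual_cache.async_set_cache(test_key, test_value)

    # Redis should not be consulted for a key that was just written.
    with patch.object(redis_cache, "async_get_cache") as mock_redis_get:
        assert await dual_cache.async_get_cache(test_key) == test_value
        mock_redis_get.assert_not_called()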