# litellm/tests/local_testing/cache_unit_tests.py
from abc import ABC, abstractmethod
from litellm.caching import LiteLLMCacheType
import os
import sys
import time
import traceback
import uuid

from dotenv import load_dotenv
from test_rerank import assert_response_shape

load_dotenv()

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import asyncio
import hashlib
import random

import pytest

import litellm
from litellm import completion, embedding
from litellm.caching import Cache

class LLMCachingUnitTests(ABC):
    @abstractmethod
    def get_cache_type(self) -> LiteLLMCacheType:
        pass

    @pytest.mark.parametrize("sync_mode", [True, False])
    @pytest.mark.asyncio
    async def test_cache_completion(self, sync_mode):
        litellm._turn_on_debug()
        random_number = random.randint(
            1, 100000
        )  # add a random number to ensure it's always adding / reading from cache
        messages = [
            {
                "role": "user",
                "content": f"write a one sentence poem about: {random_number}",
            }
        ]
        cache_type = self.get_cache_type()
        litellm.cache = Cache(
            type=cache_type,
        )

        if sync_mode:
            response1 = completion(
                "gpt-3.5-turbo",
                messages=messages,
                caching=True,
                max_tokens=20,
                mock_response="This number is so great!",
            )
        else:
            response1 = await litellm.acompletion(
                "gpt-3.5-turbo",
                messages=messages,
                caching=True,
                max_tokens=20,
                mock_response="This number is so great!",
            )

        # response2 is mocked to a different response from response1,
        # but the completion from the cache should be used instead of the mock
        # response since the input is the same as response1
        await asyncio.sleep(0.5)
        if sync_mode:
            response2 = completion(
                "gpt-3.5-turbo",
                messages=messages,
                caching=True,
                max_tokens=20,
                mock_response="This number is great!",
            )
        else:
            response2 = await litellm.acompletion(
                "gpt-3.5-turbo",
                messages=messages,
                caching=True,
                max_tokens=20,
                mock_response="This number is great!",
            )

        if (
            response1["choices"][0]["message"]["content"]
            != response2["choices"][0]["message"]["content"]
        ):
            # response1 and response2 have the exact same input params,
            # so this MUST be a cache hit
            print(f"response1: {response1}")
            print(f"response2: {response2}")
            pytest.fail(
                f"Error occurred: response1 - {response1['choices'][0]['message']['content']} != response2 - {response2['choices'][0]['message']['content']}"
            )

        # Since the parameters are not the same as response1, response3 should
        # actually be the mock response
        if sync_mode:
            response3 = completion(
                "gpt-3.5-turbo",
                messages=messages,
                caching=True,
                temperature=0.5,
                mock_response="This number is awful!",
            )
        else:
            response3 = await litellm.acompletion(
                "gpt-3.5-turbo",
                messages=messages,
                caching=True,
                temperature=0.5,
                mock_response="This number is awful!",
            )

        print("\nresponse 1", response1)
        print("\nresponse 2", response2)
        print("\nresponse 3", response3)

        litellm.cache = None
        litellm.success_callback = []
        litellm._async_success_callback = []

        # 1 & 2 should be exactly the same
        # 1 & 3 should be different, since input params are diff
        if (
            response1["choices"][0]["message"]["content"]
            == response3["choices"][0]["message"]["content"]
        ):
            # if input params like max_tokens, temperature are diff,
            # it should NOT be a cache hit
            print(f"response1: {response1}")
            print(f"response3: {response3}")
            pytest.fail(
                "Response 1 == response 3. Same model with different params "
                "should not be a cache hit."
            )

        assert response1.id == response2.id
        assert response1.created == response2.created
        assert (
            response1.choices[0].message.content == response2.choices[0].message.content
        )
    @pytest.mark.parametrize("sync_mode", [True, False])
    @pytest.mark.asyncio
    async def test_cache_embedding(self, sync_mode):
        litellm._turn_on_debug()
        random_number = random.randint(
            1, 100000
        )  # add a random number to ensure it's always adding / reading from cache
        input = [f"hello {random_number}"]
        cache_type = self.get_cache_type()
        litellm.cache = Cache(
            type=cache_type,
        )

        if sync_mode:
            response1 = embedding(
                "openai/text-embedding-ada-002",
                input=input,
                caching=True,
            )
        else:
            response1 = await litellm.aembedding(
                "openai/text-embedding-ada-002",
                input=input,
                caching=True,
            )

        # response2 uses the same input as response1, so the embedding
        # should be served from the cache
        await asyncio.sleep(0.5)
        if sync_mode:
            response2 = embedding(
                "openai/text-embedding-ada-002",
                input=input,
                caching=True,
            )
        else:
            response2 = await litellm.aembedding(
                "openai/text-embedding-ada-002",
                input=input,
                caching=True,
            )

        if response2._hidden_params["cache_hit"] is not True:
            pytest.fail("Cache hit should be True")

        # Since the parameters are not the same as response1 (the `user` param
        # differs), response3 should be a fresh call, not a cache hit
        if sync_mode:
            response3 = embedding(
                "openai/text-embedding-ada-002",
                input=input,
                caching=True,
                user="charlie",
            )
        else:
            response3 = await litellm.aembedding(
                "openai/text-embedding-ada-002",
                input=input,
                caching=True,
                user="charlie",
            )

        print("\nresponse 1", response1)
        print("\nresponse 2", response2)
        print("\nresponse 3", response3)

        litellm.cache = None
        litellm.success_callback = []
        litellm._async_success_callback = []

        # 1 & 2 should be exactly the same
        # 1 & 3 should be different, since input params are diff
        if response3._hidden_params.get("cache_hit") is True:
            pytest.fail("Cache hit should not be True")