litellm-mirror/litellm/tests/test_cache.py
import sys, os
import time
import traceback
from dotenv import load_dotenv
load_dotenv()
sys.path.insert(0, os.path.abspath('../..'))  # add the repo root (two levels up) to the system path so the local litellm is imported
import pytest
import litellm
from litellm import embedding, completion
# enable caching: a response is served from the cache when a new prompt is
# similar enough to a previously seen one, per the threshold below
# (see the similarity sketch at the end of this file)
litellm.cache = True
litellm.cache_similarity_threshold = 0.5

user_message = "Hello, what's the weather in San Francisco?"
messages = [{"content": user_message, "role": "user"}]

def test_completion_gpt():
    try:
        # make the same call twice and measure each response time; the second
        # call should hit the cache and take less than half as long as the first
        start = time.time()
        response = completion(model="gpt-4", messages=messages)
        end = time.time()
        first_call_time = end - start
        print(f"first call: {first_call_time}")

        start = time.time()
        response = completion(model="gpt-4", messages=messages)
        end = time.time()
        second_call_time = end - start
        print(f"second call: {second_call_time}")

        if second_call_time > first_call_time / 2:
            pytest.fail("Cache is not working")
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")