diff --git a/.circleci/config.yml b/.circleci/config.yml
index 2ec13464b..e4ed3a257 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -16,7 +16,8 @@ jobs:
             pip install infisical
             pip install pytest
             pip install openai[datalib]
-            pip install -Uq chromadb
+            pip install -Uq chromadb==0.3.29
+
 
       # Run pytest and generate JUnit XML report
       - run:
diff --git a/litellm/tests/test_cache.py b/litellm/tests/test_cache.py
index d97d19142..05004d1a6 100644
--- a/litellm/tests/test_cache.py
+++ b/litellm/tests/test_cache.py
@@ -1,44 +1,44 @@
-# import sys, os
-# import traceback
-# from dotenv import load_dotenv
-# load_dotenv()
-# import os
+import sys, os
+import traceback
+from dotenv import load_dotenv
+load_dotenv()
+import os
 
-# sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
-# import pytest
-# import litellm
+sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
+import pytest
+import litellm
 
-# # set cache to True
-# litellm.cache = True
-# litellm.cache_similarity_threshold = 0.5
+# set cache to True
+litellm.cache = True
+litellm.cache_similarity_threshold = 0.5
 
-# user_message = "Hello, whats the weather in San Francisco??"
-# messages = [{ "content": user_message,"role": "user"}]
+user_message = "Hello, whats the weather in San Francisco??"
+messages = [{ "content": user_message,"role": "user"}]
 
-# def test_completion_with_cache_gpt4():
-#     try:
-#         # in this test make the same call twice, measure the response time
-#         # the 2nd response time should be less than half of the first, ensuring that the cache is working
-#         import time
-#         start = time.time()
-#         print(litellm.cache)
-#         response = litellm.completion(model="gpt-4", messages=messages)
-#         end = time.time()
-#         first_call_time = end-start
-#         print(f"first call: {first_call_time}")
+def test_completion_with_cache_gpt4():
+    try:
+        # in this test make the same call twice, measure the response time
+        # the 2nd response time should be less than half of the first, ensuring that the cache is working
+        import time
+        start = time.time()
+        print(litellm.cache)
+        response = litellm.completion(model="gpt-4", messages=messages)
+        end = time.time()
+        first_call_time = end-start
+        print(f"first call: {first_call_time}")
 
-#         start = time.time()
-#         response = litellm.completion(model="gpt-4", messages=messages)
-#         end = time.time()
-#         second_call_time = end-start
-#         print(f"second call: {second_call_time}")
+        start = time.time()
+        response = litellm.completion(model="gpt-4", messages=messages)
+        end = time.time()
+        second_call_time = end-start
+        print(f"second call: {second_call_time}")
 
-#         if second_call_time > 1:
-#             # the 2nd call should be less than 1s
-#             pytest.fail(f"Cache is not working")
-#         # Add any assertions here to check the response
-#         print(response)
-#     except Exception as e:
-#         pytest.fail(f"Error occurred: {e}")
+        if second_call_time > 1:
+            # the 2nd call should be less than 1s
+            pytest.fail(f"Cache is not working")
+        # Add any assertions here to check the response
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
 
-# litellm.cache = False
\ No newline at end of file
+litellm.cache = False
\ No newline at end of file