add tutorial

ishaan-jaff 2023-08-10 16:46:45 -07:00
parent 8e7479a1a1
commit 7071955f6d
2 changed files with 377 additions and 37 deletions

File diff suppressed because one or more lines are too long


@@ -1,44 +1,44 @@
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
sys.path.insert(0, os.path.abspath('../..'))  # Adds the parent directory to the system path
import pytest
import litellm

# enable caching
litellm.cache = True
litellm.cache_similarity_threshold = 0.5

user_message = "Hello, whats the weather in San Francisco??"
messages = [{"content": user_message, "role": "user"}]

def test_completion_with_cache_gpt4():
    try:
        # in this test, make the same call twice and measure the response time;
        # the second call should be served from the cache and return in under 1s
        import time
        start = time.time()
        print(litellm.cache)
        response = litellm.completion(model="gpt-4", messages=messages)
        end = time.time()
        first_call_time = end - start
        print(f"first call: {first_call_time}")

        start = time.time()
        response = litellm.completion(model="gpt-4", messages=messages)
        end = time.time()
        second_call_time = end - start
        print(f"second call: {second_call_time}")

        if second_call_time > 1:
            # the cached call should take less than 1s
            pytest.fail("Cache is not working")
        # Add any assertions here to check the response
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
    litellm.cache = False
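
One caveat with the test above: in recent pytest versions, `pytest.fail` raises an exception derived from `BaseException`, so it is not caught by `except Exception`, and when the test fails the trailing `litellm.cache = False` never runs, leaking the global flag into other tests. A minimal sketch of a safer pattern using a pytest fixture (the fixture and test names here are hypothetical; the `litellm.cache` and `litellm.cache_similarity_threshold` flags are taken from the test above):

import time
import pytest
import litellm

@pytest.fixture
def enable_cache():
    # hypothetical fixture: resets the global cache flag even if the test fails
    litellm.cache = True
    litellm.cache_similarity_threshold = 0.5
    yield
    litellm.cache = False

def test_cached_call_is_fast(enable_cache):
    messages = [{"content": "Hello, whats the weather in San Francisco??", "role": "user"}]
    litellm.completion(model="gpt-4", messages=messages)  # first call warms the cache
    start = time.time()
    litellm.completion(model="gpt-4", messages=messages)  # identical call, should hit the cache
    assert time.time() - start < 1, "cached call should return in under 1s"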