forked from phoenix/litellm-mirror
add tutorial
This commit is contained in:
parent
8e7479a1a1
commit
7071955f6d
2 changed files with 377 additions and 37 deletions
340
cookbook/liteLLM_ChromaDB_Cache.ipynb
Normal file
340
cookbook/liteLLM_ChromaDB_Cache.ipynb
Normal file
File diff suppressed because one or more lines are too long
|
@ -1,44 +1,44 @@
|
||||||
import sys, os
|
# import sys, os
|
||||||
import traceback
|
# import traceback
|
||||||
from dotenv import load_dotenv
|
# from dotenv import load_dotenv
|
||||||
load_dotenv()
|
# load_dotenv()
|
||||||
import os
|
# import os
|
||||||
|
|
||||||
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
|
# sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
|
||||||
import pytest
|
# import pytest
|
||||||
import litellm
|
# import litellm
|
||||||
|
|
||||||
# set cache to True
|
# # set cache to True
|
||||||
litellm.cache = True
|
# litellm.cache = True
|
||||||
litellm.cache_similarity_threshold = 0.5
|
# litellm.cache_similarity_threshold = 0.5
|
||||||
|
|
||||||
user_message = "Hello, whats the weather in San Francisco??"
|
# user_message = "Hello, whats the weather in San Francisco??"
|
||||||
messages = [{ "content": user_message,"role": "user"}]
|
# messages = [{ "content": user_message,"role": "user"}]
|
||||||
|
|
||||||
def test_completion_with_cache_gpt4():
|
# def test_completion_with_cache_gpt4():
|
||||||
try:
|
# try:
|
||||||
# in this test make the same call twice, measure the response time
|
# # in this test make the same call twice, measure the response time
|
||||||
# the 2nd response time should be less than half of the first, ensuring that the cache is working
|
# # the 2nd response time should be less than half of the first, ensuring that the cache is working
|
||||||
import time
|
# import time
|
||||||
start = time.time()
|
# start = time.time()
|
||||||
print(litellm.cache)
|
# print(litellm.cache)
|
||||||
response = litellm.completion(model="gpt-4", messages=messages)
|
# response = litellm.completion(model="gpt-4", messages=messages)
|
||||||
end = time.time()
|
# end = time.time()
|
||||||
first_call_time = end-start
|
# first_call_time = end-start
|
||||||
print(f"first call: {first_call_time}")
|
# print(f"first call: {first_call_time}")
|
||||||
|
|
||||||
start = time.time()
|
# start = time.time()
|
||||||
response = litellm.completion(model="gpt-4", messages=messages)
|
# response = litellm.completion(model="gpt-4", messages=messages)
|
||||||
end = time.time()
|
# end = time.time()
|
||||||
second_call_time = end-start
|
# second_call_time = end-start
|
||||||
print(f"second call: {second_call_time}")
|
# print(f"second call: {second_call_time}")
|
||||||
|
|
||||||
if second_call_time > 1:
|
# if second_call_time > 1:
|
||||||
# the 2nd call should be less than 1s
|
# # the 2nd call should be less than 1s
|
||||||
pytest.fail(f"Cache is not working")
|
# pytest.fail(f"Cache is not working")
|
||||||
# Add any assertions here to check the response
|
# # Add any assertions here to check the response
|
||||||
print(response)
|
# print(response)
|
||||||
except Exception as e:
|
# except Exception as e:
|
||||||
pytest.fail(f"Error occurred: {e}")
|
# pytest.fail(f"Error occurred: {e}")
|
||||||
|
|
||||||
litellm.cache = False
|
# litellm.cache = False
|
Loading…
Add table
Add a link
Reference in a new issue