add caching with chromaDB - not a dependency

This commit is contained in:
ishaan-jaff 2023-08-10 13:19:48 -07:00
parent 09fcd88799
commit d80f847fde
6 changed files with 113 additions and 2 deletions

View file

@ -702,3 +702,52 @@ class CustomStreamWrapper:
completion_obj["content"] = chunk.text
# return this for all models
return {"choices": [{"delta": completion_obj}]}
############# Caching Implementation v0 using chromaDB ############################
# Module-level handle to the chromaDB collection backing the response cache.
# Stays None until make_collection() lazily creates it on first use, so
# chromadb is only imported when caching is actually exercised.
cache_collection = None
def make_collection():
    """Create the chromaDB client and the collection that caches LLM responses.

    Stores the collection in the module-level ``cache_collection`` global.
    chromadb is imported lazily here so it stays an optional dependency.
    """
    global cache_collection
    import chromadb

    chroma_client = chromadb.Client()
    cache_collection = chroma_client.create_collection("llm_responses")
def message_to_user_question(messages):
    """Concatenate the content of all user-role messages into one string.

    Non-user messages are ignored; an empty message list yields "".
    """
    return "".join(
        message["content"] for message in messages if message["role"] == "user"
    )
def add_cache(messages, model_response):
    """Store a model response in the chromaDB cache.

    The concatenated user-message content is used as the cached document
    (the lookup key for similarity search in get_cache); the response is
    stored stringified in the entry's metadata.

    Args:
        messages: list of chat-message dicts with "role"/"content" keys.
        model_response: the response object to cache (stored via str()).
    """
    global cache_collection
    # Fix: lazily create the collection like get_cache() does; previously,
    # calling add_cache() before any get_cache() raised AttributeError
    # because cache_collection was still None.
    if cache_collection is None:
        make_collection()
    user_question = message_to_user_question(messages)
    cache_collection.add(
        documents=[user_question],
        metadatas=[{"model_response": str(model_response)}],
        ids=[str(uuid.uuid4())],
    )
    return
def get_cache(messages):
    """Look up a cached model response for the user question in *messages*.

    Queries the chromaDB collection for the single nearest cached question
    and, if its similarity (1 - distance) meets
    ``litellm.cache_similarity_threshold``, returns that entry's metadata
    dict (which contains the stringified "model_response"). Returns None on
    a miss. Best-effort by design: any error during lookup is treated as a
    cache miss and returns None.
    """
    try:
        global cache_collection
        # Fix: identity comparison with None (was `== None`).
        if cache_collection is None:
            make_collection()
        user_question = message_to_user_question(messages)
        results = cache_collection.query(
            query_texts=[user_question],
            n_results=1
        )
        # Single query text, single nearest neighbor -> [0][0].
        distance = results['distances'][0][0]
        sim = (1 - distance)
        if sim >= litellm.cache_similarity_threshold:
            # return response
            print("got cache hit!")
            return dict(results['metadatas'][0][0])
        # Below the similarity threshold: treat as a miss.
        return None
    except Exception:
        # Fix: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. Keep the deliberate best-effort fallback, but
        # only for ordinary exceptions.
        return None