forked from phoenix/litellm-mirror
add caching with chromaDB - not a dependency
This commit is contained in:
parent 09fcd88799
commit d80f847fde
6 changed files with 113 additions and 2 deletions
@@ -702,3 +702,52 @@ class CustomStreamWrapper:
             completion_obj["content"] = chunk.text
         # return this for all models
         return {"choices": [{"delta": completion_obj}]}
+
+
+############# Caching Implementation v0 using chromaDB ############################
+cache_collection = None
+
+
+def make_collection():
+    global cache_collection
+    import chromadb  # imported lazily so chromadb stays an optional dependency
+    client = chromadb.Client()
+    cache_collection = client.create_collection("llm_responses")
+
+
+def message_to_user_question(messages):
+    # concatenate the content of every user-role message into one query string
+    user_question = ""
+    for message in messages:
+        if message["role"] == "user":
+            user_question += message["content"]
+    return user_question
+
+
+def add_cache(messages, model_response):
+    global cache_collection
+    if cache_collection is None:
+        make_collection()
+    user_question = message_to_user_question(messages)
+    cache_collection.add(
+        documents=[user_question],
+        metadatas=[{"model_response": str(model_response)}],
+        ids=[str(uuid.uuid4())],  # NOTE: requires `import uuid` at module level
+    )
+
+
+def get_cache(messages):
+    try:
+        global cache_collection
+        if cache_collection is None:
+            make_collection()
+        user_question = message_to_user_question(messages)
+        results = cache_collection.query(query_texts=[user_question], n_results=1)
+        distance = results["distances"][0][0]
+        sim = 1 - distance
+        if sim >= litellm.cache_similarity_threshold:
+            # semantic cache hit: return the stored response metadata
+            print("got cache hit!")
+            return dict(results["metadatas"][0][0])
+        else:
+            # no sufficiently similar cached question
+            return None
+    except Exception:
+        # fail open: any cache error falls through to a normal model call
+        return None
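For context, here is a minimal sketch of how these helpers could be wired into a completion call. The `completion_with_cache` wrapper and the threshold value below are hypothetical; the commit's actual call sites live in the other changed files, which are not shown in this hunk. It assumes `litellm.cache_similarity_threshold` is defined elsewhere in the commit, as the diff implies.

    # Hypothetical wiring (not part of this hunk): consult the cache before
    # the model call and populate it afterwards. `messages` follows the
    # OpenAI chat format used throughout litellm.
    import litellm

    litellm.cache_similarity_threshold = 0.9  # assumed set elsewhere by this commit

    def completion_with_cache(model, messages):
        cached = get_cache(messages)
        if cached is not None:
            # note: the hit is {"model_response": str(...)}, not a full response object
            return cached
        response = litellm.completion(model=model, messages=messages)
        add_cache(messages, response)
        return response

    messages = [{"role": "user", "content": "What is the capital of France?"}]
    first = completion_with_cache("gpt-3.5-turbo", messages)   # cache miss, real call
    second = completion_with_cache("gpt-3.5-turbo", messages)  # prints "got cache hit!"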
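One subtlety worth noting: `get_cache` computes `sim = 1 - distance`, which treats the query result as a cosine distance, but a chromadb collection created with the plain `create_collection("llm_responses")` call above defaults to L2 distance, where values are unbounded and `1 - distance` can go negative. A sketch of the one-line change that would make the similarity math line up, using chromadb's documented `hnsw:space` collection metadata:

    import chromadb

    client = chromadb.Client()
    # With "cosine", query distances equal 1 - cosine_similarity, so the
    # `sim = 1 - distance` line in get_cache recovers a similarity in [-1, 1].
    cache_collection = client.create_collection(
        "llm_responses",
        metadata={"hnsw:space": "cosine"},  # default is "l2"
    )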