forked from phoenix/litellm-mirror
add caching with chromaDB - not a dependency
This commit is contained in:
parent 09fcd88799
commit d80f847fde
6 changed files with 113 additions and 2 deletions
@@ -702,3 +702,52 @@ class CustomStreamWrapper:
             completion_obj["content"] = chunk.text
         # return this for all models
         return {"choices": [{"delta": completion_obj}]}
+
+
+############# Caching Implementation v0 using chromaDB ############################
+cache_collection = None
+
+
+def make_collection():
+    global cache_collection
+    import chromadb  # imported lazily so chromadb stays an optional dependency
+    client = chromadb.Client()
+    cache_collection = client.create_collection("llm_responses")
+
+
+def message_to_user_question(messages):
+    # concatenate the content of every user-role message into one query string
+    user_question = ""
+    for message in messages:
+        if message["role"] == "user":
+            user_question += message["content"]
+    return user_question
+
+
+def add_cache(messages, model_response):
+    global cache_collection
+    if cache_collection is None:
+        make_collection()
+    user_question = message_to_user_question(messages)
+    cache_collection.add(
+        documents=[user_question],
+        metadatas=[{"model_response": str(model_response)}],
+        ids=[str(uuid.uuid4())],  # NOTE: requires `import uuid` at module level
+    )
+
+
+def get_cache(messages):
+    try:
+        global cache_collection
+        if cache_collection is None:
+            make_collection()
+        user_question = message_to_user_question(messages)
+        results = cache_collection.query(query_texts=[user_question], n_results=1)
+        distance = results["distances"][0][0]
+        sim = 1 - distance
+        if sim >= litellm.cache_similarity_threshold:
+            # semantic cache hit: return the stored response metadata
+            print("got cache hit!")
+            return dict(results["metadatas"][0][0])
+        else:
+            # no sufficiently similar cached question
+            return None
+    except Exception:
+        # fail open: any cache error falls through to a normal model call
+        return None
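For context, here is a minimal sketch of how these helpers could be wired into a completion call. The `completion_with_cache` wrapper and the threshold value below are hypothetical; the commit's actual call sites live in the other changed files, which are not shown in this hunk. It assumes `litellm.cache_similarity_threshold` is defined elsewhere in the commit, as the diff implies.

    # Hypothetical wiring (not part of this hunk): consult the cache before
    # the model call and populate it afterwards. `messages` follows the
    # OpenAI chat format used throughout litellm.
    import litellm

    litellm.cache_similarity_threshold = 0.9  # assumed set elsewhere by this commit

    def completion_with_cache(model, messages):
        cached = get_cache(messages)
        if cached is not None:
            # note: the hit is {"model_response": str(...)}, not a full response object
            return cached
        response = litellm.completion(model=model, messages=messages)
        add_cache(messages, response)
        return response

    messages = [{"role": "user", "content": "What is the capital of France?"}]
    first = completion_with_cache("gpt-3.5-turbo", messages)   # cache miss, real call
    second = completion_with_cache("gpt-3.5-turbo", messages)  # prints "got cache hit!"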
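One subtlety worth noting: `get_cache` computes `sim = 1 - distance`, which treats the query result as a cosine distance, but a chromadb collection created with the plain `create_collection("llm_responses")` call above defaults to L2 distance, where values are unbounded and `1 - distance` can go negative. A sketch of the one-line change that would make the similarity math line up, using chromadb's documented `hnsw:space` collection metadata:

    import chromadb

    client = chromadb.Client()
    # With "cosine", query distances equal 1 - cosine_similarity, so the
    # `sim = 1 - distance` line in get_cache recovers a similarity in [-1, 1].
    cache_collection = client.create_collection(
        "llm_responses",
        metadata={"hnsw:space": "cosine"},  # default is "l2"
    )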