add streaming_caching support

This commit is contained in:
ishaan-jaff 2023-08-28 19:17:53 -07:00
parent 8af6d967eb
commit fbef73d043
3 changed files with 48 additions and 44 deletions

litellm/caching.py

@@ -1,4 +1,5 @@
 import litellm
+import time
 def get_prompt(*args, **kwargs):
     # make this safe checks, it should not throw any exceptions
     if len(args) > 1:
@@ -30,9 +31,9 @@ class InMemoryCache():
         self.cache_dict = {}

     def set_cache(self, key, value):
-        print("in set cache for inmem")
+        #print("in set cache for inmem")
         self.cache_dict[key] = value
-        print(self.cache_dict)
+        #print(self.cache_dict)

     def get_cache(self, key):
         #print("in get cache for inmem")
@@ -65,11 +66,23 @@ class Cache():
             return None
         return cache_key

+    def generate_streaming_content(self, content):
+        chunk_size = 5 # Adjust the chunk size as needed
+        for i in range(0, len(content), chunk_size):
+            yield {'choices': [{'delta': {'role': 'assistant', 'content': content[i:i+chunk_size]}}]}
+            time.sleep(0.02)
+
     def get_cache(self, *args, **kwargs):
         try: # never block execution
             cache_key = self.get_cache_key(*args, **kwargs)
             if cache_key is not None:
-                return self.cache.get_cache(cache_key)
+                cached_result = self.cache.get_cache(cache_key)
+                if cached_result != None and 'stream' in kwargs and kwargs['stream'] == True:
+                    # if streaming is true and we got a cache hit, return a generator
+                    #print("cache hit and stream=True")
+                    #print(cached_result)
+                    return self.generate_streaming_content(cached_result["choices"][0]['message']['content'])
+                return cached_result
         except:
             return None
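
For illustration, here is a minimal, hypothetical usage sketch of the new streaming replay path. It assumes the Cache class above is importable as litellm.caching.Cache; the cached response dict and its content are made up for the example.

from litellm.caching import Cache  # assumed import path for the Cache class above

cache = Cache()

# A dict shaped like a cached, non-streaming completion response (made-up content).
cached_result = {
    "choices": [
        {"message": {"role": "assistant", "content": "The sky appears blue because of Rayleigh scattering."}}
    ]
}

# Replay the cached text as streaming-style delta chunks, the same way
# get_cache does on a cache hit when stream=True is passed.
content = cached_result["choices"][0]["message"]["content"]
for chunk in cache.generate_streaming_content(content):
    print(chunk["choices"][0]["delta"]["content"], end="", flush=True)
print()

On a cache hit with stream=True, get_cache now returns this kind of generator instead of the raw response dict; the fixed chunk_size of 5 characters and the 0.02s sleep roughly mimic provider-side streaming pacing.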