forked from phoenix/litellm-mirror
add streaming_caching support
This commit is contained in:
parent 8af6d967eb
commit fbef73d043
3 changed files with 48 additions and 44 deletions
@@ -1,4 +1,5 @@
 import litellm
+import time
 def get_prompt(*args, **kwargs):
     # make this safe checks, it should not throw any exceptions
     if len(args) > 1:
@@ -30,9 +31,9 @@ class InMemoryCache():
         self.cache_dict = {}

     def set_cache(self, key, value):
-        print("in set cache for inmem")
+        #print("in set cache for inmem")
         self.cache_dict[key] = value
-        print(self.cache_dict)
+        #print(self.cache_dict)

     def get_cache(self, key):
         #print("in get cache for inmem")
@@ -65,11 +66,23 @@ class Cache():
                 return None
         return cache_key

+    def generate_streaming_content(self, content):
+        chunk_size = 5  # Adjust the chunk size as needed
+        for i in range(0, len(content), chunk_size):
+            yield {'choices': [{'delta': {'role': 'assistant', 'content': content[i:i+chunk_size]}}]}
+            time.sleep(0.02)
+
     def get_cache(self, *args, **kwargs):
         try:  # never block execution
             cache_key = self.get_cache_key(*args, **kwargs)
             if cache_key is not None:
-                return self.cache.get_cache(cache_key)
+                cached_result = self.cache.get_cache(cache_key)
+                if cached_result != None and 'stream' in kwargs and kwargs['stream'] == True:
+                    # if streaming is true and we got a cache hit, return a generator
+                    #print("cache hit and stream=True")
+                    #print(cached_result)
+                    return self.generate_streaming_content(cached_result["choices"][0]['message']['content'])
+                return cached_result
         except:
             return None

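For reference, a minimal sketch of how the streaming cache-hit path above could be exercised in isolation. The import path and the bare Cache() constructor are assumptions for illustration; only generate_streaming_content() and the chunk shape it yields come from this commit.

from litellm.caching import Cache  # assumed module path

cache = Cache()  # assumed no-arg constructor backed by InMemoryCache

# a cached, non-streaming completion in the OpenAI-style shape get_cache() expects
cached_result = {"choices": [{"message": {"role": "assistant", "content": "Hello from the cache!"}}]}

# on a cache hit with stream=True, get_cache() returns this generator instead of the dict
stream = cache.generate_streaming_content(cached_result["choices"][0]["message"]["content"])

reassembled = ""
for chunk in stream:
    # each chunk mimics a provider streaming delta: {'choices': [{'delta': {...}}]}
    reassembled += chunk["choices"][0]["delta"]["content"]

assert reassembled == "Hello from the cache!"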