forked from phoenix/litellm-mirror
add streaming_caching support
This commit is contained in:
parent 8af6d967eb
commit fbef73d043
3 changed files with 48 additions and 44 deletions
@@ -1,4 +1,5 @@
 import litellm
+import time
 def get_prompt(*args, **kwargs):
     # make this safe checks, it should not throw any exceptions
     if len(args) > 1:
@@ -30,9 +31,9 @@ class InMemoryCache():
         self.cache_dict = {}

     def set_cache(self, key, value):
-        print("in set cache for inmem")
+        #print("in set cache for inmem")
         self.cache_dict[key] = value
-        print(self.cache_dict)
+        #print(self.cache_dict)

     def get_cache(self, key):
         #print("in get cache for inmem")
@@ -65,11 +66,23 @@ class Cache():
                 return None
         return cache_key

+    def generate_streaming_content(self, content):
+        chunk_size = 5  # Adjust the chunk size as needed
+        for i in range(0, len(content), chunk_size):
+            yield {'choices': [{'delta': {'role': 'assistant', 'content': content[i:i+chunk_size]}}]}
+            time.sleep(0.02)
+
     def get_cache(self, *args, **kwargs):
         try:  # never block execution
             cache_key = self.get_cache_key(*args, **kwargs)
             if cache_key is not None:
-                return self.cache.get_cache(cache_key)
+                cached_result = self.cache.get_cache(cache_key)
+                if cached_result != None and 'stream' in kwargs and kwargs['stream'] == True:
+                    # if streaming is true and we got a cache hit, return a generator
+                    #print("cache hit and stream=True")
+                    #print(cached_result)
+                    return self.generate_streaming_content(cached_result["choices"][0]['message']['content'])
+                return cached_result
         except:
             return None

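For reference, a minimal sketch of how the streaming cache-hit path above could be exercised in isolation. The import path and the bare Cache() constructor are assumptions for illustration; only generate_streaming_content() and the chunk shape it yields come from this commit.

from litellm.caching import Cache  # assumed module path

cache = Cache()  # assumed no-arg constructor backed by InMemoryCache

# a cached, non-streaming completion in the OpenAI-style shape get_cache() expects
cached_result = {"choices": [{"message": {"role": "assistant", "content": "Hello from the cache!"}}]}

# on a cache hit with stream=True, get_cache() returns this generator instead of the dict
stream = cache.generate_streaming_content(cached_result["choices"][0]["message"]["content"])

reassembled = ""
for chunk in stream:
    # each chunk mimics a provider streaming delta: {'choices': [{'delta': {...}}]}
    reassembled += chunk["choices"][0]["delta"]["content"]

assert reassembled == "Hello from the cache!"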