working with embeddings

ishaan-jaff 2023-08-28 14:07:51 -07:00
parent 53018e4b23
commit 71ff0c69ab
3 changed files with 56 additions and 18 deletions


@@ -32,11 +32,15 @@ class InMemoryCache():
         self.cache_dict = {}

     def set_cache(self, key, value):
+        #print("in set cache for inmem")
         self.cache_dict[key] = value

     def get_cache(self, key):
+        #print("in get cache for inmem")
         if key in self.cache_dict:
+            #print("got a cache hit")
             return self.cache_dict[key]
+        #print("got a cache miss")
         return None

 class Cache():
@@ -46,27 +50,35 @@ class Cache():
         if type == "local":
             self.cache = InMemoryCache()

-    def check_cache(self, *args, **kwargs):
-        try: # never block execution
+    def get_cache_key(self, *args, **kwargs):
         prompt = get_prompt(*args, **kwargs)
-        if prompt != None: # check if messages / prompt exists
-            if "model" in kwargs: # default to caching with `model + prompt` as key
-                cache_key = prompt + kwargs["model"]
-                return self.cache.get_cache(cache_key)
+        if prompt is not None:
+            cache_key = prompt
+            if "model" in kwargs:
+                cache_key += kwargs["model"]
+        elif "input" in kwargs:
+            cache_key = " ".join(kwargs["input"])
+            if "model" in kwargs:
+                cache_key += kwargs["model"]
         else:
-            return self.cache.get_cache(prompt)
             return None
+        return cache_key

+    def get_cache(self, *args, **kwargs):
+        try: # never block execution
+            cache_key = self.get_cache_key(*args, **kwargs)
+            if cache_key is not None:
+                return self.cache.get_cache(cache_key)
         except:
             return None

     def add_cache(self, result, *args, **kwargs):
         try:
-            prompt = get_prompt(*args, **kwargs)
-            if "model" in kwargs: # default to caching with `model + prompt` as key
-                cache_key = prompt + kwargs["model"]
+            cache_key = self.get_cache_key(*args, **kwargs)
+            if cache_key is not None:
                 self.cache.set_cache(cache_key, result)
-            else:
-                self.cache.set_cache(prompt, result)
         except:
             pass
@@ -77,5 +89,3 @@ class Cache():
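Taken together, the caching changes above replace the old check_cache path with a shared get_cache_key helper: completion-style calls are keyed on the prompt plus the model name (when one is passed), and embedding-style calls are keyed on the space-joined input list plus the model name. Both get_cache and add_cache now derive their key the same way, which is what lets embedding results be cached at all. A small illustrative sketch of the resulting key shapes (standalone Python, invented example values, not part of the commit):

# Illustrative sketch of how the new get_cache_key shapes keys; values are invented.
def sketch_cache_key(prompt=None, input=None, model=None):
    if prompt is not None:          # completion-style call
        key = prompt
        if model is not None:
            key += model
    elif input is not None:         # embedding-style call
        key = " ".join(input)
        if model is not None:
            key += model
    else:
        return None                 # nothing usable to key on
    return key

# sketch_cache_key(prompt="who is ishaan 5222", model="gpt-3.5-turbo")
#   -> "who is ishaan 5222gpt-3.5-turbo"
# sketch_cache_key(input=["small text"], model="text-embedding-ada-002")
#   -> "small texttext-embedding-ada-002"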


@@ -12,6 +12,7 @@ import pytest
 import litellm
 from litellm import embedding, completion
+from litellm.caching import Cache
 # litellm.set_verbose=True

 messages = [{"role": "user", "content": "who is ishaan Github? "}]
 # comment
@@ -83,7 +84,7 @@ def test_gpt_cache():
 ####### Updated Caching as of Aug 28, 2023 ###################
 messages = [{"role": "user", "content": "who is ishaan 5222"}]

-def test_caching():
+def test_caching_v2():
     try:
         litellm.cache = Cache()
         response1 = completion(model="gpt-3.5-turbo", messages=messages)
@@ -102,7 +103,7 @@ def test_caching():
 # test_caching()

-def test_caching_with_models():
+def test_caching_with_models_v2():
     messages = [{"role": "user", "content": "who is ishaan CTO of litellm from litellm 2023"}]
     litellm.cache = Cache()
     print("test2 for caching")
@@ -123,6 +124,33 @@ def test_caching_with_models():
         print(f"response2: {response2}")
         pytest.fail(f"Error occurred:")

+embedding_large_text = """
+small text
+""" * 5
+
 # test_caching_with_models()

+def test_embedding_caching():
+    import time
+    litellm.cache = Cache()
+    text_to_embed = [embedding_large_text]
+    start_time = time.time()
+    embedding1 = embedding(model="text-embedding-ada-002", input=text_to_embed)
+    end_time = time.time()
+    print(f"Embedding 1 response time: {end_time - start_time} seconds")
+    time.sleep(1)
+    start_time = time.time()
+    embedding2 = embedding(model="text-embedding-ada-002", input=text_to_embed)
+    end_time = time.time()
+    print(f"Embedding 2 response time: {end_time - start_time} seconds")
+    litellm.cache = None
+    if embedding2 != embedding1:
+        print(f"embedding1: {embedding1}")
+        print(f"embedding2: {embedding2}")
+        pytest.fail("Error occurred: Embedding caching failed")
+
+# test_embedding_caching()
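The new test exercises the embedding path end to end: with litellm.cache set to a Cache() instance, a second embedding() call with the same input and model should be served from the cache and compare equal to the first response. A minimal usage sketch of the same pattern outside pytest (assumes provider credentials are configured in the environment; timings are only for illustration):

# Minimal sketch of enabling embedding caching outside the test suite
# (assumes an OpenAI API key is available in the environment).
import time
import litellm
from litellm import embedding
from litellm.caching import Cache

litellm.cache = Cache()  # defaults to the in-memory ("local") cache

texts = ["some text to embed"]
start = time.time()
first = embedding(model="text-embedding-ada-002", input=texts)   # real API call
middle = time.time()
second = embedding(model="text-embedding-ada-002", input=texts)  # served from cache
end = time.time()

print(f"first call: {middle - start:.2f}s, cached call: {end - middle:.2f}s")
litellm.cache = None  # switch caching back off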


@@ -403,7 +403,7 @@ def client(original_function):
         start_time = datetime.datetime.now()
         # [OPTIONAL] CHECK CACHE
         if (litellm.caching or litellm.caching_with_models or litellm.cache != None) and (
-            cached_result := litellm.cache.check_cache(*args, **kwargs)
+            cached_result := litellm.cache.get_cache(*args, **kwargs)
         ) is not None:
             result = cached_result
             return result
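In the client decorator, the cache lookup happens before the provider call: when caching is enabled and get_cache returns a non-None value (captured with the walrus operator), the cached result is returned immediately and the wrapped function never runs. A stripped-down, illustrative sketch of that control flow (the real wrapper also handles logging, exceptions, and more; the add_cache step here mirrors the Cache API shown earlier, not this hunk):

# Stripped-down sketch of the decorator-level cache check; illustrative only.
import litellm

def client(original_function):
    def wrapper(*args, **kwargs):
        # CHECK CACHE first (the real check also consults the litellm.caching
        # and litellm.caching_with_models flags)
        if litellm.cache is not None and (
            cached_result := litellm.cache.get_cache(*args, **kwargs)
        ) is not None:
            return cached_result
        # otherwise call the provider and store the result for next time
        result = original_function(*args, **kwargs)
        if litellm.cache is not None:
            litellm.cache.add_cache(result, *args, **kwargs)
        return result
    return wrapper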