forked from phoenix/litellm-mirror
updates to caching tutorial
parent
f3d8445ff1
commit
cd9b9abde3
2 changed files with 361 additions and 6 deletions
@@ -59,18 +59,37 @@ def test_caching_with_models():


def test_gpt_cache():
    # INIT GPT Cache #
    import pytest
    from typing import Any, Dict

    from gptcache import cache
    from gptcache.processor.pre import last_content_without_prompt

    from litellm.gpt_cache import completion

    def pre_cache_func(data: Dict[str, Any], **params: Dict[str, Any]) -> Any:
        # use this to set the cache key: last message content + model name
        last_content_without_prompt_val = last_content_without_prompt(data, **params)
        print("last content without prompt", last_content_without_prompt_val)
        print("model", data["model"])
        cache_key = last_content_without_prompt_val + data["model"]
        print("cache_key", cache_key)
        return cache_key

    cache.init(pre_func=pre_cache_func)
    cache.set_openai_key()

    messages = [{"role": "user", "content": "why should I use LiteLLM today"}]
    response1 = completion(model="gpt-3.5-turbo", messages=messages)
    response2 = completion(model="gpt-3.5-turbo", messages=messages)
    response3 = completion(model="command-nightly", messages=messages)
    print(f"response2: {response2}")
    print(f"response3: {response3}")

    if response1["choices"] != response2["choices"]:  # same model should return the cached response
        print(f"response1: {response1}")
        print(f"response2: {response2}")
        pytest.fail("Error occurred: same model did not return cached response")

    if response3["choices"] == response2["choices"]:  # different models should not share a cache entry
        print(f"response2: {response2}")
        print(f"response3: {response3}")
        pytest.fail("Error occurred: different model returned a cached response")
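The key detail in this test is the composite cache key built by pre_cache_func: the last message content concatenated with the model name, so a repeated prompt to the same model hits the cache while the same prompt to a different model misses. A minimal standalone sketch of that scheme (the sample request dicts and the make_key helper are hypothetical, for illustration only):

# Sketch of the cache-key scheme from pre_cache_func above (hypothetical data).
from typing import Any, Dict

def make_key(data: Dict[str, Any]) -> str:
    # last user message content + model name, mirroring pre_cache_func
    return data["messages"][-1]["content"] + data["model"]

gpt_call = {"model": "gpt-3.5-turbo",
            "messages": [{"role": "user", "content": "why should I use LiteLLM today"}]}
cohere_call = {"model": "command-nightly",
               "messages": [{"role": "user", "content": "why should I use LiteLLM today"}]}

# Same prompt + same model -> same key (cache hit on the second call);
# same prompt + different model -> different key (no cross-model cache hit).
assert make_key(gpt_call) == make_key(dict(gpt_call))
assert make_key(gpt_call) != make_key(cohere_call)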