mirror of https://github.com/BerriAI/litellm.git
adding exact match caching
commit 79bcb59e0b (parent 4d475793ee)
7 changed files with 41 additions and 3 deletions
@@ -15,7 +15,7 @@ openrouter_key = None
huggingface_key = None
vertex_project = None
vertex_location = None

caching = False
hugging_api_token = None
model_cost = {
    "gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
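For context, the model_cost entry in the hunk above (presumably the package-level settings in litellm/__init__.py) carries per-token prices. The following is a rough, illustrative sketch of how such an entry could be used to estimate the dollar cost of a single call; the estimate_cost helper and the token counts are hypothetical and not part of this commit or litellm's API.

# Illustrative only: estimate a request's cost from a model_cost-style entry.
# estimate_cost and the token counts below are hypothetical, not from this commit.
model_cost = {
    "gpt-3.5-turbo": {
        "max_tokens": 4000,
        "input_cost_per_token": 0.0000015,
        "output_cost_per_token": 0.000002,
    },
}

def estimate_cost(model, prompt_tokens, completion_tokens):
    rates = model_cost[model]
    return (prompt_tokens * rates["input_cost_per_token"]
            + completion_tokens * rates["output_cost_per_token"])

print(estimate_cost("gpt-3.5-turbo", prompt_tokens=12, completion_tokens=40))  # 9.8e-05 (USD)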
Binary file not shown.
Binary file not shown.
Binary file not shown.
litellm/tests/test_caching.py (new file, 27 lines)
@@ -0,0 +1,27 @@
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import os
sys.path.insert(0, os.path.abspath('../..'))  # Adds the parent directory to the system path
import pytest
import litellm
from litellm import embedding, completion

litellm.caching = True
messages = [{"role": "user", "content": "Hey, how's it going?"}]


# test if response cached
try:
    response1 = completion(model="gpt-3.5-turbo", messages=messages)
    response2 = completion(model="gpt-3.5-turbo", messages=messages)
    if response2 != response1:
        print(f"response1: {response1}")
        print(f"response2: {response2}")
        raise Exception
except Exception as e:
    print(f"error occurred: {traceback.format_exc()}")
    pytest.fail(f"Error occurred: {e}")
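The committed test above runs at module import time. As a minimal sketch only, and not the code in this commit, the same exact-match check could be wrapped in a pytest-collectable function; the function name and the teardown are illustrative assumptions.

# Illustrative sketch, not the committed test: the same exact-match check
# wrapped in a pytest function so it does not execute on import.
import pytest
import litellm
from litellm import completion

messages = [{"role": "user", "content": "Hey, how's it going?"}]

def test_caching_exact_match():
    litellm.caching = True
    try:
        response1 = completion(model="gpt-3.5-turbo", messages=messages)
        response2 = completion(model="gpt-3.5-turbo", messages=messages)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
    finally:
        litellm.caching = False  # avoid leaking cache state into other tests
    # With exact-match caching enabled, the second call should return the cached object.
    assert response2 == response1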
@ -213,7 +213,7 @@ def test_completion_together_ai_stream():
|
|||
except Exception as e:
|
||||
pytest.fail(f"Error occurred: {e}")
|
||||
|
||||
|
||||
test_completion_together_ai_stream()
|
||||
def test_petals():
|
||||
model_name = "stabilityai/StableBeluga2"
|
||||
try:
|
||||
|
|
|
@ -28,6 +28,7 @@ supabaseClient = None
|
|||
callback_list = []
|
||||
user_logger_fn = None
|
||||
additional_details = {}
|
||||
local_cache = {}
|
||||
|
||||
def print_verbose(print_statement):
|
||||
if litellm.set_verbose:
|
||||
|
@ -138,12 +139,22 @@ def client(original_function):
|
|||
|
||||
def wrapper(*args, **kwargs):
|
||||
start_time = None
|
||||
result = None
|
||||
try:
|
||||
function_setup(*args, **kwargs)
|
||||
## MODEL CALL
|
||||
start_time = datetime.datetime.now()
|
||||
## CHECK CACHE RESPONSES
|
||||
messages = args[1] if len(args) > 1 else kwargs["messages"]
|
||||
prompt = " ".join(message["content"] for message in messages)
|
||||
if litellm.caching and prompt in local_cache:
|
||||
result = local_cache[prompt]
|
||||
else:
|
||||
result = original_function(*args, **kwargs)
|
||||
end_time = datetime.datetime.now()
|
||||
## CACHE RESPONSES
|
||||
if litellm.caching:
|
||||
local_cache[prompt] = result
|
||||
## LOG SUCCESS
|
||||
crash_reporting(*args, **kwargs)
|
||||
my_thread = threading.Thread(target=handle_success, args=(args, kwargs, result, start_time, end_time)) # don't interrupt execution of main thread
|
||||
|
|
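The caching added in this hunk is a plain exact-match lookup: the content of every message is joined into one string, and that string indexes a module-level dict. Below is a minimal standalone sketch of the same pattern for readability; cached_call and _local_cache are illustrative names, not litellm's API or the code in this commit.

# Standalone sketch of exact-match response caching, independent of litellm.
# cached_call and _local_cache are illustrative names for this sketch only.
_local_cache = {}

def cached_call(func, messages, **kwargs):
    # Key the cache on the concatenated message contents (exact match only).
    prompt = " ".join(message["content"] for message in messages)
    if prompt in _local_cache:
        return _local_cache[prompt]
    result = func(messages=messages, **kwargs)
    _local_cache[prompt] = result
    return result

Because the key is the raw concatenated prompt, any difference in wording, whitespace, or message order is a cache miss, and there is no eviction, so a long-running process keeps every cached response in memory.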