adding exact match caching

Krrish Dholakia 2023-08-16 21:44:50 -07:00
parent 4d475793ee
commit 79bcb59e0b
7 changed files with 41 additions and 3 deletions
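In short, this change adds a process-local, exact-match response cache: a module-level `caching` flag, a `local_cache` dict, and logic in the `client` wrapper that joins the message contents into a prompt string and reuses any previously stored response for that exact string. Below is a minimal standalone sketch of the idea, assuming the dict-based cache shown in the diff; `fake_completion` and `cached_completion` are illustrative names, not part of the commit.

```python
# Minimal sketch of the exact-match caching added in this commit.
# The cache key is the space-joined message contents, as in the `client` wrapper;
# `fake_completion` stands in for the real model call.
local_cache = {}
caching = True  # mirrors the new module-level `litellm.caching` flag

def fake_completion(model, messages):
    # placeholder for the actual LLM call
    return {"model": model, "echo": messages[-1]["content"]}

def cached_completion(model, messages):
    prompt = " ".join(message["content"] for message in messages)
    if caching and prompt in local_cache:
        return local_cache[prompt]      # exact-match cache hit
    result = fake_completion(model, messages)
    if caching:
        local_cache[prompt] = result    # store for identical future prompts
    return result

messages = [{"role": "user", "content": "Hey, how's it going?"}]
r1 = cached_completion("gpt-3.5-turbo", messages)
r2 = cached_completion("gpt-3.5-turbo", messages)
assert r1 is r2  # second call is served from the cache
```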

View file

@@ -15,7 +15,7 @@ openrouter_key = None
huggingface_key = None
vertex_project = None
vertex_location = None
caching = False
hugging_api_token = None
model_cost = {
    "gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},

View file

@@ -0,0 +1,27 @@
import sys, os
import traceback
from dotenv import load_dotenv
load_dotenv()
import os
sys.path.insert(0, os.path.abspath('../..'))  # Adds the parent directory to the system path
import pytest
import litellm
from litellm import embedding, completion

litellm.caching = True
messages = [{"role": "user", "content": "Hey, how's it going?"}]

# test if response cached
try:
    response1 = completion(model="gpt-3.5-turbo", messages=messages)
    response2 = completion(model="gpt-3.5-turbo", messages=messages)
    if response2 != response1:
        print(f"response1: {response1}")
        print(f"response2: {response2}")
        raise Exception
except Exception as e:
    print(f"error occurred: {traceback.format_exc()}")
    pytest.fail(f"Error occurred: {e}")

View file

@@ -213,7 +213,7 @@ def test_completion_together_ai_stream():
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

test_completion_together_ai_stream()

def test_petals():
    model_name = "stabilityai/StableBeluga2"
    try:

View file

@@ -28,6 +28,7 @@ supabaseClient = None
callback_list = []
user_logger_fn = None
additional_details = {}
local_cache = {}

def print_verbose(print_statement):
    if litellm.set_verbose:
@@ -138,12 +139,22 @@ def client(original_function):
    def wrapper(*args, **kwargs):
        start_time = None
        result = None
        try:
            function_setup(*args, **kwargs)
            ## MODEL CALL
            start_time = datetime.datetime.now()
            ## CHECK CACHED RESPONSES
            messages = args[1] if len(args) > 1 else kwargs["messages"]
            prompt = " ".join(message["content"] for message in messages)
            if litellm.caching and prompt in local_cache:
                result = local_cache[prompt]
            else:
                result = original_function(*args, **kwargs)
            end_time = datetime.datetime.now()
            ## CACHE RESPONSES
            if litellm.caching:
                local_cache[prompt] = result
            ## LOG SUCCESS
            crash_reporting(*args, **kwargs)
            my_thread = threading.Thread(target=handle_success, args=(args, kwargs, result, start_time, end_time))  # don't interrupt execution of main thread
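A note on the design visible in this hunk: the cache key is only the space-joined `content` of the messages, so identical prompts sent to different models or with different parameters would share an entry, and the store is an in-process dict with no eviction or TTL. These look like deliberate simplifications for a first exact-match cache rather than a general caching layer.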