diff --git a/litellm/tests/test_caching.py b/litellm/tests/test_caching.py
index fefe024a8..8c4a428df 100644
--- a/litellm/tests/test_caching.py
+++ b/litellm/tests/test_caching.py
@@ -14,7 +14,6 @@ from litellm import embedding, completion
 
 messages = [{"role": "user", "content": "who is ishaan Github? "}]
 
-
 # test if response cached
 def test_caching():
     try:
@@ -36,6 +35,7 @@ def test_caching():
 
 def test_caching_with_models():
     litellm.caching_with_models = True
+    response1 = completion(model="gpt-3.5-turbo", messages=messages)
     response2 = completion(model="gpt-3.5-turbo", messages=messages)
     response3 = completion(model="command-nightly", messages=messages)
     print(f"response2: {response2}")
@@ -46,6 +46,12 @@ def test_caching_with_models():
         print(f"response2: {response2}")
         print(f"response3: {response3}")
         pytest.fail(f"Error occurred:")
+    if response1 != response2:
+        print(f"response1: {response1}")
+        print(f"response2: {response2}")
+        pytest.fail(f"Error occurred:")
+# test_caching_with_models()
+
 
 
 def test_gpt_cache():
diff --git a/litellm/utils.py b/litellm/utils.py
index 719ec6514..309d90396 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -410,19 +410,20 @@ def client(original_function):
     def check_cache(*args, **kwargs):
         try:  # never block execution
             prompt = get_prompt(*args, **kwargs)
-            if (prompt != None and prompt
-                    in local_cache):  # check if messages / prompt exists
+            if (prompt != None):  # check if messages / prompt exists
                 if litellm.caching_with_models:
                     # if caching with model names is enabled, key is prompt + model name
-                    if ("model" in kwargs and kwargs["model"]
-                            in local_cache[prompt]["models"]):
+                    if ("model" in kwargs):
                         cache_key = prompt + kwargs["model"]
-                        return local_cache[cache_key]
+                        if cache_key in local_cache:
+                            return local_cache[cache_key]
                 else:  # caching only with prompts
-                    result = local_cache[prompt]
-                    return result
+                    if prompt in local_cache:
+                        result = local_cache[prompt]
+                        return result
             else:
                 return None
+            return None  # default to return None
         except:
             return None
 
@@ -430,8 +431,7 @@ def client(original_function):
         try:  # never block execution
             prompt = get_prompt(*args, **kwargs)
             if litellm.caching_with_models:  # caching with model + prompt
-                if ("model" in kwargs
-                        and kwargs["model"] in local_cache[prompt]["models"]):
+                if ("model" in kwargs):
                     cache_key = prompt + kwargs["model"]
                     local_cache[cache_key] = result
             else:  # caching based only on prompts