diff --git a/litellm/tests/test_caching.py b/litellm/tests/test_caching.py
index fefe024a8..8c4a428df 100644
--- a/litellm/tests/test_caching.py
+++ b/litellm/tests/test_caching.py
@@ -14,7 +14,6 @@ from litellm import embedding, completion
 
 messages = [{"role": "user", "content": "who is ishaan Github? "}]
 
-
 # test if response cached
 def test_caching():
     try:
@@ -36,6 +35,7 @@ def test_caching():
 
 def test_caching_with_models():
     litellm.caching_with_models = True
+    response1 = completion(model="gpt-3.5-turbo", messages=messages)
     response2 = completion(model="gpt-3.5-turbo", messages=messages)
     response3 = completion(model="command-nightly", messages=messages)
     print(f"response2: {response2}")
@@ -46,6 +46,12 @@ def test_caching_with_models():
         print(f"response2: {response2}")
         print(f"response3: {response3}")
         pytest.fail(f"Error occurred:")
+    if response1 != response2:
+        print(f"response1: {response1}")
+        print(f"response2: {response2}")
+        pytest.fail(f"Error occurred:")
+# test_caching_with_models()
+
 
 
 def test_gpt_cache():
diff --git a/litellm/utils.py b/litellm/utils.py
index 719ec6514..309d90396 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -410,19 +410,20 @@ def client(original_function):
     def check_cache(*args, **kwargs):
         try:  # never block execution
             prompt = get_prompt(*args, **kwargs)
-            if (prompt != None and prompt
-                    in local_cache):  # check if messages / prompt exists
+            if (prompt != None):  # check if messages / prompt exists
                 if litellm.caching_with_models:
                     # if caching with model names is enabled, key is prompt + model name
-                    if ("model" in kwargs and kwargs["model"]
-                            in local_cache[prompt]["models"]):
+                    if ("model" in kwargs):
                         cache_key = prompt + kwargs["model"]
-                        return local_cache[cache_key]
+                        if cache_key in local_cache:
+                            return local_cache[cache_key]
                 else:  # caching only with prompts
-                    result = local_cache[prompt]
-                    return result
+                    if prompt in local_cache:
+                        result = local_cache[prompt]
+                        return result
             else:
                 return None
+            return None  # default to return None
         except:
             return None
 
@@ -430,8 +431,7 @@ def client(original_function):
         try:  # never block execution
             prompt = get_prompt(*args, **kwargs)
             if litellm.caching_with_models:  # caching with model + prompt
-                if ("model" in kwargs
-                        and kwargs["model"] in local_cache[prompt]["models"]):
+                if ("model" in kwargs):
                     cache_key = prompt + kwargs["model"]
                     local_cache[cache_key] = result
             else:  # caching based only on prompts