diff --git a/litellm/__init__.py b/litellm/__init__.py
index 363cb6205..ef2a5b3cf 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -30,9 +30,9 @@ togetherai_api_key: Optional[str] = None
 baseten_key: Optional[str] = None
 use_client = False
 logging = True
-caching = False
-cache: Optional[Cache] = None # set to litellm.caching Cache() object
-caching_with_models = False # if you want the caching key to be model + prompt
+caching = False # deprecated soon
+caching_with_models = False # if you want the caching key to be model + prompt # deprecated soon
+cache: Optional[Cache] = None # cache object
 model_alias_map: Dict[str, str] = {}
 model_cost = {
     "babbage-002": {
diff --git a/litellm/caching.py b/litellm/caching.py
index 3cab7d460..d6a7ddacd 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -75,7 +75,6 @@ class Cache():
             if cache_key is not None:
                 self.cache.set_cache(cache_key, result)
         except:
-            pass
@@ -83,6 +82,3 @@ class Cache():
-
-
-
diff --git a/litellm/tests/test_caching.py b/litellm/tests/test_caching.py
index 1845f3b47..7a6ca0882 100644
--- a/litellm/tests/test_caching.py
+++ b/litellm/tests/test_caching.py
@@ -29,13 +29,11 @@ def test_caching():
         if response2 != response1:
             print(f"response1: {response1}")
             print(f"response2: {response2}")
-            pytest.fail(f"Error occurred: {e}")
+            pytest.fail(f"Error occurred: responses are not equal")
     except Exception as e:
         litellm.caching = False
-        print(f"error occurred: {traceback.format_exc()}")
         pytest.fail(f"Error occurred: {e}")
 
-
 def test_caching_with_models():
     litellm.caching_with_models = True
     response1 = completion(model="gpt-3.5-turbo", messages=messages)
diff --git a/litellm/utils.py b/litellm/utils.py
index 8e9b1dd0e..bc9789125 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -26,6 +26,7 @@ from .exceptions import (
     OpenAIError,
 )
 from typing import List, Dict, Union, Optional
+from .caching import Cache
 
 ####### ENVIRONMENT VARIABLES ####################
@@ -402,11 +403,16 @@ def client(original_function):
             kwargs["litellm_call_id"] = litellm_call_id
             start_time = datetime.datetime.now()
             # [OPTIONAL] CHECK CACHE
-            if (litellm.caching or litellm.caching_with_models or litellm.cache != None) and (
-                cached_result := litellm.cache.get_cache(*args, **kwargs)
-            ) is not None:
-                result = cached_result
-                return result
+            # remove this after deprecating litellm.caching
+            if (litellm.caching or litellm.caching_with_models) and litellm.cache is None:
+                litellm.cache = Cache()
+
+            # checking cache
+            if (litellm.cache != None or litellm.caching or litellm.caching_with_models):
+                cached_result = litellm.cache.get_cache(*args, **kwargs)
+                if cached_result != None:
+                    return cached_result
+
             # MODEL CALL
             result = original_function(*args, **kwargs)
             if "stream" in kwargs and kwargs["stream"] == True:
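
A minimal usage sketch of the new-style cache, pieced together from the test file and the cache check added in the utils.py hunk above; the prompt text, and the assumption that the client wrapper writes the first result back into the cache (the set_cache path shown in caching.py), are illustrative rather than part of this patch:

import litellm
from litellm import completion
from litellm.caching import Cache

# New style: attach a Cache object directly. litellm.caching and
# litellm.caching_with_models still work but are flagged "deprecated soon" above.
litellm.cache = Cache()

messages = [{"role": "user", "content": "Hey, how's it going?"}]  # placeholder prompt

# First call goes to the model; the wrapper is expected to store the result
# in the cache (see self.cache.set_cache in the caching.py hunk).
response1 = completion(model="gpt-3.5-turbo", messages=messages)

# Second identical call should be served by litellm.cache.get_cache(*args, **kwargs)
# inside the client wrapper, so the two responses should compare equal,
# mirroring the assertion in test_caching.
response2 = completion(model="gpt-3.5-turbo", messages=messages)
assert response1 == response2

litellm.cache = None  # turn caching back off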