diff --git a/dist/litellm-0.12.4.dev1-py3-none-any.whl b/dist/litellm-0.12.4.dev1-py3-none-any.whl
new file mode 100644
index 000000000..91f0678cf
Binary files /dev/null and b/dist/litellm-0.12.4.dev1-py3-none-any.whl differ
diff --git a/dist/litellm-0.12.4.dev1.tar.gz b/dist/litellm-0.12.4.dev1.tar.gz
new file mode 100644
index 000000000..36d5d61e6
Binary files /dev/null and b/dist/litellm-0.12.4.dev1.tar.gz differ
diff --git a/litellm/router.py b/litellm/router.py
index 796cbb985..7123154cf 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -25,7 +25,8 @@ class Router:
                  model_list: Optional[list]=None,
                  redis_host: Optional[str] = None,
                  redis_port: Optional[int] = None,
-                 redis_password: Optional[str] = None) -> None:
+                 redis_password: Optional[str] = None,
+                 cache_responses: bool = False) -> None:
         if model_list:
             self.model_list = model_list
             self.model_names = [m["model_name"] for m in model_list]
@@ -41,7 +42,8 @@ class Router:
                 "type": "local"
             }
         self.cache = litellm.Cache(cache_config) # use Redis for tracking load balancing
-        litellm.cache = litellm.Cache(**cache_config) # use Redis for caching completion requests
+        if cache_responses:
+            litellm.cache = litellm.Cache(**cache_config) # use Redis for caching completion requests
         litellm.success_callback = [self.deployment_callback]
 
     def completion(self,
@@ -58,9 +60,10 @@ class Router:
 
         # pick the one that is available (lowest TPM/RPM)
         deployment = self.get_available_deployment(model=model, messages=messages)
-
+        print(f"kwargs: {kwargs}")
         data = deployment["litellm_params"]
         data["messages"] = messages
+        print(f"data: {data}")
         # call via litellm.completion()
         return litellm.completion(**{**data, **kwargs})
 
diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py
index a2a311c5a..e581c435d 100644
--- a/litellm/tests/test_router.py
+++ b/litellm/tests/test_router.py
@@ -65,7 +65,6 @@ def test_multiple_deployments():
 
 ### FUNCTION CALLING
 def test_function_calling():
-    litellm.set_verbose =True
     model_list = [
         {
             "model_name": "gpt-3.5-turbo-0613",
@@ -151,3 +150,5 @@ def test_litellm_params_not_overwritten_by_function_calling():
         assert response.choices[0].finish_reason != "function_call"
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
+
+test_litellm_params_not_overwritten_by_function_calling()
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index 9c2611012..e872c8922 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -771,13 +771,15 @@ def client(original_function):
 
             # [OPTIONAL] CHECK CACHE
             # remove this after deprecating litellm.caching
+            print_verbose(f"litellm.caching: {litellm.caching}; litellm.caching_with_models: {litellm.caching_with_models}")
             if (litellm.caching or litellm.caching_with_models) and litellm.cache is None:
                 litellm.cache = Cache()
 
+            print_verbose(f"kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}")
             if kwargs.get("caching", False) or litellm.cache is not None: # allow users to control returning cached responses from the completion function
                 # checking cache
                 if (litellm.cache != None or litellm.caching or litellm.caching_with_models):
-                    print_verbose(f"LiteLLM: Checking Cache")
+                    print_verbose(f"Checking Cache")
                     cached_result = litellm.cache.get_cache(*args, **kwargs)
                     if cached_result != None:
                         print_verbose(f"Cache Hit!")
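
Usage sketch for the `cache_responses` flag introduced in this diff (not part of the patch itself; the model name, API key, and Redis settings below are placeholders). With `cache_responses=True`, the Router also sets `litellm.cache`, so repeated identical completion calls can be served from the configured cache rather than re-hitting the provider.

```python
# Sketch only -- illustrates the new cache_responses flag; values are placeholders.
from litellm.router import Router

model_list = [
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {
            "model": "gpt-3.5-turbo",
            "api_key": "sk-...",  # placeholder
        },
    },
]

router = Router(
    model_list=model_list,
    redis_host="localhost",  # backs the Router's load-balancing cache
    redis_port=6379,
    redis_password="",
    cache_responses=True,    # new flag: also cache completion responses via litellm.cache
)

response = router.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
)
```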