diff --git a/.flake8 b/.flake8
new file mode 100644
index 000000000..b51cc0045
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,2 @@
+[flake8]
+ignore = E,F,W,B,B9,C,D,I,N,S,W503,W504,E203, TCE,TCA,EXE999,E999,TD
\ No newline at end of file
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 000000000..8bda916bc
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,8 @@
+repos:
+- repo: https://github.com/pycqa/flake8
+  rev: 3.8.4 # The version of flake8 to use
+  hooks:
+    - id: flake8
+      exclude: ^litellm/tests/|^litellm/proxy/|^litellm/integrations/
+      additional_dependencies: [flake8-print]
+      files: litellm/.*\.py
\ No newline at end of file
diff --git a/litellm/budget_manager.py b/litellm/budget_manager.py
index 77a1e51f2..6a9d1e520 100644
--- a/litellm/budget_manager.py
+++ b/litellm/budget_manager.py
@@ -14,7 +14,8 @@ class BudgetManager:
 
     def print_verbose(self, print_statement):
         if litellm.set_verbose:
-            print(print_statement)
+            import logging
+            logging.info(print_statement)
 
     def load_data(self):
         if self.client_type == "local":
@@ -149,8 +150,6 @@ class BudgetManager:
             'project_name' : self.project_name,
             "user_dict": self.user_dict
         }
-        print(f"data: {data}")
         response = requests.post(url, headers=headers, json=data)
-        print(f"response: {response.text}")
         response = response.json()
         return response
\ No newline at end of file
diff --git a/litellm/caching.py b/litellm/caching.py
index 667eff622..9632a6b03 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -8,7 +8,7 @@
 # Thank you users! We ❤️ you! - Krrish & Ishaan
 
 import litellm
-import time
+import time, logging
 import json, traceback
 
 
@@ -37,7 +36,6 @@ class RedisCache(BaseCache):
     def __init__(self, host, port, password):
         import redis
         # if users don't provider one, use the default litellm cache
-        print(f"HOST: {host}; PORT: {port}; PASSWORD: {password}")
         self.redis_client = redis.Redis(host=host, port=port, password=password)
 
     def set_cache(self, key, value, **kwargs):
@@ -46,7 +45,7 @@ class RedisCache(BaseCache):
            self.redis_client.set(name=key, value=str(value), ex=ttl)
        except Exception as e:
            # NON blocking - notify users Redis is throwing an exception
-            print("LiteLLM Caching: set() - Got exception from REDIS : ", e)
+            logging.debug(f"LiteLLM Caching: set() - Got exception from REDIS: {e}")
 
     def get_cache(self, key, **kwargs):
         try:
@@ -61,13 +60,13 @@
        except Exception as e:
            # NON blocking - notify users Redis is throwing an exception
            traceback.print_exc()
-            print("LiteLLM Caching: get() - Got exception from REDIS: ", e)
+            logging.debug(f"LiteLLM Caching: get() - Got exception from REDIS: {e}")
 
 
 class HostedCache(BaseCache):
     def set_cache(self, key, value, **kwargs):
         if "ttl" in kwargs:
-            print("LiteLLM Caching: TTL is not supported for hosted cache!")
+            logging.debug("LiteLLM Caching: TTL is not supported for hosted cache!")
         # make a post request to api.litellm.ai/set_cache
         import requests
         url = f"https://api.litellm.ai/set_cache?key={key}&value={str(value)}"
@@ -200,12 +199,10 @@ class Cache:
            cached_result = self.cache.get_cache(cache_key)
            if cached_result != None and 'stream' in kwargs and kwargs['stream'] == True:
                # if streaming is true and we got a cache hit, return a generator
-                # print("cache hit and stream=True")
-                # print(cached_result)
                return self.generate_streaming_content(cached_result["choices"][0]['message']['content'])
            return cached_result
        except Exception as e:
-            print(f"An exception occurred: {traceback.format_exc()}")
+            logging.debug(f"An exception occurred: {traceback.format_exc()}")
            return None
 
    def add_cache(self, result, *args, **kwargs):
@@ -224,10 +221,7 @@ class Cache:
                cache_key = kwargs["cache_key"]
            else:
                cache_key = self.get_cache_key(*args, **kwargs)
-            # print("adding to cache", cache_key, result)
-            # print(cache_key)
            if cache_key is not None:
-                # print("adding to cache", cache_key, result)
                self.cache.set_cache(cache_key, result, **kwargs)
        except:
            pass
diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py
index 20a1c1fde..e6f48a5bd 100644
--- a/litellm/integrations/langfuse.py
+++ b/litellm/integrations/langfuse.py
@@ -36,10 +36,6 @@ class LangFuseLogger:
            print_verbose(
                f"Langfuse Logging - Enters logging function for model {kwargs}"
            )
-            # print(response_obj)
-            # print(response_obj['choices'][0]['message']['content'])
-            # print(response_obj['usage']['prompt_tokens'])
-            # print(response_obj['usage']['completion_tokens'])
 
            metadata = kwargs.get("metadata", {})
            prompt = [kwargs['messages']]
diff --git a/litellm/llms/bedrock.py b/litellm/llms/bedrock.py
index e1835d8c0..20e96686f 100644
--- a/litellm/llms/bedrock.py
+++ b/litellm/llms/bedrock.py
@@ -397,7 +397,6 @@ def completion(
        outputText = response_body.get('results')[0].get('outputText')
 
        response_metadata = response.get("ResponseMetadata", {})
-        print(f"response_metadata: {response_metadata}")
        if response_metadata.get("HTTPStatusCode", 500) >= 400:
            raise BedrockError(
                message=outputText,
diff --git a/litellm/llms/ollama.py b/litellm/llms/ollama.py
index 3a0530803..add9c8d7f 100644
--- a/litellm/llms/ollama.py
+++ b/litellm/llms/ollama.py
@@ -147,7 +147,6 @@ def get_ollama_response_stream(
                    yield completion_obj
            except Exception as e:
                traceback.print_exc()
-                print(f"Error decoding JSON: {e}")
    session.close()
 
 if async_generator_imported:
@@ -198,5 +197,6 @@
                        completion_obj["content"] = j["response"]
                    await yield_({"choices": [{"delta": completion_obj}]})
                except Exception as e:
-                    print(f"Error decoding JSON: {e}")
+                    import logging
+                    logging.debug(f"Error decoding JSON: {e}")
        session.close()
\ No newline at end of file
diff --git a/litellm/main.py b/litellm/main.py
index 143aa29ea..b4bb6d9a2 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -1624,13 +1624,9 @@ def batch_completion_models_all_responses(*args, **kwargs):
 
    with concurrent.futures.ThreadPoolExecutor(max_workers=len(models)) as executor:
        for idx, model in enumerate(models):
-            print(f"{GREEN}LiteLLM: Making request to model: {model}{RESET}")
            future = executor.submit(completion, *args, model=model, **kwargs)
            if future.result() is not None:
                responses.append(future.result())
-                print(f"{GREEN}LiteLLM: Model {model} returned response{RESET}")
-            else:
-                print(f"{RED}LiteLLM: Model {model } did not return a response{RESET}")
 
    return responses
 
@@ -1863,6 +1859,7 @@ def embedding(
 
 ###### Text Completion ################
 def text_completion(*args, **kwargs):
+    global print_verbose
     import copy
     """
     This maps to the Openai.Completion.create format, which has a different I/O (accepts prompt, returning ["choices"]["text"].
@@ -1930,7 +1927,7 @@ def text_completion(*args, **kwargs):
        raw_response = response._hidden_params.get("original_response", None)
        transformed_logprobs = litellm.utils.transform_logprobs(raw_response)
    except Exception as e:
-        print("LiteLLM non blocking exception", e)
+        print_verbose(f"LiteLLM non blocking exception: {e}")
    text_completion_response["id"] = response["id"]
    text_completion_response["object"] = "text_completion"
    text_completion_response["created"] = response["created"]
@@ -1964,7 +1961,8 @@ def moderation(input: str, api_key: Optional[str]=None):
 ## Set verbose to true -> ```litellm.set_verbose = True```
 def print_verbose(print_statement):
     if litellm.set_verbose:
-        print(f"LiteLLM: {print_statement}")
+        import logging
+        logging.info(f"LiteLLM: {print_statement}")
 
 def config_completion(**kwargs):
     if litellm.config_path != None:
diff --git a/litellm/utils.py b/litellm/utils.py
index 7e4587700..935f5e36c 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -285,7 +285,8 @@ class TextCompletionResponse(OpenAIObject):
 ############################################################
 def print_verbose(print_statement):
     if litellm.set_verbose:
-        print(f"LiteLLM: {print_statement}")
+        import logging
+        logging.info(f"LiteLLM: {print_statement}")
 
 ####### LOGGING ###################
 from enum import Enum
@@ -538,8 +539,6 @@ class Logging:
                        print_verbose("reaches api manager for updating model cost")
                        litellm.apiManager.update_cost(completion_obj=result, user=self.user)
                    if callback == "cache":
-                        # print("entering logger first time")
-                        # print(self.litellm_params["stream_response"])
                        if litellm.cache != None and self.model_call_details.get('optional_params', {}).get('stream', False) == True:
                            litellm_call_id = self.litellm_params["litellm_call_id"]
                            if litellm_call_id in self.litellm_params["stream_response"]:
@@ -550,10 +549,7 @@ class Logging:
                                self.litellm_params["stream_response"][litellm_call_id]["choices"][0]["message"]["content"] += result["content"]
                            else: # init a streaming response for this call id
                                new_model_response = ModelResponse(choices=[Choices(message=Message(content="default"))])
-                                #print("creating new model response")
-                                #print(new_model_response)
                                self.litellm_params["stream_response"][litellm_call_id] = new_model_response
-                            #print("adding to cache for", litellm_call_id)
                            litellm.cache.add_cache(self.litellm_params["stream_response"][litellm_call_id], **self.model_call_details)
                    if callback == "promptlayer":
                        print_verbose("reaches promptlayer for logging!")
@@ -576,7 +572,6 @@ class Logging:
 
                    print_verbose("reaches supabase for streaming logging!")
                    result = kwargs["complete_streaming_response"]
-                    # print(kwargs)
                    model = kwargs["model"]
                    messages = kwargs["messages"]
                    optional_params = kwargs.get("optional_params", {})
@@ -732,11 +727,11 @@ def exception_logging(
                    model_call_details
                ) # Expectation: any logger function passed in by the user should accept a dict object
        except Exception as e:
-            print(
+            print_verbose(
                f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
            )
    except Exception as e:
-        print(
+        print_verbose(
            f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while logging {traceback.format_exc()}"
        )
        pass
@@ -799,7 +794,6 @@ def client(original_function):
            return logging_obj
        except Exception as e: # DO NOT BLOCK running the function because of this
            print_verbose(f"[Non-Blocking] {traceback.format_exc()}; args - {args}; kwargs - {kwargs}")
-            print(e)
            pass
 
    def crash_reporting(*args, **kwargs):
@@ -1776,9 +1770,9 @@ def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None, api_
            custom_llm_provider = "bedrock"
 
        if custom_llm_provider is None or custom_llm_provider=="":
-            print()
-            print("\033[1;31mProvider List: https://docs.litellm.ai/docs/providers\033[0m")
-            print()
+            print() # noqa
+            print("\033[1;31mProvider List: https://docs.litellm.ai/docs/providers\033[0m") # noqa
+            print() # noqa
            raise ValueError(f"LLM Provider NOT provided. Pass in the LLM provider you are trying to call. E.g. For 'Huggingface' inference endpoints pass in `completion(model='huggingface/{model}',..)` Learn more: https://docs.litellm.ai/docs/providers")
        return model, custom_llm_provider, dynamic_api_key, api_base
    except Exception as e:
@@ -2772,7 +2766,7 @@ def get_all_keys(llm_provider=None):
 
 
 def get_model_list():
-    global last_fetched_at
+    global last_fetched_at, print_verbose
    try:
        # if user is using hosted product -> get their updated model list
        user_email = (
@@ -2784,7 +2778,7 @@ def get_model_list():
        if user_email:
            # make the api call
            last_fetched_at = time.time()
-            print(f"last_fetched_at: {last_fetched_at}")
+            print_verbose(f"last_fetched_at: {last_fetched_at}")
            response = requests.post(
                url="http://api.litellm.ai/get_model_list",
                headers={"content-type": "application/json"},
@@ -2820,10 +2814,10 @@ def exception_type(
    global user_logger_fn, liteDebuggerClient
    exception_mapping_worked = False
    if litellm.suppress_debug_info is False:
-        print()
-        print("\033[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new\033[0m")
-        print("LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.")
-        print()
+        print() # noqa
+        print("\033[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new\033[0m") # noqa
+        print("LiteLLM.Info: If you need to debug this error, use `litellm.set_verbose=True'.") # noqa
+        print() # noqa
    try:
        if isinstance(original_exception, OriginalError):
            # Handle the OpenAIError
@@ -3401,7 +3395,7 @@ def exception_type(
                        model=model
                    )
            elif hasattr(original_exception, "status_code"):
-                print(f"status code: {original_exception.status_code}")
+                print_verbose(f"status code: {original_exception.status_code}")
                if original_exception.status_code == 401:
                    exception_mapping_worked = True
                    raise AuthenticationError(
@@ -4267,12 +4261,11 @@ def completion_with_fallbacks(**kwargs):
                return response
            except Exception as e:
-                print(e)
+                print_verbose(e)
                rate_limited_models.add(model)
                model_expiration_times[model] = (
                    time.time() + 60
                ) # cool down this selected model
-                # print(f"rate_limited_models {rate_limited_models}")
                pass
 
    return response
@@ -4417,7 +4410,7 @@ def trim_messages(
        return final_messages
    except Exception as e: # [NON-Blocking, if error occurs just return final_messages
-        print("Got exception while token trimming", e)
+        print_verbose(f"Got exception while token trimming: {e}")
        return messages
 
 
 def get_valid_models():
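
The edits above all follow one pattern: unconditional print() calls are removed or routed through a set_verbose-guarded helper that emits via the standard logging module, the few prints kept on purpose are marked # noqa, and the new .flake8 ignore list silences the other rule families so that, in effect, the flake8-print pre-commit hook only flags stray prints in the covered litellm/*.py files (for example via `pre-commit run --all-files`; the exact rule codes depend on the flake8-print version). The snippet below is a minimal, self-contained sketch of that runtime pattern, not the litellm code itself: it mirrors the print_verbose helpers added in litellm/main.py and litellm/utils.py, substitutes a local set_verbose flag for the real litellm attribute, and illustrates the operational caveat that INFO-level records stay invisible until the host application configures logging.

import logging

set_verbose = False  # stand-in for litellm.set_verbose (assumption for this sketch)

def print_verbose(print_statement):
    # Same guard as the helpers in this diff: emit only when verbosity is enabled,
    # and hand the message to logging instead of writing to stdout.
    if set_verbose:
        logging.info(f"LiteLLM: {print_statement}")

# The root logger defaults to WARNING, so the INFO records above are dropped
# unless the application opts in, e.g.:
logging.basicConfig(level=logging.INFO)
set_verbose = True
print_verbose("this message now reaches the configured log handler, not stdout")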