diff --git a/litellm/__init__.py b/litellm/__init__.py
index b0c53be1c..01e40bc69 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -3,6 +3,7 @@ failure_callback = []
 set_verbose=False
 telemetry=True
 max_tokens = 256 # OpenAI Defaults
+retry = True # control tenacity retries.
 ####### PROXY PARAMS ################### configurable params if you use proxy models like Helicone
 api_base = None
 headers = None
diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 000000000..72f69328e
Binary files /dev/null and b/litellm/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
new file mode 100644
index 000000000..114fba278
Binary files /dev/null and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/timeout.cpython-311.pyc b/litellm/__pycache__/timeout.cpython-311.pyc
new file mode 100644
index 000000000..c7211e4a7
Binary files /dev/null and b/litellm/__pycache__/timeout.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
new file mode 100644
index 000000000..15aa08235
Binary files /dev/null and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/integrations/__pycache__/__init__.cpython-311.pyc b/litellm/integrations/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 000000000..6ece0d4ef
Binary files /dev/null and b/litellm/integrations/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/main.py b/litellm/main.py
index 898f40ae4..c414fd5fc 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -7,6 +7,11 @@ import traceback
 import litellm
 from litellm import client, logging, exception_type, timeout, success_callback, failure_callback
 import random
+from tenacity import (
+    retry,
+    stop_after_attempt,
+    wait_random_exponential,
+) # for exponential backoff

 ####### ENVIRONMENT VARIABLES ###################
 dotenv.load_dotenv() # Loading env variables using dotenv
@@ -55,6 +60,7 @@ def get_optional_params(
 ####### COMPLETION ENDPOINTS ################
 #############################################
 @client
+@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(2), reraise=True, retry_error_callback=lambda retry_state: setattr(retry_state.outcome, 'retry_variable', litellm.retry)) # retry call, turn this off by setting `litellm.retry = False`
 @timeout(60) ## set timeouts, in case calls hang (e.g. Azure) - default is 60s, override with `force_timeout`
 def completion(
     model, messages, # required params
diff --git a/litellm/tests/test_api_key_param.py b/litellm/tests/test_api_key_param.py
index bd9569495..7ffffcda8 100644
--- a/litellm/tests/test_api_key_param.py
+++ b/litellm/tests/test_api_key_param.py
@@ -15,13 +15,14 @@ def logger_fn(model_call_object: dict):
 user_message = "Hello, how are you?"
messages = [{ "content": user_message,"role": "user"}] +print(os.environ) temp_key = os.environ.get("OPENAI_API_KEY") os.environ["OPENAI_API_KEY"] = "bad-key" # test on openai completion call try: response = completion(model="gpt-3.5-turbo", messages=messages, logger_fn=logger_fn, api_key=temp_key) + print(f"response: {response}") except: print(f"error occurred: {traceback.format_exc()}") pass - os.environ["OPENAI_API_KEY"] = temp_key diff --git a/litellm/tests/test_bad_params.py b/litellm/tests/test_bad_params.py index 53872e5fa..0a2313c78 100644 --- a/litellm/tests/test_bad_params.py +++ b/litellm/tests/test_bad_params.py @@ -4,7 +4,8 @@ import sys, os import traceback - +from dotenv import load_dotenv +load_dotenv() # Get the current directory of the script current_dir = os.path.dirname(os.path.abspath(__file__)) @@ -19,7 +20,7 @@ import litellm from litellm import embedding, completion - +litellm.set_verbose = True litellm.success_callback = ["posthog"] litellm.failure_callback = ["slack", "sentry", "posthog"] @@ -35,4 +36,17 @@ def test_completion_with_empty_model(): response = completion(model=model_val, messages=messages) except Exception as e: print(f"error occurred: {e}") - pass \ No newline at end of file + pass + + +#bad key +temp_key = os.environ.get("OPENAI_API_KEY") +os.environ["OPENAI_API_KEY"] = "bad-key" +# test on openai completion call +try: + response = completion(model="gpt-3.5-turbo", messages=messages) + print(f"response: {response}") +except: + print(f"error occurred: {traceback.format_exc()}") + pass +os.environ["OPENAI_API_KEY"] = temp_key \ No newline at end of file diff --git a/litellm/tests/test_timeout.py b/litellm/tests/test_timeout.py index fd36effc3..31f27e12b 100644 --- a/litellm/tests/test_timeout.py +++ b/litellm/tests/test_timeout.py @@ -18,7 +18,8 @@ start_time = time.time() try: stop_after_10_s(force_timeout=1) -except: +except Exception as e: + print(e) pass end_time = time.time() diff --git a/litellm/timeout.py b/litellm/timeout.py index 8cbe650ed..37bbbffc1 100644 --- a/litellm/timeout.py +++ b/litellm/timeout.py @@ -37,26 +37,29 @@ def timeout( thread = _LoopWrapper() thread.start() future = asyncio.run_coroutine_threadsafe(async_func(), thread.loop) + local_timeout_duration = timeout_duration + if "force_timeout" in kwargs: + local_timeout_duration = kwargs["force_timeout"] try: - local_timeout_duration = timeout_duration - if "force_timeout" in kwargs: - local_timeout_duration = kwargs["force_timeout"] result = future.result(timeout=local_timeout_duration) except futures.TimeoutError: thread.stop_loop() - raise exception_to_raise() + raise exception_to_raise(f"A timeout error occurred. The function call took longer than {local_timeout_duration} second(s).") thread.stop_loop() return result @wraps(func) async def async_wrapper(*args, **kwargs): + local_timeout_duration = timeout_duration + if "force_timeout" in kwargs: + local_timeout_duration = kwargs["force_timeout"] try: value = await asyncio.wait_for( func(*args, **kwargs), timeout=timeout_duration ) return value except asyncio.TimeoutError: - raise exception_to_raise() + raise exception_to_raise(f"A timeout error occurred. 

     if iscoroutinefunction(func):
         return async_wrapper
diff --git a/setup.py b/setup.py
index 98597b52c..d5b99c706 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages

 setup(
     name='litellm',
-    version='0.1.226',
+    version='0.1.227',
     description='Library to easily interface with LLM API providers',
     author='BerriAI',
     packages=[
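
Usage sketch (not part of the patch): a minimal illustration of the two knobs this change exposes, assuming only what the diff itself shows, namely the new module-level `litellm.retry` flag from `__init__.py`, the tenacity `@retry` wrapper on `completion`, and the `force_timeout` keyword consumed by the `@timeout` decorator. The call shape mirrors the tests above.

import litellm
from litellm import completion

messages = [{"content": "Hello, how are you?", "role": "user"}]

# completion() is now wrapped in tenacity's @retry (random exponential
# backoff between 1 and 60 seconds, at most 2 attempts, reraise=True).
# Per the decorator's own comment, setting this flag is the intended
# way to turn retries off.
litellm.retry = False

# force_timeout overrides the @timeout decorator's 60-second default;
# if the call exceeds it, the patched timeout.py raises an exception
# whose message names the effective limit, e.g.
# "A timeout error occurred. The function call took longer than 10 second(s)."
try:
    response = completion(model="gpt-3.5-turbo", messages=messages, force_timeout=10)
    print(f"response: {response}")
except Exception as e:
    print(f"error occurred: {e}")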