diff --git a/docs/my-website/docs/completion/reliable_completions.md b/docs/my-website/docs/completion/reliable_completions.md
index a58975c1b..7aecbed0c 100644
--- a/docs/my-website/docs/completion/reliable_completions.md
+++ b/docs/my-website/docs/completion/reliable_completions.md
@@ -1,12 +1,9 @@
 # Reliability
 
-
-
-
 ## Helper utils
 LiteLLM supports the following functions for reliability:
 * `litellm.longer_context_model_fallback_dict`: Dictionary which has a mapping for those models which have larger equivalents
 * `completion_with_retries`: use tenacity retries
-* `completion()` with fallback models: set `fallback_models=['gpt-3.5-turbo', 'command-nightly', 'llama2`]. If primary model fails try fallback models
+* `completion()` with fallbacks: switch between models/keys/API bases in case of errors.
 
 ## Context Window Errors
@@ -55,7 +52,8 @@ def test_completion_custom_provider_model_name():
     print(f"Error occurred: {e}")
 ```
 
-## Specify fallback models
+## Switch Models/API Keys/API Bases
+
 LLM APIs can be unstable; `completion()` with fallbacks ensures you'll always get a response from your calls.
 
 ## Usage
@@ -63,8 +61,19 @@ To use fallback models with `completion()`, specify a list of models in the `fal
 The `fallbacks` list should include the primary model you want to use, followed by additional models that can be used as backups in case the primary model fails to provide a response.
 
+### Switch models
 ```python
-response = completion(model="bad-model", fallbacks=["gpt-3.5-turbo" "command-nightly"], messages=messages)
+response = completion(model="bad-model", messages=messages,
+                      fallbacks=["gpt-3.5-turbo", "command-nightly"])
+```
+
+### Switch API keys/bases (e.g. Azure deployment)
+Switch between different keys for the same Azure deployment, or fall back to another deployment entirely.
+
+```python
+api_key = "bad-key"
+response = completion(model="azure/gpt-4", messages=messages, api_key=api_key,
+                      fallbacks=[{"api_key": "good-key-1"}, {"api_key": "good-key-2", "api_base": "good-api-base-2"}])
 ```
 
 ### Output from calls
 
diff --git a/litellm/main.py b/litellm/main.py
index 20e554c76..761080b35 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -672,22 +672,23 @@ def completion(
             litellm.openai_key or
             get_secret("DEEPINFRA_API_KEY")
         )
+        openai.api_base = "https://api.deepinfra.com/v1/openai" # point the shared openai client at the deepinfra api base
         ## LOGGING
         logging.pre_call(
             input=messages,
             api_key=api_key,
         )
         ## COMPLETION CALL
         openai.api_key = api_key # set key for deep infra
         try:
             response = openai.ChatCompletion.create(
                 model=model,
                 messages=messages,
                 api_base="https://api.deepinfra.com/v1/openai", # use the deepinfra api base
                 api_type="openai",
                 api_version=api_version, # default None
                 **optional_params,
             )
         except Exception as e:
             ## LOGGING - log the original exception returned
             logging.post_call(
diff --git a/litellm/tests/test_bad_params.py b/litellm/tests/test_bad_params.py
index 4296d997b..576cc18f9 100644
--- a/litellm/tests/test_bad_params.py
+++ b/litellm/tests/test_bad_params.py
@@ -12,7 +12,7 @@ import litellm
 from litellm import embedding, completion
 
-litellm.set_verbose = True
+# litellm.set_verbose = True
 user_message = "Hello, how are you?"
 messages = [{"content": user_message, "role": "user"}]
 model_val = None
 
@@ -41,7 +41,7 @@ def test_completion_with_no_provider():
         print(f"error occurred: {e}")
         pass
 
-test_completion_with_no_provider()
+# test_completion_with_no_provider()
 # # bad key
 # temp_key = os.environ.get("OPENAI_API_KEY")
 # os.environ["OPENAI_API_KEY"] = "bad-key"
@@ -53,3 +53,10 @@ test_completion_with_no_provider()
 #     print(f"error occurred: {traceback.format_exc()}")
 #     pass
 # os.environ["OPENAI_API_KEY"] = str(temp_key) # this passes linting#5
+
+def logger_fn(model_details):
+    print(model_details)
+os.environ["OPENAI_API_KEY"] = os.environ.get("DEEPINFRA_API_KEY", "") # read the deepinfra key from the environment instead of hardcoding it
+messages = [{"role": "user", "content": "Request boss to grant me 1 day leave"}]
+litellm.api_base = "https://api.deepinfra.com/v1/"
+response = litellm.completion(model="meta-llama/Llama-2-70b-chat-hf", messages=messages, custom_llm_provider="openai", logger_fn=logger_fn)
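
The docs change above also names two helpers that have no example in the patch: `completion_with_retries` and `litellm.longer_context_model_fallback_dict`. For reference, here is a minimal sketch of how they can be used together. It assumes `completion_with_retries` mirrors `completion()`'s keyword arguments, that `longer_context_model_fallback_dict` maps a model name to a larger-context equivalent, and that `OPENAI_API_KEY` is set; treat these as assumptions, not confirmed API.

```python
# Minimal sketch of the reliability helpers, under the assumptions stated above.
import litellm
from litellm import completion_with_retries

messages = [{"role": "user", "content": "Hello, how are you?"}]

# completion_with_retries wraps completion() with tenacity-based retries
# (assumption: it accepts the same keyword arguments as completion()).
response = completion_with_retries(model="gpt-3.5-turbo", messages=messages)

# On a context window error, look up a larger-context equivalent and retry
# (assumption: .get() returns None when no larger equivalent exists).
bigger_model = litellm.longer_context_model_fallback_dict.get("gpt-3.5-turbo")
if bigger_model:
    response = litellm.completion(model=bigger_model, messages=messages)
```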