diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
index d18aa4f225..871c98cbb7 100644
Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 396f89a3bd..4ae00fb4ec 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index e0f896c0d1..368e334d69 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 7007e923a9..61f5ef3751 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -15,7 +15,6 @@ from litellm import embedding, completion, text_completion, completion_cost
 
 user_message = "Write a short poem about the sky"
 messages = [{"content": user_message, "role": "user"}]
-print(f"os path: {os.path.isfile('litellm_uuid.txt')}")
 
 def logger_fn(user_model_dict):
     print(f"user_model_dict: {user_model_dict}")
@@ -774,6 +773,20 @@ def test_completion_with_fallbacks():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
+
+def test_completion_with_fallbacks_multiple_keys():
+    backup_keys = [{"api_key": os.getenv("BACKUP_OPENAI_API_KEY_1")}, {"api_key": os.getenv("BACKUP_OPENAI_API_KEY_2")}]
+    try:
+        api_key = "bad-key"
+        response = completion(
+            model="gpt-3.5-turbo", messages=messages, force_timeout=120, fallbacks=backup_keys, api_key=api_key
+        )
+        # Add any assertions here to check the response
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+# test_completion_with_fallbacks_multiple_keys()
 # def test_petals():
 #     try:
 #         response = completion(model="petals-team/StableBeluga2", messages=messages)
diff --git a/litellm/utils.py b/litellm/utils.py
index 7f21e94504..5016eb4db0 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -3288,6 +3288,7 @@ def completion_with_fallbacks(**kwargs):
     rate_limited_models = set()
     model_expiration_times = {}
     start_time = time.time()
+    original_model = kwargs["model"]
     fallbacks = [kwargs["model"]] + kwargs["fallbacks"]
     del kwargs["fallbacks"]  # remove fallbacks so it's not recursive
 
@@ -3295,7 +3296,13 @@
 
     for model in fallbacks:  # loop thru all models
         try:
-            if (
+            # check if the fallback entry is a dict of backup keys or a model string
+            if isinstance(model, dict):  # completion(model="gpt-4", fallbacks=[{"api_key": "", "api_base": ""}, {"api_key": "", "api_base": ""}])
+                kwargs["api_key"] = model.get("api_key", None)
+                kwargs["api_base"] = model.get("api_base", None)
+                model = original_model
+                print("switched api keys")
+            elif (
                 model in rate_limited_models
             ):  # check if model is currently cooling down
                 if (
@@ -3318,6 +3325,7 @@
             return response
 
         except Exception as e:
+            print(e)
             rate_limited_models.add(model)
             model_expiration_times[model] = (
                 time.time() + 60
diff --git a/pyproject.toml b/pyproject.toml
index 7de1bddf9b..b10ad4bf5e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.754"
+version = "0.1.755"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
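For context, the new test shows the intended call pattern: `fallbacks` can now carry dicts of backup credentials instead of only alternate model names. Below is a minimal usage sketch distilled from `test_completion_with_fallbacks_multiple_keys`; it assumes the two `BACKUP_OPENAI_API_KEY_*` environment variables are set, exactly as the test does.

```python
import os
from litellm import completion

# Each fallback entry is a dict of alternate credentials (api_key and/or
# api_base) to retry the *same* model with, rather than a different model name.
backup_keys = [
    {"api_key": os.getenv("BACKUP_OPENAI_API_KEY_1")},
    {"api_key": os.getenv("BACKUP_OPENAI_API_KEY_2")},
]

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Write a short poem about the sky"}],
    api_key="bad-key",      # primary key fails...
    fallbacks=backup_keys,  # ...so each backup key is tried in order
    force_timeout=120,
)
print(response)
```

The `original_model` variable saved at the top of `completion_with_fallbacks` is what makes this work: when the loop reaches a dict entry, it copies the backup credentials into `kwargs` and restores the original model name before retrying.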
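The surrounding retry loop (only partially visible in the hunks above) also benches models that raise. Here is a hedged sketch of that bookkeeping, reconstructed from the `rate_limited_models` / `model_expiration_times` lines in the diff; the helper names are illustrative, not litellm's actual API.

```python
import time

# Reconstruction of the cooldown bookkeeping in completion_with_fallbacks:
# a model that raises is benched for 60 seconds before it may be retried.
rate_limited_models: set[str] = set()
model_expiration_times: dict[str, float] = {}

def mark_rate_limited(model: str) -> None:
    # mirrors the except-branch in the diff: bench the model for 60s
    rate_limited_models.add(model)
    model_expiration_times[model] = time.time() + 60

def is_cooling_down(model: str) -> bool:
    # mirrors the elif-branch: skip a benched model until its timer expires
    if model in rate_limited_models:
        if time.time() < model_expiration_times.get(model, 0):
            return True  # still cooling down; try the next fallback
        rate_limited_models.discard(model)  # cooldown elapsed, allow retries
    return False
```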