diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
index 0bee58842..5f5a1a503 100644
Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 9ad726ac5..420ed270f 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 7c768446d..5286ad6f3 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/tests/test_config.py b/litellm/tests/test_config.py
index 0df5928f9..6a2cbc238 100644
--- a/litellm/tests/test_config.py
+++ b/litellm/tests/test_config.py
@@ -14,6 +14,7 @@ from litellm import completion_with_config
 
 config = {
     "function": "completion",
+    "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"],
     "model": {
         "claude-instant-1": {
             "needs_moderation": True
@@ -26,12 +27,20 @@ config = {
     }
 }
 
-def test_config():
+def test_config_context_window_exceeded():  # sends one very long prompt (sentence repeated 1000x) to gpt-3.5-turbo; fails on any escaping exception
     try:
         sample_text = "how does a court case get to the Supreme Court?" * 1000
         messages = [{"content": sample_text, "role": "user"}]
         response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
         print(response)
+    except Exception as e:
+        print(f"Exception: {e}")
+        pytest.fail(f"An exception occurred: {e}")
+
+# test_config_context_window_exceeded() 
+
+def test_config_context_moderation():
+    try:
         messages=[{"role": "user", "content": "I want to kill them."}]
         response = completion_with_config(model="claude-instant-1", messages=messages, config=config)
         print(response)
@@ -39,4 +48,15 @@ def test_config():
         print(f"Exception: {e}")
         pytest.fail(f"An exception occurred: {e}")
 
-# test_config() 
\ No newline at end of file
+# test_config_context_moderation() 
+
+def test_config_context_default_fallback():  # bad api_key on the requested model; the test fails if any exception escapes completion_with_config
+    try:
+        messages = [{"role": "user", "content": "Hey, how's it going?"}]
+        response = completion_with_config(model="claude-instant-1", messages=messages, config=config, api_key="bad-key")
+        print(response)
+    except Exception as e:
+        print(f"Exception: {e}")
+        pytest.fail(f"An exception occurred: {e}")
+
+# test_config_context_default_fallback()
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index c754c26ea..51e0ebdb0 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2795,36 +2795,45 @@ def completion_with_config(*args, config: Union[dict, str], **kwargs):
     models_with_config = completion_config["model"].keys() 
     model = args[0] if len(args) > 0 else kwargs["model"]
     messages = args[1] if len(args) > 1 else kwargs["messages"]
-    if model in models_with_config: 
-        ## Moderation check
-        if completion_config["model"][model].get("needs_moderation"):
-            input = " ".join(message["content"] for message in messages)
-            response = litellm.moderation(input=input)
-            flagged = response["results"][0]["flagged"]
-            if flagged: 
-                raise Exception("This response was flagged as inappropriate")
-        
-        ## Load Error Handling Logic
-        error_handling = None
-        if completion_config["model"][model].get("error_handling"):
-            error_handling = completion_config["model"][model]["error_handling"]
 
-        try:
-            response = litellm.completion(*args, **kwargs)
-            return response
-        except Exception as e:
-            exception_name = type(e).__name__
-            fallback_model = None
-            if error_handling and exception_name in error_handling: 
-                error_handler = error_handling[exception_name]
-                # either switch model or api key 
-                fallback_model = error_handler.get("fallback_model", None)
-            if fallback_model: 
-                kwargs["model"] = fallback_model
-                return litellm.completion(*args, **kwargs)
-            raise e
-    else: 
-        return litellm.completion(*args, **kwargs)
+    ## Default fallback models: used if anything inside the try below raises
+    fallback_models = completion_config.get("default_fallback_models")
+    try:
+        if model in models_with_config:
+            ## Moderation check
+            if completion_config["model"][model].get("needs_moderation"):
+                moderation_input = " ".join(message["content"] for message in messages)
+                response = litellm.moderation(input=moderation_input)
+                flagged = response["results"][0]["flagged"]
+                if flagged:
+                    raise Exception("This response was flagged as inappropriate")
+
+            ## Model-specific Error Handling (keyed by exception class name)
+            error_handling = completion_config["model"][model].get("error_handling")
+
+            try:
+                response = litellm.completion(*args, **kwargs)
+                return response
+            except Exception as e:
+                exception_name = type(e).__name__
+                fallback_model = None
+                if error_handling and exception_name in error_handling:
+                    error_handler = error_handling[exception_name]
+                    # either switch model or api key
+                    fallback_model = error_handler.get("fallback_model", None)
+                if fallback_model:
+                    kwargs["model"] = fallback_model
+                    return litellm.completion(*args, **kwargs)
+                raise e
+        else:
+            return litellm.completion(*args, **kwargs)
+    except Exception as e:
+        # NOTE(review): also catches the moderation exception above, so flagged input goes to the fallback models -- confirm intended
+        if fallback_models:
+            # Unpack instead of pop(0) so the configured list is left unmodified for later calls.
+            model, *remaining = fallback_models
+            return completion_with_fallbacks(model=model, messages=messages, fallbacks=remaining)
+        raise e
 
 
 
@@ -2924,8 +2933,7 @@ def completion_with_fallbacks(**kwargs):
                 # delete model from kwargs if it exists
                 if kwargs.get("model"):
                     del kwargs["model"]
-
-                print("making completion call", model)
+                    
                 response = litellm.completion(**kwargs, model=model)
 
                 if response != None: