diff --git a/docs/my-website/docs/completion/config.md b/docs/my-website/docs/completion/config.md
new file mode 100644
index 0000000000..72ccf8e726
--- /dev/null
+++ b/docs/my-website/docs/completion/config.md
@@ -0,0 +1,81 @@
+# Model Config
+
+Model-specific changes can make our code complicated and harder to debug. Use model configs to simplify this.
+
+### Usage
+
+For example, if we want to implement:
+* A moderations check for Anthropic models (to avoid violating their safety policy)
+* Model fallbacks - specific + general
+
+```python
+from litellm import completion_with_config
+import os
+
+config = {
+    "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "gpt-3.5-turbo-16k"],
+    "model": {
+        "claude-instant-1": {
+            "needs_moderation": True
+        },
+        "gpt-3.5-turbo": {
+            "error_handling": {
+                "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
+            }
+        },
+    }
+}
+
+# set env var
+os.environ["OPENAI_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your openai key
+os.environ["ANTHROPIC_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your anthropic key
+
+sample_text = "how does a court case get to the Supreme Court?" * 1000
+messages = [{"content": sample_text, "role": "user"}]
+response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
+```
+[**See Code**](https://github.com/BerriAI/litellm/blob/30724d9e51cdc2c3e0eb063271b4f171bc01b382/litellm/utils.py#L2783)
+
+### Select model based on prompt size
+
+You can also use model configs to automatically select a model based on prompt size: litellm counts the tokens in the prompt, compares that count against each model's max token window, and picks a model whose max tokens exceed the prompt tokens. A simplified sketch of this selection logic is shown after the example below.
+
+```python
+from litellm import completion_with_config
+import os
+
+config = {
+    "available_models": ["gpt-3.5-turbo", "claude-instant-1", "gpt-3.5-turbo-16k"],
+    "adapt_to_prompt_size": True, # 👈 key change
+}
+
+# set env var
+os.environ["OPENAI_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your openai key
+os.environ["ANTHROPIC_API_KEY"] = "sk-litellm-7_NPZhMGxY2GoHC59LgbDw" # [OPTIONAL] replace with your anthropic key
+
+sample_text = "how does a court case get to the Supreme Court?" * 1000
+messages = [{"content": sample_text, "role": "user"}]
+response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
+```
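+
+Roughly, the selection step behaves like the sketch below. This is a simplified illustration of the idea rather than litellm's exact implementation - the hard-coded `token_windows` map and the whitespace-based `count_tokens` helper are stand-ins for litellm's internal model metadata and tokenizer.
+
+```python
+# Simplified sketch: pick the first available model whose context window
+# is larger than the prompt; otherwise keep the originally requested model.
+token_windows = {  # illustrative numbers only
+    "gpt-3.5-turbo": 4097,
+    "claude-instant-1": 100000,
+    "gpt-3.5-turbo-16k": 16385,
+}
+
+def count_tokens(messages) -> int:
+    # stand-in for a real tokenizer - counts whitespace-separated words
+    return sum(len(m["content"].split()) for m in messages)
+
+def pick_model(requested_model, messages, available_models):
+    prompt_tokens = count_tokens(messages)
+    for candidate in available_models:
+        if token_windows.get(candidate, 0) > prompt_tokens:
+            return candidate
+    return requested_model  # nothing fits - normal error handling applies
+
+print(pick_model("gpt-3.5-turbo", messages, config["available_models"]))
+```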
+
+### Complete Config Structure
+
+```python
+config = {
+    "default_fallback_models": [], # [Optional] List of model names to try if a call fails
+    "available_models": [], # [Optional] List of all possible models you could call
+    "adapt_to_prompt_size": False, # [Optional] True/False - select a model based on prompt size (will pick from available_models)
+    "model": {
+        "model-name": {
+            "needs_moderation": False, # [Optional] True/False - call the OpenAI moderations endpoint before making the completion call. Raises an exception if the input is flagged.
+            "error_handling": {
+                "error-type": { # One of the errors listed here - https://docs.litellm.ai/docs/exception_mapping#custom-mapping-list
+                    "fallback_model": "" # str, name of the model to try instead when that error occurs
+                }
+            }
+        }
+    }
+}
+```
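+
+Since a flagged moderation check (or an exhausted fallback list) surfaces as a raised exception, it can help to wrap the call in a try/except. This is just an illustrative pattern reusing the config from the usage example above, not a required one:
+
+```python
+from litellm import completion_with_config
+
+messages = [{"content": "Hey, how's it going?", "role": "user"}]
+
+try:
+    response = completion_with_config(model="claude-instant-1", messages=messages, config=config)
+    print(response["choices"][0]["message"]["content"])
+except Exception as e:
+    # e.g. the prompt was flagged by the moderation check, or the call + fallbacks failed
+    print(f"completion_with_config raised: {e}")
+```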
\ No newline at end of file
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index 11062fc670..51e1b8e8c2 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -35,6 +35,7 @@ const sidebars = {
         "completion/message_trimming",
         "completion/model_alias",
         "completion/reliable_completions",
+        "completion/config",
         "completion/batching",
         "completion/mock_requests",
       ],
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 420ed270f7..f5b05bfc91 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index f601789679..dc14ce03a0 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/tests/test_config.py b/litellm/tests/test_config.py
index 86b796ed95..b2b48cfb3b 100644
--- a/litellm/tests/test_config.py
+++ b/litellm/tests/test_config.py
@@ -13,7 +13,6 @@ import litellm
 from litellm import completion_with_config
 
 config = {
-    "function": "completion",
     "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"],
     "model": {
         "claude-instant-1": {
@@ -63,7 +62,6 @@ def test_config_context_default_fallback():
 
 
 config = {
-    "function": "completion",
     "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"],
     "available_models": ["gpt-3.5-turbo", "gpt-3.5-turbo-0301", "gpt-3.5-turbo-0613", "gpt-4", "gpt-4-0314", "gpt-4-0613", "j2-ultra", "command-nightly", "togethercomputer/llama-2-70b-chat", "chat-bison", "chat-bison@001", "claude-2"],
@@ -90,4 +88,4 @@ def test_config_context_adapt_to_prompt():
         print(f"Exception: {e}")
         pytest.fail(f"An exception occurred: {e}")
 
-test_config_context_adapt_to_prompt()
\ No newline at end of file
+# test_config_context_adapt_to_prompt()
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index 325fea5e34..4eff739738 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2780,7 +2780,7 @@ def read_config_args(config_path) -> dict:
 
 ########## experimental completion variants ############################
 
-def completion_with_config(*, config: Union[dict, str], **kwargs):
+def completion_with_config(config: Union[dict, str], **kwargs):
     if config is not None:
         if isinstance(config, str):
             config = read_config_args(config)
@@ -2791,23 +2791,17 @@ def completion_with_config(*, config: Union[dict, str], **kwargs):
         else:
             raise Exception("Config path not passed in.")
 
-    ## load the completion config
-    completion_config = None
-
-    if config["function"] == "completion":
-        completion_config = config
-
-    if completion_config is None:
+    if config is None:
         raise Exception("No completion config in the config file")
 
-    models_with_config = completion_config["model"].keys()
+    models_with_config = config["model"].keys()
     model = kwargs["model"]
     messages = kwargs["messages"]
 
     ## completion config
-    fallback_models = completion_config.get("default_fallback_models", None)
-    available_models = completion_config.get("available_models", None)
-    adapt_to_prompt_size = completion_config.get("adapt_to_prompt_size", False)
+    fallback_models = config.get("default_fallback_models", None)
+    available_models = config.get("available_models", None)
+    adapt_to_prompt_size = config.get("adapt_to_prompt_size", False)
     start_time = time.time()
     if adapt_to_prompt_size:
         ## Pick model based on token window
@@ -2829,7 +2823,7 @@ def completion_with_config(*, config: Union[dict, str], **kwargs):
     try:
         if model in models_with_config:
             ## Moderation check
-            if completion_config["model"][model].get("needs_moderation"):
+            if config["model"][model].get("needs_moderation"):
                 input = " ".join(message["content"] for message in messages)
                 response = litellm.moderation(input=input)
                 flagged = response["results"][0]["flagged"]
@@ -2838,8 +2832,8 @@ def completion_with_config(*, config: Union[dict, str], **kwargs):
 
             ## Model-specific Error Handling
             error_handling = None
-            if completion_config["model"][model].get("error_handling"):
-                error_handling = completion_config["model"][model]["error_handling"]
+            if config["model"][model].get("error_handling"):
+                error_handling = config["model"][model]["error_handling"]
 
             try:
                 response = litellm.completion(**kwargs)
@@ -2968,7 +2962,6 @@ def completion_with_fallbacks(**kwargs):
             return response
 
         except Exception as e:
-            print(f"got exception {e} for model {model}")
             rate_limited_models.add(model)
             model_expiration_times[model] = (
                 time.time() + 60