diff --git a/docs/my-website/docs/completion/config.md b/docs/my-website/docs/completion/config.md deleted file mode 100644 index 44ff6e8a70..0000000000 --- a/docs/my-website/docs/completion/config.md +++ /dev/null @@ -1,49 +0,0 @@ -# Model Config - -Model-specific changes can make our code complicated, making it harder to debug errors. Use model configs to simplify this. - -### usage - -Handling prompt logic. Different models have different context windows. Use `adapt_to_prompt_size` to select the right model for the prompt (in case the current model is too small). - - -```python -from litellm import completion_with_config -import os - -config = { - "available_models": ["gpt-3.5-turbo", "claude-instant-1", "gpt-3.5-turbo-16k"], - "adapt_to_prompt_size": True, # 👈 key change -} - -# set env var -os.environ["OPENAI_API_KEY"] = "your-api-key" -os.environ["ANTHROPIC_API_KEY"] = "your-api-key" - - -sample_text = "how does a court case get to the Supreme Court?" * 1000 -messages = [{"content": sample_text, "role": "user"}] -response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config) -``` - -[**See Code**](https://github.com/BerriAI/litellm/blob/30724d9e51cdc2c3e0eb063271b4f171bc01b382/litellm/utils.py#L2783) - -### Complete Config Structure - -```python -config = { - "default_fallback_models": # [Optional] List of model names to try if a call fails - "available_models": # [Optional] List of all possible models you could call - "adapt_to_prompt_size": # [Optional] True/False - if you want to select model based on prompt size (will pick from available_models) - "model": { - "model-name": { - "needs_moderation": # [Optional] True/False - if you want to call openai moderations endpoint before making completion call. Will raise exception, if flagged. - "error_handling": { - "error-type": { # One of the errors listed here - https://docs.litellm.ai/docs/exception_mapping#custom-mapping-list - "fallback_model": "" # str, name of the model it should try instead, when that error occurs - } - } - } - } -} -``` \ No newline at end of file diff --git a/litellm/__init__.py b/litellm/__init__.py index f848dd3243..018b8bb148 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -500,7 +500,6 @@ from .utils import ( validate_environment, check_valid_key, get_llm_provider, - completion_with_config, register_model, encode, decode, diff --git a/litellm/tests/test_config.py b/litellm/tests/test_config.py deleted file mode 100644 index 69e37cf87c..0000000000 --- a/litellm/tests/test_config.py +++ /dev/null @@ -1,118 +0,0 @@ -import sys, os -import traceback -from dotenv import load_dotenv - -load_dotenv() -import os - -sys.path.insert( - 0, os.path.abspath("../..") -) # Adds the parent directory to the system path -import pytest -import litellm -from litellm import completion_with_config - -config = { - "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"], - "model": { - "claude-instant-1": {"needs_moderation": True}, - "gpt-3.5-turbo": { - "error_handling": { - "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"} - } - }, - }, -} - - -def test_config_context_window_exceeded(): - try: - sample_text = "how does a court case get to the Supreme Court?" 
* 1000 - messages = [{"content": sample_text, "role": "user"}] - response = completion_with_config( - model="gpt-3.5-turbo", messages=messages, config=config - ) - print(response) - except Exception as e: - print(f"Exception: {e}") - pytest.fail(f"An exception occurred: {e}") - - -# test_config_context_window_exceeded() - - -def test_config_context_moderation(): - try: - messages = [{"role": "user", "content": "I want to kill them."}] - response = completion_with_config( - model="claude-instant-1", messages=messages, config=config - ) - print(response) - except Exception as e: - print(f"Exception: {e}") - pytest.fail(f"An exception occurred: {e}") - - -# test_config_context_moderation() - - -def test_config_context_default_fallback(): - try: - messages = [{"role": "user", "content": "Hey, how's it going?"}] - response = completion_with_config( - model="claude-instant-1", - messages=messages, - config=config, - api_key="bad-key", - ) - print(response) - except Exception as e: - print(f"Exception: {e}") - pytest.fail(f"An exception occurred: {e}") - - -# test_config_context_default_fallback() - - -config = { - "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"], - "available_models": [ - "gpt-3.5-turbo", - "gpt-3.5-turbo-0301", - "gpt-3.5-turbo-0613", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "j2-ultra", - "command-nightly", - "togethercomputer/llama-2-70b-chat", - "chat-bison", - "chat-bison@001", - "claude-2", - ], - "adapt_to_prompt_size": True, # type: ignore - "model": { - "claude-instant-1": {"needs_moderation": True}, - "gpt-3.5-turbo": { - "error_handling": { - "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"} - } - }, - }, -} - - -def test_config_context_adapt_to_prompt(): - try: - sample_text = "how does a court case get to the Supreme Court?" * 1000 - messages = [{"content": sample_text, "role": "user"}] - response = completion_with_config( - model="gpt-3.5-turbo", messages=messages, config=config - ) - print(response) - except Exception as e: - print(f"Exception: {e}") - pytest.fail(f"An exception occurred: {e}") - - -test_config_context_adapt_to_prompt() diff --git a/litellm/utils.py b/litellm/utils.py index 4520bee621..47c5695bb8 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -7843,133 +7843,6 @@ def read_config_args(config_path) -> dict: ########## experimental completion variants ############################ -def completion_with_config(config: Union[dict, str], **kwargs): - """ - Generate a litellm.completion() using a config dict and all supported completion args - - Example config; - config = { - "default_fallback_models": # [Optional] List of model names to try if a call fails - "available_models": # [Optional] List of all possible models you could call - "adapt_to_prompt_size": # [Optional] True/False - if you want to select model based on prompt size (will pick from available_models) - "model": { - "model-name": { - "needs_moderation": # [Optional] True/False - if you want to call openai moderations endpoint before making completion call. Will raise exception, if flagged. 
- "error_handling": { - "error-type": { # One of the errors listed here - https://docs.litellm.ai/docs/exception_mapping#custom-mapping-list - "fallback_model": "" # str, name of the model it should try instead, when that error occurs - } - } - } - } - } - - Parameters: - config (Union[dict, str]): A configuration for litellm - **kwargs: Additional keyword arguments for litellm.completion - - Returns: - litellm.ModelResponse: A ModelResponse with the generated completion - - """ - if config is not None: - if isinstance(config, str): - config = read_config_args(config) - elif isinstance(config, dict): - config = config - else: - raise Exception("Config path must be a string or a dictionary.") - else: - raise Exception("Config path not passed in.") - - if config is None: - raise Exception("No completion config in the config file") - - models_with_config = config["model"].keys() - model = kwargs["model"] - messages = kwargs["messages"] - - ## completion config - fallback_models = config.get("default_fallback_models", None) - available_models = config.get("available_models", None) - adapt_to_prompt_size = config.get("adapt_to_prompt_size", False) - trim_messages_flag = config.get("trim_messages", False) - prompt_larger_than_model = False - max_model = model - try: - max_tokens = litellm.get_max_tokens(model)["max_tokens"] - except: - max_tokens = 2048 # assume curr model's max window is 2048 tokens - if adapt_to_prompt_size: - ## Pick model based on token window - prompt_tokens = litellm.token_counter( - model="gpt-3.5-turbo", - text="".join(message["content"] for message in messages), - ) - try: - curr_max_tokens = litellm.get_max_tokens(model)["max_tokens"] - except: - curr_max_tokens = 2048 - if curr_max_tokens < prompt_tokens: - prompt_larger_than_model = True - for available_model in available_models: - try: - curr_max_tokens = litellm.get_max_tokens(available_model)[ - "max_tokens" - ] - if curr_max_tokens > max_tokens: - max_tokens = curr_max_tokens - max_model = available_model - if curr_max_tokens > prompt_tokens: - model = available_model - prompt_larger_than_model = False - except: - continue - if prompt_larger_than_model: - messages = trim_messages(messages=messages, model=max_model) - kwargs["messages"] = messages - - kwargs["model"] = model - try: - if model in models_with_config: - ## Moderation check - if config["model"][model].get("needs_moderation"): - input = " ".join(message["content"] for message in messages) - response = litellm.moderation(input=input) - flagged = response["results"][0]["flagged"] - if flagged: - raise Exception("This response was flagged as inappropriate") - - ## Model-specific Error Handling - error_handling = None - if config["model"][model].get("error_handling"): - error_handling = config["model"][model]["error_handling"] - - try: - response = litellm.completion(**kwargs) - return response - except Exception as e: - exception_name = type(e).__name__ - fallback_model = None - if error_handling and exception_name in error_handling: - error_handler = error_handling[exception_name] - # either switch model or api key - fallback_model = error_handler.get("fallback_model", None) - if fallback_model: - kwargs["model"] = fallback_model - return litellm.completion(**kwargs) - raise e - else: - return litellm.completion(**kwargs) - except Exception as e: - if fallback_models: - model = fallback_models.pop(0) - return completion_with_fallbacks( - model=model, messages=messages, fallbacks=fallback_models - ) - raise e - - def completion_with_fallbacks(**kwargs): 
     nested_kwargs = kwargs.pop("kwargs", {})
     response = None
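Since this diff removes `completion_with_config`, callers who relied on `adapt_to_prompt_size` can reproduce the model selection themselves with the helpers the removed code already used (`litellm.token_counter`, `litellm.get_max_tokens`, `trim_messages`), all of which stay in `litellm/utils.py`. The sketch below is a rough, user-side approximation and not part of this PR: the helper name `pick_model_for_prompt` and the model list are illustrative, and it assumes `get_max_tokens(model)` still returns a dict with a `"max_tokens"` key, as in the removed implementation.

```python
import litellm
from litellm.utils import trim_messages


def pick_model_for_prompt(default_model, available_models, messages):
    """Approximate the removed adapt_to_prompt_size behaviour: pick a model
    whose context window fits the prompt, otherwise trim the prompt down to
    the largest window we know about."""
    prompt_tokens = litellm.token_counter(
        model="gpt-3.5-turbo",
        text="".join(m["content"] for m in messages),
    )
    best_model, best_window = default_model, 0
    for candidate in available_models:
        try:
            window = litellm.get_max_tokens(candidate)["max_tokens"]
        except Exception:
            continue  # no known context window for this model, skip it
        if window > best_window:
            best_model, best_window = candidate, window
        if window > prompt_tokens:
            return candidate, messages  # prompt fits, use as-is
    # nothing fits: fall back to trimming for the largest known window
    return best_model, trim_messages(messages=messages, model=best_model)


sample_text = "how does a court case get to the Supreme Court?" * 1000
messages = [{"role": "user", "content": sample_text}]
model, messages = pick_model_for_prompt(
    default_model="gpt-3.5-turbo",
    available_models=["gpt-3.5-turbo", "gpt-3.5-turbo-16k", "claude-instant-1"],
    messages=messages,
)
response = litellm.completion(model=model, messages=messages)
```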
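The other behaviours the deleted docs described have direct stand-ins in code this diff keeps: the `default_fallback_models` error path simply delegated to `completion_with_fallbacks`, which remains in `litellm/utils.py`, and `needs_moderation` was a pre-flight call to `litellm.moderation`. A hedged equivalent, with an illustrative message and fallback list:

```python
import litellm
from litellm.utils import completion_with_fallbacks

messages = [{"role": "user", "content": "Hey, how's it going?"}]

# needs_moderation: call the moderation endpoint first and refuse flagged input,
# as the removed completion_with_config did for models marked in the config.
moderation = litellm.moderation(input=" ".join(m["content"] for m in messages))
if moderation["results"][0]["flagged"]:
    raise Exception("This response was flagged as inappropriate")

# default_fallback_models: completion_with_fallbacks walks the list until one
# provider succeeds, which is what the removed error path delegated to anyway.
response = completion_with_fallbacks(
    model="claude-instant-1",
    messages=messages,
    fallbacks=["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"],
)
```

Per-error fallbacks (e.g. switching to `gpt-3.5-turbo-16k` on `ContextWindowExceededError`) can be reproduced with an ordinary `try/except` around `litellm.completion`.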