diff --git a/docs/my-website/docs/providers/custom_llm_server.md b/docs/my-website/docs/providers/custom_llm_server.md
new file mode 100644
index 000000000..f8d5fb551
--- /dev/null
+++ b/docs/my-website/docs/providers/custom_llm_server.md
@@ -0,0 +1,73 @@
+# Custom API Server (Custom Format)
+
+LiteLLM allows you to call your custom endpoint in the OpenAI ChatCompletion format.
+
+
+:::info
+
+For calling an OpenAI-compatible endpoint, [go here](./openai_compatible.md)
+:::
+
+## Quick Start
+
+```python
+import litellm
+from litellm import CustomLLM, completion, get_llm_provider
+
+
+class MyCustomLLM(CustomLLM):
+    def completion(self, *args, **kwargs) -> litellm.ModelResponse:
+        return litellm.completion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "Hello world"}],
+            mock_response="Hi!",
+        )  # type: ignore
+
+my_custom_llm = MyCustomLLM()  # 👈 instantiate your handler
+
+litellm.custom_provider_map = [  # 👈 KEY STEP - REGISTER HANDLER
+        {"provider": "my-custom-llm", "custom_handler": my_custom_llm}
+    ]
+
+resp = completion(
+        model="my-custom-llm/my-fake-model",
+        messages=[{"role": "user", "content": "Hello world!"}],
+    )
+
+assert resp.choices[0].message.content == "Hi!"
+```
+
+
+## Custom Handler Spec
+
+```python
+from litellm.types.utils import GenericStreamingChunk, ModelResponse
+from typing import Iterator, AsyncIterator
+from litellm.llms.base import BaseLLM
+
+class CustomLLMError(Exception):  # use this for all your exceptions
+    def __init__(
+        self,
+        status_code,
+        message,
+    ):
+        self.status_code = status_code
+        self.message = message
+        super().__init__(
+            self.message
+        )  # Call the base class constructor with the parameters it needs
+
+class CustomLLM(BaseLLM):
+    def __init__(self) -> None:
+        super().__init__()
+
+    def completion(self, *args, **kwargs) -> ModelResponse:
+        raise CustomLLMError(status_code=500, message="Not implemented yet!")
+
+    def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]:
+        raise CustomLLMError(status_code=500, message="Not implemented yet!")
+
+    async def acompletion(self, *args, **kwargs) -> ModelResponse:
+        raise CustomLLMError(status_code=500, message="Not implemented yet!")
+
+    async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]:
+        raise CustomLLMError(status_code=500, message="Not implemented yet!")
+```
\ No newline at end of file
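The spec above stubs out `streaming` but never shows what a streamed chunk looks like. Below is a minimal, hypothetical sketch of a handler that implements `streaming` and is called with `stream=True`. The chunk keys (`text`, `is_finished`, `finish_reason`, `index`, `tool_use`, `usage`) are assumptions about the `GenericStreamingChunk` type; verify them against `litellm.types.utils` before relying on them.

```python
# Hypothetical streaming-handler sketch - the chunk field names below are
# assumptions; check litellm.types.utils.GenericStreamingChunk for the real definition.
from typing import Iterator

import litellm
from litellm import CustomLLM, completion
from litellm.types.utils import GenericStreamingChunk


class MyStreamingLLM(CustomLLM):
    def completion(self, *args, **kwargs) -> litellm.ModelResponse:
        return litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hello world"}],
            mock_response="Hi!",
        )  # type: ignore

    def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]:
        # Yield chunks in the generic streaming format; a real handler would
        # translate its backend's stream into these dicts.
        chunk: GenericStreamingChunk = {
            "text": "Hi!",
            "is_finished": True,
            "finish_reason": "stop",
            "index": 0,
            "tool_use": None,
            "usage": {"prompt_tokens": 2, "completion_tokens": 1, "total_tokens": 3},
        }
        yield chunk


my_streaming_llm = MyStreamingLLM()
litellm.custom_provider_map = [
    {"provider": "my-custom-llm", "custom_handler": my_streaming_llm}
]

for part in completion(
    model="my-custom-llm/my-fake-model",
    messages=[{"role": "user", "content": "Hello world!"}],
    stream=True,  # routes the call to the handler's streaming() method
):
    print(part)
```

Registration is identical to the Quick Start; only the `stream=True` flag changes on the caller side.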
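The `:::info` box in the new page routes OpenAI-compatible servers to `openai_compatible.md` rather than the guide deleted below. A rough sketch of that route for comparison, with a placeholder URL and model name (neither comes from this diff):

```python
# Sketch of calling an OpenAI-compatible endpoint via litellm's "openai/" prefix route.
# The api_base and model name are placeholders for illustration only.
import os

from litellm import completion

os.environ["OPENAI_API_KEY"] = "anything"  # many self-hosted proxies ignore the key

response = completion(
    model="openai/my-model-name",                    # "openai/" prefix routes through the OpenAI client
    api_base="https://my-openai-proxy.example.com",  # base URL of your OpenAI-compatible server
    messages=[{"role": "user", "content": "Hello, how are you?"}],
)
print(response)
```

This `openai/<model>` prefix plus `api_base` pattern covers the use case the removed page handled with `custom_llm_provider="openai"`.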
diff --git a/docs/my-website/docs/providers/custom_openai_proxy.md b/docs/my-website/docs/providers/custom_openai_proxy.md
deleted file mode 100644
index b6f2eccac..000000000
--- a/docs/my-website/docs/providers/custom_openai_proxy.md
+++ /dev/null
@@ -1,129 +0,0 @@
-# Custom API Server (OpenAI Format)
-
-LiteLLM allows you to call your custom endpoint in the OpenAI ChatCompletion format
-
-## API KEYS
-No api keys required
-
-## Set up your Custom API Server
-Your server should have the following Endpoints:
-
-Here's an example OpenAI proxy server with routes: https://replit.com/@BerriAI/openai-proxy#main.py
-
-### Required Endpoints
-- POST `/chat/completions` - chat completions endpoint
-
-### Optional Endpoints
-- POST `/completions` - completions endpoint
-- Get `/models` - available models on server
-- POST `/embeddings` - creates an embedding vector representing the input text.
-
-
-## Example Usage
-
-### Call `/chat/completions`
-In order to use your custom OpenAI Chat Completion proxy with LiteLLM, ensure you set
-
-* `api_base` to your proxy url, example "https://openai-proxy.berriai.repl.co"
-* `custom_llm_provider` to `openai` this ensures litellm uses the `openai.ChatCompletion` to your api_base
-
-```python
-import os
-from litellm import completion
-
-## set ENV variables
-os.environ["OPENAI_API_KEY"] = "anything" #key is not used for proxy
-
-messages = [{ "content": "Hello, how are you?","role": "user"}]
-
-response = completion(
-    model="command-nightly",
-    messages=[{ "content": "Hello, how are you?","role": "user"}],
-    api_base="https://openai-proxy.berriai.repl.co",
-    custom_llm_provider="openai" # litellm will use the openai.ChatCompletion to make the request
-
-)
-print(response)
-```
-
-#### Response
-```json
-{
-  "object": "chat.completion",
-  "choices": [{
-    "finish_reason": "stop",
-    "index": 0,
-    "message": {
-      "content": "The sky, a canvas of blue,\nA work of art, pure and true,\nA",
-      "role": "assistant"
-    }
-  }],
-  "id": "chatcmpl-7fbd6077-de10-4cb4-a8a4-3ef11a98b7c8",
-  "created": 1699290237.408061,
-  "model": "togethercomputer/llama-2-70b-chat",
-  "usage": {
-    "completion_tokens": 18,
-    "prompt_tokens": 14,
-    "total_tokens": 32
-  }
-}
-```
-
-
-### Call `/completions`
-In order to use your custom OpenAI Completion proxy with LiteLLM, ensure you set
-
-* `api_base` to your proxy url, example "https://openai-proxy.berriai.repl.co"
-* `custom_llm_provider` to `text-completion-openai` this ensures litellm uses the `openai.Completion` to your api_base
-
-```python
-import os
-from litellm import completion
-
-## set ENV variables
-os.environ["OPENAI_API_KEY"] = "anything" #key is not used for proxy
-
-messages = [{ "content": "Hello, how are you?","role": "user"}]
-
-response = completion(
-    model="command-nightly",
-    messages=[{ "content": "Hello, how are you?","role": "user"}],
-    api_base="https://openai-proxy.berriai.repl.co",
-    custom_llm_provider="text-completion-openai" # litellm will use the openai.Completion to make the request
-
-)
-print(response)
-```
-
-#### Response
-```json
-{
-  "warning": "This model version is deprecated. Migrate before January 4, 2024 to avoid disruption of service. Learn more https://platform.openai.com/docs/deprecations",
-  "id": "cmpl-8HxHqF5dymQdALmLplS0dWKZVFe3r",
-  "object": "text_completion",
-  "created": 1699290166,
-  "model": "text-davinci-003",
-  "choices": [{
-    "text": "\n\nThe weather in San Francisco varies depending on what time of year and time",
-    "index": 0,
-    "logprobs": None,
-    "finish_reason": "length"
-  }],
-  "usage": {
-    "prompt_tokens": 7,
-    "completion_tokens": 16,
-    "total_tokens": 23
-  }
-}
-```
\ No newline at end of file
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index d228e09d2..c1ce83068 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -175,7 +175,8 @@ const sidebars = {
         "providers/aleph_alpha",
         "providers/baseten",
         "providers/openrouter",
-        "providers/custom_openai_proxy",
+        // "providers/custom_openai_proxy",
+        "providers/custom_llm_server",
         "providers/petals",
       ],