diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md
index b3a3298dea..cb3722c229 100644
--- a/docs/my-website/docs/routing.md
+++ b/docs/my-website/docs/routing.md
@@ -64,37 +64,7 @@ print(response)
 
 ### Deploy Router
 
-1. Clone repo
-```shell
- git clone https://github.com/BerriAI/litellm
-```
-
-2. Create + Modify router_config.yaml (save your azure/openai/etc. deployment info)
-
-```shell
-cp ./router_config_template.yaml ./router_config.yaml
-```
-
-3. Build + Run docker image
-
-```shell
-docker build -t litellm-proxy . --build-arg CONFIG_FILE=./router_config.yaml
-```
-
-```shell
-docker run --name litellm-proxy -e PORT=8000 -p 8000:8000 litellm-proxy
-```
-
-### Test
-
-```curl
-curl 'http://0.0.0.0:8000/router/completions' \
---header 'Content-Type: application/json' \
---data '{
-    "model": "gpt-3.5-turbo",
-    "messages": [{"role": "user", "content": "Hey"}]
-}'
-```
+If you want a server that just routes requests to different LLM APIs, use our [OpenAI Proxy Server](./simple_proxy.md)
 
 ## Retry failed requests
diff --git a/docs/my-website/docs/simple_proxy.md b/docs/my-website/docs/simple_proxy.md
index 4b3ac7e30d..8040af8741 100644
--- a/docs/my-website/docs/simple_proxy.md
+++ b/docs/my-website/docs/simple_proxy.md
@@ -8,6 +8,9 @@ LiteLLM Server manages:
 
 * Calling 100+ LLMs [Huggingface/Bedrock/TogetherAI/etc.](#other-supported-models) in the OpenAI `ChatCompletions` & `Completions` format
 * Set custom prompt templates + model-specific configs (`temperature`, `max_tokens`, etc.)
+* Routing between [Multiple Models](#multiple-models---quick-start) + [Deployments of the same model](#multiple-instances-of-1-model)
+
+[**See code**](https://github.com/BerriAI/litellm/tree/main/litellm/proxy)
 
 ## Quick Start
 View all the supported args for the Proxy CLI [here](https://docs.litellm.ai/docs/simple_proxy#proxy-cli-arguments)
@@ -593,8 +596,11 @@ model_list:
       api_key: sk-claude
 ```
 
-#### Default Model - Config:
+:::info
+ The proxy uses the first model in the config as the default model - in this config the default model is `zephyr-alpha`
+:::
+
 
 #### Step 2: Start Proxy with config
@@ -602,11 +608,7 @@ The proxy uses the first model in the conf
 ```shell
 $ litellm --config /path/to/config.yaml
 ```
 
-#### Step 3: Start Proxy with config
-
-If you're repo let's you set model name, you can call the specific model by just passing in that model's name
-
-
-#### Step 4: Use proxy
+#### Step 3: Use proxy
 Curl Command
 ```shell
 curl --location 'http://0.0.0.0:8000/chat/completions' \
 --header 'Content-Type: application/json' \
 --data ' {
       "model": "gpt-3.5-turbo",
       "messages": [
         {
           "role": "user",
           "content": "what llm are you"
         }
       ],
     }
 '
 ```
@@ -703,6 +705,28 @@ model_list:
       api_base: http://0.0.0.0:8003
 ```
 
+#### Step 2: Start Proxy with config
+
+```shell
+$ litellm --config /path/to/config.yaml
+```
+
+#### Step 3: Use proxy
+Curl Command
+```shell
+curl --location 'http://0.0.0.0:8000/chat/completions' \
+--header 'Content-Type: application/json' \
+--data ' {
+      "model": "zephyr-beta",
+      "messages": [
+        {
+          "role": "user",
+          "content": "what llm are you"
+        }
+      ]
+    }
+'
+```
 
 ### Set Custom Prompt Templates
diff --git a/litellm/router.py b/litellm/router.py
index ecb73ece1b..ca5b5511b9 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -153,7 +153,10 @@ class Router:
         for current_attempt in range(self.num_retries):
             try:
                 # if the function call is successful, no exception will be raised and we'll break out of the loop
-                return await original_function(*args, **kwargs)
+                response = await original_function(*args, **kwargs)
+                if asyncio.iscoroutine(response): # async errors are often returned as coroutines
+                    response = await response
+                return response
             except openai.RateLimitError as e:
                 # on RateLimitError we'll wait for an exponential time before trying again
@@ -231,6 +234,8 @@ class Router:
             deployment = self.get_available_deployment(model=model, messages=messages)
             data = deployment["litellm_params"]
             response = await litellm.acompletion(**{**data, "messages": messages, "caching": self.cache_responses, **kwargs})
+            if asyncio.iscoroutine(response): # async errors are often returned as coroutines
+                response = await response
             return response
         except Exception as e:
             kwargs["model"] = model
diff --git a/litellm/utils.py b/litellm/utils.py
index 682d6dec3e..4a3f4db9b7 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1057,12 +1057,12 @@ def client(original_function):
         if litellm.client_session is None:
             litellm.client_session = httpx.Client(
                 limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
-                timeout = httpx.Timeout(timeout=600.0, connect=5.0)
+                timeout = None
             )
         if litellm.aclient_session is None:
             litellm.aclient_session = httpx.AsyncClient(
                 limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
-                timeout = httpx.Timeout(timeout=600.0, connect=5.0)
+                timeout = None
             )
         if litellm.use_client or ("use_client" in kwargs and kwargs["use_client"] == True):
            print_verbose(f"litedebugger initialized")
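
The retry change in `litellm/router.py` handles the case where the awaited call itself hands back an un-awaited coroutine (as the inline comment notes, async errors are often returned that way), awaiting it once more before returning. A minimal standalone sketch of that pattern, assuming a hypothetical `call_llm` helper with a `defer` flag that is not part of litellm:

```python
import asyncio

# Hypothetical stand-in for an async LLM call: sometimes it resolves to a value,
# sometimes it resolves to another (un-awaited) coroutine, mimicking the case the
# router guards against.
async def call_llm(prompt: str, defer: bool = False):
    async def _inner():
        return f"response to: {prompt}"
    if defer:
        return _inner()      # caller receives a coroutine object, not the result
    return await _inner()

async def call_with_retries(prompt: str, num_retries: int = 3):
    for attempt in range(num_retries):
        try:
            response = await call_llm(prompt, defer=(attempt == 0))
            if asyncio.iscoroutine(response):  # result may itself be a coroutine
                response = await response      # await it to get the real value
            return response
        except Exception:
            await asyncio.sleep(2 ** attempt)  # exponential backoff between retries
    raise RuntimeError("all retries failed")

if __name__ == "__main__":
    print(asyncio.run(call_with_retries("hey")))  # -> "response to: hey"
```

The same check-and-await guard appears in both the retry wrapper and the `acompletion` path above, so callers always receive a resolved response rather than a coroutine.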
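The `litellm/utils.py` change swaps the explicit `httpx.Timeout(timeout=600.0, connect=5.0)` for `timeout=None`, which turns off httpx's client-side timeout entirely (httpx otherwise defaults to 5 seconds) rather than capping requests at 600 seconds. A small sketch of the two configurations using plain httpx, with nothing litellm-specific assumed:

```python
import httpx

# Old behavior: total request time capped at 600s, connection setup at 5s.
bounded = httpx.Client(
    limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
    timeout=httpx.Timeout(timeout=600.0, connect=5.0),
)

# New behavior: no client-side timeout at all, so long-running LLM responses
# are never cut off by httpx (the default would otherwise be 5 seconds).
unbounded = httpx.Client(
    limits=httpx.Limits(max_connections=100, max_keepalive_connections=20),
    timeout=None,
)
```

The trade-off is that slow or hung upstream LLM calls are no longer cut off by the HTTP client, so any timeout has to be enforced elsewhere.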