diff --git a/.gitignore b/.gitignore
index e3e1bee69..1278b7867 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,4 @@ litellm/proxy/api_log.json
 .idea/
 router_config.yaml
 litellm_server/config.yaml
+litellm/proxy/_secret_config.yaml
diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md
index 20dfdcdb8..2f19b4bdb 100644
--- a/docs/my-website/docs/routing.md
+++ b/docs/my-website/docs/routing.md
@@ -1,8 +1,6 @@
 import Image from '@theme/IdealImage';
 
-# Reliability - Fallbacks, Azure Deployments, etc.
-
-## Manage Multiple Deployments
+# Manage Multiple Deployments
 
 Use this if you're trying to load-balance across multiple deployments (e.g. Azure/OpenAI).
 
diff --git a/litellm/exceptions.py b/litellm/exceptions.py
index 941d79bd2..999d9baa0 100644
--- a/litellm/exceptions.py
+++ b/litellm/exceptions.py
@@ -110,12 +110,13 @@ class APIError(APIError): # type: ignore
 
 # raised if an invalid request (not get, delete, put, post) is made
 class APIConnectionError(APIConnectionError):  # type: ignore
-    def __init__(self, message, llm_provider, model):
+    def __init__(self, message, llm_provider, model, request: httpx.Request):
         self.message = message
         self.llm_provider = llm_provider
         self.model = model
         super().__init__(
-            self.message
+            message=self.message,
+            request=request
         )
 
 class OpenAIError(OpenAIError):  # type: ignore
diff --git a/litellm/llms/azure.py b/litellm/llms/azure.py
index 9ec140a50..b95f05a13 100644
--- a/litellm/llms/azure.py
+++ b/litellm/llms/azure.py
@@ -195,7 +195,7 @@ class AzureChatCompletion(BaseLLM):
                 method="POST"
             ) as response:
                 if response.status_code != 200:
-                    raise AzureOpenAIError(status_code=response.status_code, message=response.text)
+                    raise AzureOpenAIError(status_code=response.status_code, message="An error occurred while streaming")
 
                 completion_stream = response.iter_lines()
                 streamwrapper = CustomStreamWrapper(completion_stream=completion_stream, model=model, custom_llm_provider="azure",logging_obj=logging_obj)
diff --git a/litellm/llms/vllm.py b/litellm/llms/vllm.py
index 47144bf2f..ce391d4b5 100644
--- a/litellm/llms/vllm.py
+++ b/litellm/llms/vllm.py
@@ -2,7 +2,7 @@ import os
 import json
 from enum import Enum
 import requests
-import time
+import time, httpx
 from typing import Callable, Any
 from litellm.utils import ModelResponse, Usage
 from .prompt_templates.factory import prompt_factory, custom_prompt
@@ -11,6 +11,8 @@ class VLLMError(Exception):
     def __init__(self, status_code, message):
         self.status_code = status_code
         self.message = message
+        self.request = httpx.Request(method="POST", url="http://0.0.0.0:8000")
+        self.response = httpx.Response(status_code=status_code, request=self.request)
         super().__init__(
             self.message
         )  # Call the base class constructor with the parameters it needs
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index bc402e0ac..1519769d4 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -453,25 +453,18 @@ def litellm_completion(*args, **kwargs):
         kwargs["max_tokens"] = user_max_tokens
     if user_api_base:
         kwargs["api_base"] = user_api_base
-    ## CHECK CONFIG ##
-    if llm_model_list != None:
-        llm_models = [m["model_name"] for m in llm_model_list]
-        if kwargs["model"] in llm_models:
-            for m in llm_model_list:
-                if kwargs["model"] == m["model_name"]: # if user has specified a config, this will use the config
-                    for key, value in m["litellm_params"].items():
-                        kwargs[key] = value
-                    break
-        else:
-            print_verbose("user sent model not in config, using default config model")
-            default_model = llm_model_list[0]
-            litellm_params = default_model.get('litellm_params', None)
-            for key, value in litellm_params.items():
-                kwargs[key] = value
-    if call_type == "chat_completion":
-        response = litellm.completion(*args, **kwargs)
-    elif call_type == "text_completion":
-        response = litellm.text_completion(*args, **kwargs)
+    ## ROUTE TO CORRECT ENDPOINT ##
+    router_model_names = [m["model_name"] for m in llm_model_list] if llm_model_list is not None else []
+    if llm_router is not None and kwargs["model"] in router_model_names: # model in router model list
+        if call_type == "chat_completion":
+            response = llm_router.completion(*args, **kwargs)
+        elif call_type == "text_completion":
+            response = llm_router.text_completion(*args, **kwargs)
+    else:
+        if call_type == "chat_completion":
+            response = litellm.completion(*args, **kwargs)
+        elif call_type == "text_completion":
+            response = litellm.text_completion(*args, **kwargs)
     if 'stream' in kwargs and kwargs['stream'] == True: # use generate_responses to stream responses
         return StreamingResponse(data_generator(response), media_type='text/event-stream')
     return response
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index a7af44534..ec5c3ef5b 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -579,36 +579,34 @@ def test_completion_openai_with_more_optional_params():
         pytest.fail(f"Error occurred: {e}")
 
 # test_completion_openai_with_more_optional_params()
-# def test_completion_openai_azure_with_functions():
-#     function1 = [
-#         {
-#             "name": "get_current_weather",
-#             "description": "Get the current weather in a given location",
-#             "parameters": {
-#                 "type": "object",
-#                 "properties": {
-#                     "location": {
-#                         "type": "string",
-#                         "description": "The city and state, e.g. San Francisco, CA",
-#                     },
-#                     "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
-#                 },
-#                 "required": ["location"],
-#             },
-#         }
-#     ]
-#     try:
-#         response = completion(
-#             model="azure/chatgpt-functioncalling", messages=messages, stream=True
-#         )
-#         # Add any assertions here to check the response
-#         print(response)
-#         for chunk in response:
-#             print(chunk)
-#             print(chunk["choices"][0]["finish_reason"])
-#     except Exception as e:
-#         pytest.fail(f"Error occurred: {e}")
-# test_completion_openai_azure_with_functions()
+def test_completion_openai_azure_with_functions():
+    function1 = [
+        {
+            "name": "get_current_weather",
+            "description": "Get the current weather in a given location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city and state, e.g. San Francisco, CA",
San Francisco, CA", + }, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, + }, + "required": ["location"], + }, + } + ] + try: + messages = [{"role": "user", "content": "What is the weather like in Boston?"}] + response = completion( + model="azure/chatgpt-functioncalling", messages=messages, functions=function1 + ) + # Add any assertions here to check the response + print(response) + except Exception as e: + pytest.fail(f"Error occurred: {e}") +test_completion_openai_azure_with_functions() def test_completion_azure(): diff --git a/litellm/utils.py b/litellm/utils.py index 4b75cc8c1..a9cc6e465 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -2896,7 +2896,7 @@ def convert_to_model_response_object(response_object: Optional[dict]=None, model raise Exception("Error in response object format") choice_list=[] for idx, choice in enumerate(response_object["choices"]): - message = Message(content=choice["message"]["content"], role=choice["message"]["role"], function_call=choice["message"].get("function_call", None)) + message = Message(content=choice["message"].get("content", None), role=choice["message"]["role"], function_call=choice["message"].get("function_call", None)) finish_reason = choice.get("finish_reason", None) if finish_reason == None: # gpt-4 vision can return 'finish_reason' or 'finish_details' @@ -4018,7 +4018,8 @@ def exception_type( raise APIConnectionError( message=f"VLLMException - {original_exception.message}", llm_provider="vllm", - model=model + model=model, + request=original_exception.request ) elif custom_llm_provider == "azure": if "This model's maximum context length is" in error_str: @@ -4093,7 +4094,8 @@ def exception_type( raise APIConnectionError( message=f"{str(original_exception)}", llm_provider=custom_llm_provider, - model=model + model=model, + request=original_exception.request ) except Exception as e: # LOGGING