diff --git a/dist/litellm-0.9.2.dev1-py3-none-any.whl b/dist/litellm-0.9.2.dev1-py3-none-any.whl
new file mode 100644
index 000000000..821f8eaf4
Binary files /dev/null and b/dist/litellm-0.9.2.dev1-py3-none-any.whl differ
diff --git a/dist/litellm-0.9.2.dev1.tar.gz b/dist/litellm-0.9.2.dev1.tar.gz
new file mode 100644
index 000000000..9a029f0d3
Binary files /dev/null and b/dist/litellm-0.9.2.dev1.tar.gz differ
diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py
index 3d19e0b04..f2b29bf5f 100644
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@@ -86,6 +86,7 @@ def is_port_in_use(port):
 @click.option('--host', default='0.0.0.0', help='Host for the server to listen on.')
 @click.option('--port', default=8000, help='Port to bind the server to.')
 @click.option('--api_base', default=None, help='API base URL.')
+@click.option('--api_version', default="2023-07-01-preview", help='For azure - pass in the api version.')
 @click.option('--model', '-m', default=None, help='The model name to pass to litellm expects')
 @click.option('--alias', default=None, help='The alias for the model - use this to give a litellm model name (e.g. "huggingface/codellama/CodeLlama-7b-Instruct-hf") a more user-friendly name ("codellama")')
 @click.option('--add_key', default=None, help='The model name to pass to litellm expects')
@@ -105,7 +106,7 @@ def is_port_in_use(port):
 @click.option('--test', flag_value=True, help='proxy chat completions url to make a test request to')
 @click.option('--local', is_flag=True, default=False, help='for local debugging')
 @click.option('--cost', is_flag=True, default=False, help='for viewing cost logs')
-def run_server(host, port, api_base, model, alias, add_key, headers, save, debug, temperature, max_tokens, drop_params, create_proxy, add_function_to_prompt, config, file, max_budget, telemetry, logs, test, local, cost):
+def run_server(host, port, api_base, api_version, model, alias, add_key, headers, save, debug, temperature, max_tokens, drop_params, create_proxy, add_function_to_prompt, config, file, max_budget, telemetry, logs, test, local, cost):
     global feature_telemetry
     args = locals()
     if local:
@@ -197,7 +198,7 @@ def run_server(host, port, api_base, model, alias, add_key, headers, save, debug
     else:
         if headers:
             headers = json.loads(headers)
-        initialize(model=model, alias=alias, api_base=api_base, debug=debug, temperature=temperature, max_tokens=max_tokens, max_budget=max_budget, telemetry=telemetry, drop_params=drop_params, add_function_to_prompt=add_function_to_prompt, headers=headers, save=save)
+        initialize(model=model, alias=alias, api_base=api_base, api_version=api_version, debug=debug, temperature=temperature, max_tokens=max_tokens, max_budget=max_budget, telemetry=telemetry, drop_params=drop_params, add_function_to_prompt=add_function_to_prompt, headers=headers, save=save)
     try:
         import uvicorn
     except:
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 336bfb845..ca8b19f59 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -248,7 +248,7 @@ def load_config():
     pass
 
 
-def initialize(model, alias, api_base, debug, temperature, max_tokens, max_budget, telemetry, drop_params,
+def initialize(model, alias, api_base, api_version, debug, temperature, max_tokens, max_budget, telemetry, drop_params,
                add_function_to_prompt, headers, save):
     global user_model, user_api_base, user_debug, user_max_tokens, user_temperature, user_telemetry, user_headers
     user_model = model
@@ -261,6 +261,8 @@ def initialize(model, alias, api_base, debug, temperature, max_tokens, max_budge
     if api_base:  # model-specific param
         user_api_base = api_base
         dynamic_config[user_model]["api_base"] = api_base
+    if api_version:
+        os.environ["AZURE_API_VERSION"] = api_version # set this for azure - litellm can read this from the env
     if max_tokens:  # model-specific param
         user_max_tokens = max_tokens
         dynamic_config[user_model]["max_tokens"] = max_tokens
diff --git a/litellm/utils.py b/litellm/utils.py
index a077dbc7e..e1dea8867 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2509,7 +2509,7 @@ def exception_type(
         if isinstance(original_exception, OriginalError):
             # Handle the OpenAIError
             exception_mapping_worked = True
-            if model in litellm.openrouter_models:
+            if custom_llm_provider == "openrouter":
                 if original_exception.http_status == 413:
                     raise InvalidRequestError(
                         message=str(original_exception),
@@ -3169,7 +3169,14 @@ def exception_type(
                     model=model
                 )
         exception_mapping_worked = True
-        raise APIError(status_code=500, message=str(original_exception), llm_provider=custom_llm_provider, model=model)
+        if "InvalidRequestError.__init__() missing 1 required positional argument: 'param'" in str(original_exception):  # deal with edge-case invalid request error bug in openai-python sdk
+            raise InvalidRequestError(
+                message=f"OpenAIException: This can happen due to missing AZURE_API_VERSION: {str(original_exception)}",
+                model=model,
+                llm_provider=custom_llm_provider
+            )
+        else:
+            raise APIError(status_code=500, message=str(original_exception), llm_provider=custom_llm_provider, model=model)
     except Exception as e:
         # LOGGING
         exception_logging(
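
Usage sketch (not part of the patch, added for context): the new --api_version flag flows through initialize() into the AZURE_API_VERSION environment variable, which litellm's Azure code path can pick up when no explicit api_version is passed. A minimal illustration of that handoff, assuming the usual AZURE_API_BASE / AZURE_API_KEY environment variables are also set; "my-deployment" and the endpoint URL are hypothetical placeholders:

    import os
    import litellm

    # What initialize() now does when the proxy is started with --api_version
    os.environ["AZURE_API_VERSION"] = "2023-07-01-preview"
    os.environ["AZURE_API_BASE"] = "https://my-resource.openai.azure.com"  # hypothetical endpoint
    os.environ["AZURE_API_KEY"] = "..."                                    # placeholder key

    # litellm reads the Azure api version from the env, so the call itself is unchanged
    response = litellm.completion(
        model="azure/my-deployment",  # hypothetical Azure deployment name
        messages=[{"role": "user", "content": "hello"}],
    )

Starting the proxy with the new flag would look roughly like:

    litellm --model azure/my-deployment --api_base https://my-resource.openai.azure.com --api_version 2023-07-01-preview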