diff --git a/litellm/proxy/.env.template b/litellm/.env.template
similarity index 100%
rename from litellm/proxy/.env.template
rename to litellm/.env.template
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index d3a54a2d4..a7ff83ae4 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index 381e8b8fd..d24548488 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -217,5 +217,4 @@ def prompt_factory(model: str, messages: list):
         else:
             return hf_chat_template(original_model_name, messages)
     except:
-        traceback.print_exc()
         return default_pt(messages=messages) # default that covers Bloom, T-5, any non-chat tuned model (e.g. base Llama2)
\ No newline at end of file
diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py
index dc6778e6d..28f252fe4 100644
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@@ -1,9 +1,55 @@
 import click
 import subprocess
-import os
+import os, appdirs
 from dotenv import load_dotenv
 
 load_dotenv()
+from importlib import resources
+import shutil
+
+config_filename = ".env.litellm"
+
+# Using appdirs to determine user-specific config path
+config_dir = appdirs.user_config_dir("litellm")
+user_config_path = os.path.join(config_dir, config_filename)
+
+def load_config():
+    try:
+        if not os.path.exists(user_config_path):
+            # If user's config doesn't exist, copy the default config from the package
+            here = os.path.abspath(os.path.dirname(__file__))
+            parent_dir = os.path.dirname(here)
+            default_config_path = os.path.join(parent_dir, '.env.template')
+            # Ensure the user-specific directory exists
+            os.makedirs(config_dir, exist_ok=True)
+            # Copying the file using shutil.copy
+            shutil.copy(default_config_path, user_config_path)
+
+        # As the .env file is typically much simpler in structure, we use load_dotenv here directly
+        load_dotenv(dotenv_path=user_config_path)
+    except:
+        pass
+
+def open_config():
+    # Create the .env file if it doesn't exist
+    if not os.path.exists(user_config_path):
+        # If user's env doesn't exist, copy the default env from the package
+        here = os.path.abspath(os.path.dirname(__file__))
+        parent_dir = os.path.dirname(here)
+        default_env_path = os.path.join(parent_dir, '.env.template')
+        # Ensure the user-specific directory exists
+        os.makedirs(config_dir, exist_ok=True)
+        # Copying the file using shutil.copy
+        try:
+            shutil.copy(default_env_path, user_config_path)
+        except Exception as e:
+            print(f"Failed to copy .env.template: {e}")
+
+    # Open the .env file in the default editor
+    if os.name == 'nt': # For Windows
+        os.startfile(user_config_path)
+    elif os.name == 'posix': # For MacOS, Linux, and anything using Bash
+        subprocess.call(('open', '-t', user_config_path))
 
 @click.command()
 @click.option('--port', default=8000, help='Port to bind the server to.')
@@ -16,22 +62,17 @@ load_dotenv()
 @click.option('--telemetry', default=True, type=bool, help='Helps us know if people are using this feature. Turn this off by doing `--telemetry False`')
 @click.option('--config', is_flag=True, help='Create and open .env file from .env.template')
 @click.option('--test', default=None, help='proxy chat completions url to make a test request to')
-def run_server(port, api_base, model, deploy, debug, temperature, max_tokens, telemetry, config, test):
+@click.option('--local', is_flag=True, default=False, help='for local debugging')
+def run_server(port, api_base, model, deploy, debug, temperature, max_tokens, telemetry, config, test, local):
     if config:
-        if os.path.exists('.env.template'):
-            if not os.path.exists('.env'):
-                with open('.env.template', 'r') as source:
-                    data = source.read()
-                with open('.env', 'w') as destination:
-                    destination.write(data)
-
-                click.echo('Opening .env file...')
-                subprocess.call(['open', '.env']) # replace `open` with `start` on Windows
-        else:
-            click.echo('No .env.template file found.')
+        open_config()
 
-    from .proxy_server import app, initialize, deploy_proxy
-    # from proxy_server import app, initialize, deploy_proxy
+    if local:
+        from proxy_server import app, initialize, deploy_proxy
+        debug = True
+    else:
+        from .proxy_server import app, initialize, deploy_proxy
+
     if deploy == True:
         print(f"\033[32mLiteLLM: Deploying your proxy to api.litellm.ai\033[0m\n")
         print(f"\033[32mLiteLLM: Deploying proxy for model: {model}\033[0m\n")
@@ -57,6 +98,7 @@ def run_server(port, api_base, model, deploy, debug, temperature, max_tokens, te
         click.echo(f'LiteLLM: response from proxy {response}')
         return
     else:
+        load_config()
         initialize(model, api_base, debug, temperature, max_tokens, telemetry)
 
 
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 313ca43bd..5f771d489 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -120,9 +120,8 @@ def model_list():
 async def completion(request: Request):
     data = await request.json()
     print_verbose(f"data passed in: {data}")
-    if (user_model is None):
-        raise ValueError("Proxy model needs to be set")
-    data["model"] = user_model
+    if user_model:
+        data["model"] = user_model
     if user_api_base:
         data["api_base"] = user_api_base
     ## check for custom prompt template ##
@@ -154,9 +153,8 @@ async def chat_completion(request: Request):
     data = await request.json()
     print_verbose(f"data passed in: {data}")
-    if (user_model is None):
-        raise ValueError("Proxy model needs to be set")
-    data["model"] = user_model
+    if user_model:
+        data["model"] = user_model
     # override with user settings
     if user_temperature:
         data["temperature"] = user_temperature
@@ -186,7 +184,6 @@ async def chat_completion(request: Request):
         )
     response = litellm.completion(**data)
     if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
-        print("reaches stream")
         return StreamingResponse(data_generator(response), media_type='text/event-stream')
     print_verbose(f"response: {response}")
     return response
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index 49bece66a..30d1870de 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -94,6 +94,14 @@ last_fetched_at_keys = None
 def _generate_id(): # private helper function
     return 'chatcmpl-' + str(uuid.uuid4())
 
+def map_finish_reason(finish_reason: str): # openai supports 5 stop sequences - 'stop', 'length', 'function_call', 'content_filter', 'null'
+    # anthropic mapping
+    print(f"receives finish reason: {finish_reason}")
+    if finish_reason == "stop_sequence":
+        return "stop"
+    return finish_reason
+
+
 class Message(OpenAIObject):
     def __init__(self, content="default", role="assistant", logprobs=None, **params):
         super(Message, self).__init__(**params)
@@ -114,7 +122,7 @@ class Choices(OpenAIObject):
     def __init__(self, finish_reason=None, index=0, message=None, **params):
         super(Choices, self).__init__(**params)
         if finish_reason:
-            self.finish_reason = finish_reason
+            self.finish_reason = map_finish_reason(finish_reason)
         else:
             self.finish_reason = "stop"
         self.index = index
@@ -3200,6 +3208,7 @@ class CustomStreamWrapper:
                 model_response.choices[0].delta = Delta(**completion_obj)
                 return model_response
             elif model_response.choices[0].finish_reason:
+                model_response.choices[0].finish_reason = map_finish_reason(model_response.choices[0].finish_reason) # ensure consistent output to openai
                 return model_response
         except StopIteration:
             raise StopIteration
diff --git a/poetry.lock b/poetry.lock
index dc313bd6f..4943d7f52 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -122,6 +122,17 @@ files = [
 [package.dependencies]
 frozenlist = ">=1.1.0"
 
+[[package]]
+name = "appdirs"
+version = "1.4.4"
+description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
+optional = false
+python-versions = "*"
+files = [
+    {file = "appdirs-1.4.4-py2.py3-none-any.whl", hash = "sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128"},
+    {file = "appdirs-1.4.4.tar.gz", hash = "sha256:7d5d0167b2b1ba821647616af46a749d1c653740dd0d2415100fe26e27afdf41"},
+]
+
 [[package]]
 name = "async-timeout"
 version = "4.0.3"
@@ -1074,4 +1085,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.8"
-content-hash = "0fa234d1342838a6cc444dd996dbe404ca2cd6c872dcf560dbe420a2956aaecd"
+content-hash = "c8cae152cee4eda56560529476234bc5e91171c6207641af797e7bebf720a499"
diff --git a/pyproject.toml b/pyproject.toml
index e857833f6..e3f3c556d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,6 +14,7 @@ tiktoken = ">=0.4.0"
 importlib-metadata = ">=6.8.0"
 tokenizers = "*"
 click = "*"
+appdirs = "^1.4.4"
 
 [tool.poetry.scripts]
 litellm = 'litellm:run_server'