diff --git a/litellm/__init__.py b/litellm/__init__.py
index 6b14214c6..699c2ef91 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -125,6 +125,12 @@ for key, value in model_cost.items():
     elif value.get('litellm_provider') == 'bedrock':
         bedrock_models.append(key)
 
+# known openai compatible endpoints - we'll eventually move this list to the model_prices_and_context_window.json dictionary
+openai_compatible_endpoints: List = [
+    "api.perplexity.ai"
+]
+
+
 # well supported replicate llms
 replicate_models: List = [
     # llama replicate supported LLMs
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 5d22e7d42..9aa83e657 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 91ca15d40..6cb4ebcbe 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/main.py b/litellm/main.py
index c6d0583a4..c5189ea46 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -253,7 +253,7 @@ def completion(
     if deployment_id != None: # azure llms
         model=deployment_id
         custom_llm_provider="azure"
-    model, custom_llm_provider = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider)
+    model, custom_llm_provider = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider, api_base=api_base)
     model_api_key = get_api_key(llm_provider=custom_llm_provider, dynamic_api_key=api_key) # get the api key from the environment if required for the model
     if model_api_key and "sk-litellm" in model_api_key:
         api_base = "https://proxy.litellm.ai"
diff --git a/litellm/proxy/cost.log b/litellm/proxy/cost.log
index d45f60ee2..88bacc3a7 100644
--- a/litellm/proxy/cost.log
+++ b/litellm/proxy/cost.log
@@ -2629,3 +2629,5 @@
 2023-10-10 12:59:03 - Model claude-2 Cost: $0.00299782
 2023-10-10 12:59:48 - Model claude-2 Cost: $0.00338998
 2023-10-10 13:00:45 - Model claude-2 Cost: $0.00286710
+2023-10-10 19:51:53 - Model claude-2 Cost: $0.01657712
+2023-10-10 20:29:51 - Model claude-2 Cost: $0.00041534
diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py
index a0a0209ee..fe188b385 100644
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@@ -38,8 +38,8 @@ def generate_feedback_box():
 
 generate_feedback_box()
 
-config_filename = ".env.litellm"
-
+config_filename = "litellm.secrets.toml"
+pkg_config_filename = "template.secrets.toml"
 # Using appdirs to determine user-specific config path
 config_dir = appdirs.user_config_dir("litellm")
 user_config_path = os.path.join(config_dir, config_filename)
@@ -50,37 +50,20 @@ def run_ollama_serve():
     with open(os.devnull, 'w') as devnull:
         process = subprocess.Popen(command, stdout=devnull, stderr=devnull)
 
-def load_config():
-    try:
-        if not os.path.exists(user_config_path):
-            # If user's config doesn't exist, copy the default config from the package
-            here = os.path.abspath(os.path.dirname(__file__))
-            parent_dir = os.path.dirname(here)
-            default_config_path = os.path.join(parent_dir, '.env.template')
-            # Ensure the user-specific directory exists
-            os.makedirs(config_dir, exist_ok=True)
-            # Copying the file using shutil.copy
-            shutil.copy(default_config_path, user_config_path)
-        # As the .env file is typically much simpler in structure, we use load_dotenv here directly
-        load_dotenv(dotenv_path=user_config_path)
-    except Exception as e:
-        traceback.print_exc()
-        pass
-
 def open_config():
     # Create the .env file if it doesn't exist
     if not os.path.exists(user_config_path):
         # If user's env doesn't exist, copy the default env from the package
         here = os.path.abspath(os.path.dirname(__file__))
         parent_dir = os.path.dirname(here)
-        default_env_path = os.path.join(parent_dir, '.env.template')
+        default_env_path = os.path.join(parent_dir, pkg_config_filename)
         # Ensure the user-specific directory exists
         os.makedirs(config_dir, exist_ok=True)
         # Copying the file using shutil.copy
        try:
            shutil.copy(default_env_path, user_config_path)
        except Exception as e:
-            print(f"Failed to copy .env.template: {e}")
+            print(f"Failed to copy template.secrets.toml: {e}")
 
     # Open the .env file in the default editor
     if os.name == 'nt': # For Windows
@@ -99,7 +82,6 @@ def open_config():
 @click.option('--max_tokens', default=None, type=int, help='Set max tokens for the model')
 @click.option('--drop_params', is_flag=True, help='Drop any unmapped params')
 @click.option('--add_function_to_prompt', is_flag=True, help='If function passed but unsupported, pass it as prompt')
-@click.option('--max_tokens', default=None, type=int, help='Set max tokens for the model')
 @click.option('--max_budget', default=None, type=float, help='Set max budget for API calls - works for hosted models like OpenAI, TogetherAI, Anthropic, etc.`')
 @click.option('--telemetry', default=True, type=bool, help='Helps us know if people are using this feature. Turn this off by doing `--telemetry False`')
 @click.option('--config', is_flag=True, help='Create and open .env file from .env.template')
@@ -109,7 +91,7 @@ def open_config():
 def run_server(host, port, api_base, model, deploy, debug, temperature, max_tokens, drop_params, add_function_to_prompt, max_budget, telemetry, config, test, local, cost):
     if config:
         open_config()
-
+        return
     if local:
         from proxy_server import app, initialize, deploy_proxy, print_cost_logs
         debug = True
@@ -162,10 +144,8 @@ def run_server(host, port, api_base, model, deploy, debug, temperature, max_toke
             click.echo(f'LiteLLM: streaming response from proxy {chunk}')
         return
     else:
-        load_config()
         initialize(model, api_base, debug, temperature, max_tokens, max_budget, telemetry, drop_params, add_function_to_prompt)
-
 
     try:
         import uvicorn
     except:
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index cdda255be..f79df7e8b 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1,4 +1,6 @@
-import sys, os, platform
+import sys, os, platform, appdirs
+import tomllib
+import shutil, random, traceback
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
@@ -35,6 +37,12 @@ user_debug = False
 user_max_tokens = None
 user_temperature = None
 user_telemetry = False
+user_config = None
+config_filename = "litellm.secrets.toml"
+pkg_config_filename = "template.secrets.toml"
+# Using appdirs to determine user-specific config path
+config_dir = appdirs.user_config_dir("litellm")
+user_config_path = os.path.join(config_dir, config_filename)
 
 #### HELPER FUNCTIONS ####
 def print_verbose(print_statement):
@@ -49,11 +57,95 @@ def usage_telemetry(): # helps us know if people are using this feature. Set `li
     }
     litellm.utils.litellm_telemetry(data=data)
 
+def load_config():
+    try:
+        global user_config, user_api_base, user_max_tokens, user_temperature, user_model
+        if not os.path.exists(user_config_path):
+            # If user's config doesn't exist, copy the default config from the package
+            here = os.path.abspath(os.path.dirname(__file__))
+            parent_dir = os.path.dirname(here)
+            default_config_path = os.path.join(parent_dir, pkg_config_filename)
+            # Ensure the user-specific directory exists
+            os.makedirs(config_dir, exist_ok=True)
+            # Copying the file using shutil.copy
+            shutil.copy(default_config_path, user_config_path)
+        # Load the user-specific TOML config
+        with open(user_config_path, "rb") as f:
+            user_config = tomllib.load(f)
+
+        ## load keys
+        if "keys" in user_config:
+            for key in user_config["keys"]:
+                if key == "HUGGINGFACE_API_KEY":
+                    litellm.huggingface_key = user_config["keys"][key]
+                elif key == "OPENAI_API_KEY":
+                    litellm.openai_key = user_config["keys"][key]
+                elif key == "TOGETHERAI_API_KEY":
+                    litellm.togetherai_api_key = user_config["keys"][key]
+                elif key == "NLP_CLOUD_API_KEY":
+                    litellm.nlp_cloud_key = user_config["keys"][key]
+                elif key == "ANTHROPIC_API_KEY":
+                    litellm.anthropic_key = user_config["keys"][key]
+                elif key == "REPLICATE_API_KEY":
+                    litellm.replicate_key = user_config["keys"][key]
+
+        ## settings
+        litellm.add_function_to_prompt = user_config["general"].get("add_function_to_prompt", True) # by default add function to prompt if unsupported by provider
+        litellm.drop_params = user_config["general"].get("drop_params", True) # by default drop params if unsupported by provider
+
+        ## load model config - to set this run `litellm --config`
+        model_config = None
+        if user_model == "local":
+            model_config = user_config["local_model"]
+        elif user_model == "hosted":
+            model_config = user_config["hosted_model"]
+            litellm.max_budget = model_config.get("max_budget", None) # check if user set a budget for hosted model - e.g. gpt-4
+
+        print_verbose(f"user_config: {user_config}")
+        if model_config is None:
+            return
+
+        user_model = model_config["model_name"] # raise an error if this isn't set when user runs either `litellm --model local_model` or `litellm --model hosted_model`
+        print_verbose(f"user_model: {user_model}")
+
+
+        user_max_tokens = model_config.get("max_tokens", None)
+        user_temperature = model_config.get("temperature", None)
+        user_api_base = model_config.get("api_base", None)
+
+        ## custom prompt template
+        if "prompt_template" in model_config:
+            model_prompt_template = model_config["prompt_template"]
+            if len(model_prompt_template.keys()) > 0: # if user has initialized this at all
+                litellm.register_prompt_template(
+                    model=user_model,
+                    initial_prompt_value=model_prompt_template.get("MODEL_PRE_PROMPT", ""),
+                    roles={
+                        "system": {
+                            "pre_message": model_prompt_template.get("MODEL_SYSTEM_MESSAGE_START_TOKEN", ""),
+                            "post_message": model_prompt_template.get("MODEL_SYSTEM_MESSAGE_END_TOKEN", ""),
+                        },
+                        "user": {
+                            "pre_message": model_prompt_template.get("MODEL_USER_MESSAGE_START_TOKEN", ""),
+                            "post_message": model_prompt_template.get("MODEL_USER_MESSAGE_END_TOKEN", ""),
+                        },
+                        "assistant": {
+                            "pre_message": model_prompt_template.get("MODEL_ASSISTANT_MESSAGE_START_TOKEN", ""),
+                            "post_message": model_prompt_template.get("MODEL_ASSISTANT_MESSAGE_END_TOKEN", ""),
+                        }
+                    },
+                    final_prompt_value=model_prompt_template.get("MODEL_POST_PROMPT", ""),
+                )
+    except Exception as e:
+        traceback.print_exc()
+
 def initialize(model, api_base, debug, temperature, max_tokens, max_budget, telemetry, drop_params, add_function_to_prompt):
     global user_model, user_api_base, user_debug, user_max_tokens, user_temperature, user_telemetry
     user_model = model
-    user_api_base = api_base
     user_debug = debug
+
+    load_config()
+    user_api_base = api_base
     user_max_tokens = max_tokens
     user_temperature = temperature
     user_telemetry = telemetry
@@ -65,6 +157,7 @@ def initialize(model, api_base, debug, temperature, max_tokens, max_budget, tele
     if max_budget:
         litellm.max_budget = max_budget
 
+
 def deploy_proxy(model, api_base, debug, temperature, max_tokens, telemetry, deploy):
     import requests
     # Load .env file
diff --git a/litellm/template.secrets.toml b/litellm/template.secrets.toml
new file mode 100644
index 000000000..59d5d0e16
--- /dev/null
+++ b/litellm/template.secrets.toml
@@ -0,0 +1,50 @@
+[keys]
+# HUGGINGFACE_API_KEY="" # Uncomment to save your Hugging Face API key
+# OPENAI_API_KEY="" # Uncomment to save your OpenAI API Key
+# TOGETHERAI_API_KEY="" # Uncomment to save your TogetherAI API key
+# NLP_CLOUD_API_KEY="" # Uncomment to save your NLP Cloud API key
+# ANTHROPIC_API_KEY="" # Uncomment to save your Anthropic API key
+# REPLICATE_API_KEY="" # Uncomment to save your Replicate API key
+
+[general]
+# add_function_to_prompt = True # e.g: Ollama doesn't support functions, so add it to the prompt instead
+# drop_params = True # drop any params not supported by the provider (e.g. Ollama)
+
+[local_model] # run via `litellm --model local_model`
+# model_name = "ollama/codellama" # Uncomment to set a local model
+# max_tokens = "" # set max tokens for the model
+# temperature = "" # set temperature for the model
+# api_base = "" # set a custom api base for the model
+
+[local_model.prompt_template] # Set a custom prompt template for your local model - docs: https://docs.litellm.ai/docs/completion/prompt_formatting#format-prompt-yourself
+# MODEL_SYSTEM_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
+# MODEL_SYSTEM_MESSAGE_END_TOKEN = "<|endoftext|>" # This does not need to be a token, can be any string
+
+# MODEL_USER_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
+# MODEL_USER_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to user messages. Can be any string.
+
+# MODEL_ASSISTANT_MESSAGE_START_TOKEN = "<|prompter|>" # Applies only to assistant messages. Can be any string.
+# MODEL_ASSISTANT_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to assistant messages. Can be any string.
+
+# MODEL_PRE_PROMPT = "You are a good bot" # Applied at the start of the prompt
+# MODEL_POST_PROMPT = "Now answer as best as you can" # Applied at the end of the prompt
+
+[hosted_model] # run via `litellm --model hosted_model`
+# model_name = "gpt-4"
+# max_tokens = "" # set max tokens for the model
+# temperature = "" # set temperature for the model
+# api_base = "" # set a custom api base for the model
+# max_budget = 100 # sets a max budget of $100 for your hosted model
+
+[hosted_model.prompt_template] # Set a custom prompt template for your hosted model - docs: https://docs.litellm.ai/docs/completion/prompt_formatting#format-prompt-yourself
+# MODEL_SYSTEM_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
+# MODEL_SYSTEM_MESSAGE_END_TOKEN = "<|endoftext|>" # This does not need to be a token, can be any string
+
+# MODEL_USER_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
+# MODEL_USER_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to user messages. Can be any string.
+
+# MODEL_ASSISTANT_MESSAGE_START_TOKEN = "<|prompter|>" # Applies only to assistant messages. Can be any string.
+# MODEL_ASSISTANT_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to assistant messages. Can be any string.
+
+# MODEL_PRE_PROMPT = "You are a good bot" # Applied at the start of the prompt
+# MODEL_POST_PROMPT = "Now answer as best as you can" # Applied at the end of the prompt
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index 127cdcad0..7331d7ec5 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1358,7 +1358,7 @@ def get_optional_params( # use the openai defaults
             optional_params[k] = passed_params[k]
     return optional_params
 
-def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None):
+def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None, api_base: Optional[str] = None):
     try:
         # check if llm provider provided
         if custom_llm_provider:
@@ -1370,6 +1370,13 @@ def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None):
             model = model.split("/", 1)[1]
             return model, custom_llm_provider
 
+        # check if api base is a known openai compatible endpoint
+        if api_base:
+            for endpoint in litellm.openai_compatible_endpoints:
+                if endpoint in api_base:
+                    custom_llm_provider = "openai"
+                    return model, custom_llm_provider
+
         # check if model in known model provider list -> for huggingface models, raise exception as they don't have a fixed provider (can be togetherai, anyscale, baseten, runpod, et.)
         ## openai - chatcompletion + text completion
         if model in litellm.open_ai_chat_completion_models:
@@ -1429,6 +1436,7 @@ def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None):
     except Exception as e:
         raise e
 
+
 def get_api_key(llm_provider: str, dynamic_api_key: Optional[str]):
     api_key = (dynamic_api_key or litellm.api_key)
     # openai
@@ -1503,6 +1511,7 @@ def get_api_key(llm_provider: str, dynamic_api_key: Optional[str]):
             get_secret("TOGETHER_AI_TOKEN")
         )
     return api_key
+
 def get_max_tokens(model: str):
     try:
         return litellm.model_cost[model]
@@ -2183,6 +2192,7 @@ def register_prompt_template(model: str, roles: dict, initial_prompt_value: str
         )
     ```
     """
+    model, _ = get_llm_provider(model=model)
     litellm.custom_prompt_dict[model] = {
         "roles": roles,
         "initial_prompt_value": initial_prompt_value,
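
A minimal, self-contained sketch of the api_base routing this diff adds to get_llm_provider(): it mirrors the new check against litellm.openai_compatible_endpoints without importing litellm, and the model name in the example call is hypothetical, not taken from the diff.

# Standalone sketch of the api_base check added to get_llm_provider() in this diff.
# The endpoint list mirrors litellm.openai_compatible_endpoints; the model name
# used in the example call below is a hypothetical placeholder.
from typing import Optional, Tuple

openai_compatible_endpoints = ["api.perplexity.ai"]

def resolve_provider(model: str, custom_llm_provider: Optional[str] = None,
                     api_base: Optional[str] = None) -> Tuple[str, Optional[str]]:
    # an explicitly passed provider always wins, as in the existing code path
    if custom_llm_provider:
        return model, custom_llm_provider
    # new behavior: a known OpenAI-compatible api_base forces the "openai" provider
    if api_base:
        for endpoint in openai_compatible_endpoints:
            if endpoint in api_base:
                return model, "openai"
    # otherwise fall through to the existing model-name lookups
    return model, None

print(resolve_provider("mistral-7b-instruct", api_base="https://api.perplexity.ai"))
# -> ('mistral-7b-instruct', 'openai')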