Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 03:04:13 +00:00)
fix(proxy_cli-and-utils.py): fixing how the config file is read + inferring llm_provider for known openai endpoints
This commit is contained in:
parent 61f36c094f
commit 87e5f79924
9 changed files with 170 additions and 29 deletions
@@ -1,4 +1,6 @@
import sys, os, platform
import sys, os, platform, appdirs
import tomllib
import shutil, random, traceback
sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
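A note on the new `import tomllib` line: `tomllib` is part of the standard library only from Python 3.11 onward. The sketch below is not part of this commit; it shows a common fallback pattern using the API-compatible `tomli` package on older interpreters.

```python
# Editor's sketch, not in the commit: guard the stdlib-only tomllib import.
try:
    import tomllib  # stdlib on Python >= 3.11
except ModuleNotFoundError:
    import tomli as tomllib  # `pip install tomli`; exposes the same load()/loads() API
```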
@@ -35,6 +37,12 @@ user_debug = False
user_max_tokens = None
user_temperature = None
user_telemetry = False
user_config = None
config_filename = "litellm.secrets.toml"
pkg_config_filename = "template.secrets.toml"
# Using appdirs to determine user-specific config path
config_dir = appdirs.user_config_dir("litellm")
user_config_path = os.path.join(config_dir, config_filename)

#### HELPER FUNCTIONS ####
def print_verbose(print_statement):
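For orientation, here is a hedged sketch of where `appdirs.user_config_dir("litellm")` typically resolves. Exact locations depend on the OS and environment variables such as `XDG_CONFIG_HOME`, so the paths in the comments are examples rather than guarantees.

```python
# Illustrative only: typical per-user config locations produced by appdirs.
import os
import appdirs

config_dir = appdirs.user_config_dir("litellm")
# Linux:   ~/.config/litellm            (or $XDG_CONFIG_HOME/litellm)
# macOS:   ~/Library/Application Support/litellm
# Windows: %LOCALAPPDATA%\litellm\litellm
user_config_path = os.path.join(config_dir, "litellm.secrets.toml")
print(user_config_path)
```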
@@ -49,11 +57,95 @@ def usage_telemetry(): # helps us know if people are using this feature. Set `li
    }
    litellm.utils.litellm_telemetry(data=data)

def load_config():
    try:
        global user_config, user_api_base, user_max_tokens, user_temperature, user_model
        if not os.path.exists(user_config_path):
            # If user's config doesn't exist, copy the default config from the package
            here = os.path.abspath(os.path.dirname(__file__))
            parent_dir = os.path.dirname(here)
            default_config_path = os.path.join(parent_dir, pkg_config_filename)
            # Ensure the user-specific directory exists
            os.makedirs(config_dir, exist_ok=True)
            # Copying the file using shutil.copy
            shutil.copy(default_config_path, user_config_path)
        # As the .env file is typically much simpler in structure, we use load_dotenv here directly
        with open(user_config_path, "rb") as f:
            user_config = tomllib.load(f)

        ## load keys
        if "keys" in user_config:
            for key in user_config["keys"]:
                if key == "HUGGINGFACE_API_KEY":
                    litellm.huggingface_key = user_config["keys"][key]
                elif key == "OPENAI_API_KEY":
                    litellm.openai_key = user_config["keys"][key]
                elif key == "TOGETHERAI_API_KEY":
                    litellm.togetherai_api_key = user_config["keys"][key]
                elif key == "NLP_CLOUD_API_KEY":
                    litellm.nlp_cloud_key = user_config["keys"][key]
                elif key == "ANTHROPIC_API_KEY":
                    litellm.anthropic_key = user_config["keys"][key]
                elif key == "REPLICATE_API_KEY":
                    litellm.replicate_key = user_config["keys"][key]

        ## settings
        litellm.add_function_to_prompt = user_config["general"].get("add_function_to_prompt", True)  # by default add function to prompt if unsupported by provider
        litellm.drop_params = user_config["general"].get("drop_params", True)  # by default drop params if unsupported by provider

        ## load model config - to set this run `litellm --config`
        model_config = None
        if user_model == "local":
            model_config = user_config["local_model"]
        elif user_model == "hosted":
            model_config = user_config["hosted_model"]
            litellm.max_budget = model_config.get("max_budget", None)  # check if user set a budget for hosted model - e.g. gpt-4

        print_verbose(f"user_config: {user_config}")
        if model_config is None:
            return

        user_model = model_config["model_name"]  # raise an error if this isn't set when user runs either `litellm --model local_model` or `litellm --model hosted_model`
        print_verbose(f"user_model: {user_model}")

        user_max_tokens = model_config.get("max_tokens", None)
        user_temperature = model_config.get("temperature", None)
        user_api_base = model_config.get("api_base", None)

        ## custom prompt template
        if "prompt_template" in model_config:
            model_prompt_template = model_config["prompt_template"]
            if len(model_prompt_template.keys()) > 0:  # if user has initialized this at all
                litellm.register_prompt_template(
                    model=user_model,
                    initial_prompt_value=model_prompt_template.get("MODEL_PRE_PROMPT", ""),
                    roles={
                        "system": {
                            "pre_message": model_prompt_template.get("MODEL_SYSTEM_MESSAGE_START_TOKEN", ""),
                            "post_message": model_prompt_template.get("MODEL_SYSTEM_MESSAGE_END_TOKEN", ""),
                        },
                        "user": {
                            "pre_message": model_prompt_template.get("MODEL_USER_MESSAGE_START_TOKEN", ""),
                            "post_message": model_prompt_template.get("MODEL_USER_MESSAGE_END_TOKEN", ""),
                        },
                        "assistant": {
                            "pre_message": model_prompt_template.get("MODEL_ASSISTANT_MESSAGE_START_TOKEN", ""),
                            "post_message": model_prompt_template.get("MODEL_ASSISTANT_MESSAGE_END_TOKEN", ""),
                        }
                    },
                    final_prompt_value=model_prompt_template.get("MODEL_POST_PROMPT", ""),
                )
    except Exception as e:
        traceback.print_exc()

def initialize(model, api_base, debug, temperature, max_tokens, max_budget, telemetry, drop_params, add_function_to_prompt):
    global user_model, user_api_base, user_debug, user_max_tokens, user_temperature, user_telemetry
    user_model = model
    user_api_base = api_base
    user_debug = debug

    load_config()
    user_api_base = api_base
    user_max_tokens = max_tokens
    user_temperature = temperature
    user_telemetry = telemetry
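To make the shape of the file `load_config()` expects concrete, here is a hypothetical `litellm.secrets.toml` parsed with `tomllib`. The section and key names mirror what the loader reads above ([keys], [general], [local_model]/[hosted_model], and the nested prompt_template table); every value, including the model name and the Llama-2-style prompt tokens, is a made-up example, and the real `template.secrets.toml` shipped with the package is not reproduced here.

```python
# Hypothetical config, shaped to match the sections load_config() reads above.
import tomllib  # Python >= 3.11 (see the tomli fallback sketch earlier)

EXAMPLE_SECRETS = """
[keys]
OPENAI_API_KEY = "sk-example"
ANTHROPIC_API_KEY = "sk-ant-example"

[general]
add_function_to_prompt = true
drop_params = true

[local_model]
model_name = "huggingface/bigcode/starcoder"  # made-up choice
max_tokens = 256
temperature = 0.2
api_base = "http://localhost:8080"

[local_model.prompt_template]
MODEL_PRE_PROMPT = ""
MODEL_SYSTEM_MESSAGE_START_TOKEN = "<<SYS>>"
MODEL_SYSTEM_MESSAGE_END_TOKEN = "<</SYS>>"
MODEL_USER_MESSAGE_START_TOKEN = "[INST]"
MODEL_USER_MESSAGE_END_TOKEN = "[/INST]"
MODEL_POST_PROMPT = ""
"""

user_config = tomllib.loads(EXAMPLE_SECRETS)
print(list(user_config["keys"]))                 # provider keys that would be set on litellm
print(user_config["local_model"]["model_name"])  # picked up when user_model == "local"
```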
@@ -65,6 +157,7 @@ def initialize(model, api_base, debug, temperature, max_tokens, max_budget, tele
    if max_budget:
        litellm.max_budget = max_budget


def deploy_proxy(model, api_base, debug, temperature, max_tokens, telemetry, deploy):
    import requests
    # Load .env file
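Finally, a hedged sketch of how the CLI layer might hand parsed flags to the reworked `initialize()`. The flag parsing itself is not part of the hunks shown here, and the argument values below are examples, not CLI defaults; this assumes the module context in which `initialize()` is defined.

```python
# Illustrative call only; values are examples chosen to exercise the config path above.
initialize(
    model="local",        # makes load_config() read the [local_model] section
    api_base=None,        # note: user_api_base is re-assigned from this flag after load_config()
    debug=True,
    temperature=0.2,
    max_tokens=256,
    max_budget=None,
    telemetry=False,
    drop_params=True,
    add_function_to_prompt=True,
)
```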