feat(proxy_server.py): dynamic reloading config.yaml with new models

Krrish Dholakia 2023-12-01 20:32:24 -08:00
parent eae5b3ce50
commit 72381c3cc2
2 changed files with 42 additions and 63 deletions

config.yaml (deleted)

@@ -1,35 +0,0 @@
-model_list:
-  - model_name: gpt-4
-    litellm_params:
-      model: azure/chatgpt-v-2
-      api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
-      api_version: "2023-05-15"
-      api_key:
-  - model_name: gpt-4
-    litellm_params:
-      model: azure/gpt-4
-      api_key:
-      api_base: https://openai-gpt-4-test-v-2.openai.azure.com/
-  - model_name: gpt-4
-    litellm_params:
-      model: azure/gpt-4
-      api_key:
-      api_base: https://openai-gpt-4-test-v-2.openai.azure.com/
-
-litellm_settings:
-  drop_params: True
-  set_verbose: True
-  # cache: # optional if you want to use caching
-  #   type: redis # tell litellm to use redis caching
-
-general_settings:
-  # master_key: sk-1234 # [OPTIONAL] Only use this if you want to require all calls to contain this key (Authorization: Bearer sk-1234)
-  # database_url: "postgresql://<user>:<password>@<host>:<port>/<dbname>" # [OPTIONAL] use for token-based auth to proxy
-
-environment_variables:
-  # settings for using redis caching
-  # REDIS_HOST: redis-16337.c322.us-east-1-2.ec2.cloud.redislabs.com
-  # REDIS_PORT: "16337"
-  # REDIS_PASSWORD:
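Note: the deleted file above also documents the schema the proxy reads back in: a model_list of model_name/litellm_params entries plus optional litellm_settings, general_settings, and environment_variables blocks. A minimal sketch of how such a file becomes a router (simplified from load_router_config below; the filename is an assumption):

import yaml
import litellm

# parse the YAML and hand model_list straight to a Router
with open("config.yaml", "r") as f:  # path is an assumption
    config = yaml.safe_load(f)
router = litellm.Router(model_list=config["model_list"])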

litellm/proxy/proxy_server.py

@@ -153,6 +153,10 @@ class ProxyChatCompletionRequest(BaseModel):
     class Config:
         extra='allow' # allow params not defined here, these fall in litellm.completion(**kwargs)
 
+class ModelParams(BaseModel):
+    model_name: str
+    litellm_params: dict
+
 user_api_base = None
 user_model = None
 user_debug = False
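ModelParams is the request body for the /model/new endpoint added further down: one config.yaml model_list entry expressed as JSON. For illustration, a payload it would validate (all values hypothetical):

from pydantic import BaseModel

class ModelParams(BaseModel):
    model_name: str
    litellm_params: dict

# hypothetical payload mirroring one model_list entry
params = ModelParams(
    model_name="gpt-4",
    litellm_params={
        "model": "azure/gpt-4",
        "api_base": "https://example.openai.azure.com/",  # placeholder
        "api_key": "sk-...",  # placeholder
    },
)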
@@ -162,6 +166,7 @@ user_temperature = None
 user_telemetry = True
 user_config = None
 user_headers = None
+user_config_file_path = None
 local_logging = True # writes logs to a local api_log.json file for debugging
 experimental = False
 #### GLOBAL VARIABLES ####
@@ -315,7 +320,6 @@ def load_from_azure_key_vault(use_azure_key_vault: bool = False):
     except Exception as e:
         print("Error when loading keys from Azure Key Vault. Ensure you run `pip install azure-identity azure-keyvault-secrets`")
-
 def cost_tracking():
     global prisma_client, master_key
     if prisma_client is not None and master_key is not None:
@@ -405,10 +409,11 @@ def run_ollama_serve():
     """)
 
 def load_router_config(router: Optional[litellm.Router], config_file_path: str):
-    global master_key
+    global master_key, user_config_file_path
     config = {}
     try:
         if os.path.exists(config_file_path):
+            user_config_file_path = config_file_path
             with open(config_file_path, 'r') as file:
                 config = yaml.safe_load(file)
         else:
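Recording user_config_file_path at load time is what enables the dynamic reload: a later handler can rewrite that same file and push it back through load_router_config. A condensed sketch of the round trip (hypothetical helper; the actual endpoint code follows below):

# hypothetical helper inside proxy_server.py, assuming the globals above
def reload_router():
    global llm_router, llm_model_list, general_settings
    llm_router, llm_model_list, general_settings = load_router_config(
        router=llm_router, config_file_path=user_config_file_path
    )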
@@ -947,36 +952,46 @@ async def info_key_fn(key: str = fastapi.Query(..., description="Key in the requ
 #### MODEL MANAGEMENT ####
+
+#### [BETA] - This is a beta endpoint, format might change based on user feedback. - https://github.com/BerriAI/litellm/issues/964
+@router.post("/model/new", description="Allows adding new models to the model list in the config.yaml", tags=["model management"], dependencies=[Depends(user_api_key_auth)])
+async def add_new_model(model_params: ModelParams):
+    global llm_router, llm_model_list, general_settings
+    try:
+        # Load existing config
+        with open(user_config_file_path, "r") as config_file:
+            config = yaml.safe_load(config_file)
+
+        # Add the new model to the config
+        config['model_list'].append({
+            'model_name': model_params.model_name,
+            'litellm_params': model_params.litellm_params
+        })
+
+        # Save the updated config
+        with open(user_config_file_path, "w") as config_file:
+            yaml.dump(config, config_file, default_flow_style=False)
+
+        # update Router
+        llm_router, llm_model_list, general_settings = load_router_config(router=llm_router, config_file_path=user_config_file_path)
+
+        return {"message": "Model added successfully"}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Internal Server Error: {str(e)}")
+
 #### [BETA] - This is a beta endpoint, format might change based on user feedback. - https://github.com/BerriAI/litellm/issues/933
 @router.get("/model/info", description="Provides more info about each model in /models, including config.yaml descriptions (except api key and api base)", tags=["model management"], dependencies=[Depends(user_api_key_auth)])
 async def model_info(request: Request):
     global llm_model_list, general_settings
-    all_models = []
-    if llm_model_list is not None:
-        for m in llm_model_list:
-            model_dict = {}
-            model_name = m["model_name"]
-            model_params = {}
-            for k,v in m["litellm_params"].items():
-                if k == "api_key" or k == "api_base": # don't send the api key or api base
-                    continue
-                if k == "model":
-                    ########## remove -ModelID-XXXX from model ##############
-                    original_model_string = v
-                    # Find the index of "ModelID" in the string
-                    index_of_model_id = original_model_string.find("-ModelID")
-                    # Remove everything after "-ModelID" if it exists
-                    if index_of_model_id != -1:
-                        v = original_model_string[:index_of_model_id]
-                    else:
-                        v = original_model_string
-                model_params[k] = v
-            model_dict["model_name"] = model_name
-            model_dict["model_params"] = model_params
-            all_models.append(model_dict)
+    # Load existing config
+    with open(user_config_file_path, "r") as config_file:
+        config = yaml.safe_load(config_file)
+    all_models = config['model_list']
+    for model in all_models:
+        # don't return the api key
+        model["litellm_params"].pop("api_key", None)
     # all_models = list(set([m["model_name"] for m in llm_model_list]))
     print_verbose(f"all_models: {all_models}")
     return dict(
@@ -991,7 +1006,6 @@ async def model_info(request: Request):
         ],
         object="list",
     )
-    pass
 #### EXPERIMENTAL QUEUING ####
 @router.post("/queue/request", dependencies=[Depends(user_api_key_auth)])
 async def async_queue_request(request: Request):
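Taken together, the two model-management endpoints added here let a client register a model and read the list back without restarting the proxy. A minimal usage sketch (assuming the proxy listens on http://localhost:8000 with no master_key set, and that requests is installed; all three are assumptions, not part of the commit):

import requests

BASE = "http://localhost:8000"  # assumed default; adjust to your deployment

# add a model: appended to config.yaml, then the router is hot-reloaded
resp = requests.post(
    f"{BASE}/model/new",
    json={
        "model_name": "gpt-4",
        "litellm_params": {
            "model": "azure/gpt-4",
            "api_base": "https://example.openai.azure.com/",  # placeholder
            "api_key": "sk-...",  # placeholder
        },
    },
)
print(resp.json())  # expected: {"message": "Model added successfully"}

# read the list back; the server pops api_key before returning
print(requests.get(f"{BASE}/model/info").json())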