feat(proxy_server): adding model fallbacks and default model to toml

2025-04-27 03:34:10 +00:00 · 2023-10-13 15:30:18 -07:00 · 2023-10-13 15:30:18 -07:00 · 74c0d5b7a0
commit 74c0d5b7a0
parent ec925bfa2e
6 changed files with 14 additions and 2 deletions
--- a/litellm/init.py
+++ b/litellm/init.py
@ -43,6 +43,7 @@ _current_cost = 0 # private variable, used if max budget is set
 error_logs: Dict = {}
 add_function_to_prompt: bool = False # if function calling not supported by api, append function call details to system prompt
 client_session: Optional[requests.Session] = None
+model_fallbacks: Optional[List] = None
 #############################################

 def get_model_cost_map():
--- a/litellm/pycache/init.cpython-311.pyc
+++ b/litellm/pycache/init.cpython-311.pyc
--- a/litellm/pycache/main.cpython-311.pyc
+++ b/litellm/pycache/main.cpython-311.pyc
--- a/litellm/main.py
+++ b/litellm/main.py
@ -229,7 +229,7 @@ def completion(
    litellm_logging_obj = kwargs.get('litellm_logging_obj', None)
    id = kwargs.get('id', None)
    metadata = kwargs.get('metadata', None)
-    fallbacks = kwargs.get('fallbacks', [])
+    fallbacks = kwargs.get('fallbacks', None)
    ######## end of unpacking kwargs ###########
    openai_params = ["functions", "function_call", "temperature", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "request_timeout", "api_base", "api_version", "api_key"]
    litellm_params = ["metadata", "acompletion", "caching", "return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "metadata", "fallbacks", "azure"]
@ -239,7 +239,11 @@ def completion(
        return mock_completion(model, messages, stream=stream, mock_response=mock_response)
    try:
        logging = litellm_logging_obj
-        if fallbacks != []:
+        fallbacks = (
+            fallbacks
+            or litellm.model_fallbacks
+        )
+        if fallbacks is not None:
            return completion_with_fallbacks(**args)
        if litellm.model_alias_map and model in litellm.model_alias_map:
            args["model_alias_map"] = litellm.model_alias_map
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@ -125,6 +125,11 @@ def load_config():
        ## settings 
        litellm.add_function_to_prompt = user_config["general"].get("add_function_to_prompt", True) # by default add function to prompt if unsupported by provider
        litellm.drop_params = user_config["general"].get("drop_params", True) # by default drop params if unsupported by provider
+        litellm.model_fallbacks = user_config["general"].get("fallbacks", None) # fallback models in case initial completion call fails 
+        default_model = user_config["general"].get("default_model", None) # route all requests to this model. 
+
+        if user_model is None: # CLI `litellm --model <model-name>` takes precedence over default_model from the config.
+            user_model = default_model

        ## load model config - to set this run `litellm --config`
        model_config = None
--- a/litellm/proxy/secrets_template.toml
+++ b/litellm/proxy/secrets_template.toml
@ -11,6 +11,8 @@
 [general]
 # add_function_to_prompt = True # e.g: Ollama doesn't support functions, so add it to the prompt instead
 # drop_params = True # drop any params not supported by the provider (e.g. Ollama)
+# default_model = "ollama/llama2" # route all requests to this model (leave commented out to disable)
+# fallbacks = ["gpt-3.5-turbo", "gpt-4"] # models to fall back to, in order, if the completion call fails (remember: add relevant keys)

 [model."ollama/llama2"] # run via `litellm --model ollama/llama2`
 # max_tokens = "" # set max tokens for the model