mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 03:34:10 +00:00
feat(proxy_server): adding model fallbacks and default model to toml
This commit is contained in:
parent
ec925bfa2e
commit
74c0d5b7a0
6 changed files with 14 additions and 2 deletions
|
@ -43,6 +43,7 @@ _current_cost = 0 # private variable, used if max budget is set
|
|||
error_logs: Dict = {}
|
||||
add_function_to_prompt: bool = False # if function calling not supported by api, append function call details to system prompt
|
||||
client_session: Optional[requests.Session] = None
|
||||
model_fallbacks: Optional[List] = None
|
||||
#############################################
|
||||
|
||||
def get_model_cost_map():
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -229,7 +229,7 @@ def completion(
|
|||
litellm_logging_obj = kwargs.get('litellm_logging_obj', None)
|
||||
id = kwargs.get('id', None)
|
||||
metadata = kwargs.get('metadata', None)
|
||||
fallbacks = kwargs.get('fallbacks', [])
|
||||
fallbacks = kwargs.get('fallbacks', None)
|
||||
######## end of unpacking kwargs ###########
|
||||
openai_params = ["functions", "function_call", "temperature", "temperature", "top_p", "n", "stream", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "request_timeout", "api_base", "api_version", "api_key"]
|
||||
litellm_params = ["metadata", "acompletion", "caching", "return_async", "mock_response", "api_key", "api_version", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "metadata", "fallbacks", "azure"]
|
||||
|
@ -239,7 +239,11 @@ def completion(
|
|||
return mock_completion(model, messages, stream=stream, mock_response=mock_response)
|
||||
try:
|
||||
logging = litellm_logging_obj
|
||||
if fallbacks != []:
|
||||
fallbacks = (
|
||||
fallbacks
|
||||
or litellm.model_fallbacks
|
||||
)
|
||||
if fallbacks is not None:
|
||||
return completion_with_fallbacks(**args)
|
||||
if litellm.model_alias_map and model in litellm.model_alias_map:
|
||||
args["model_alias_map"] = litellm.model_alias_map
|
||||
|
|
|
@ -125,6 +125,11 @@ def load_config():
|
|||
## settings
|
||||
litellm.add_function_to_prompt = user_config["general"].get("add_function_to_prompt", True) # by default add function to prompt if unsupported by provider
|
||||
litellm.drop_params = user_config["general"].get("drop_params", True) # by default drop params if unsupported by provider
|
||||
litellm.model_fallbacks = user_config["general"].get("fallbacks", None) # fallback models in case initial completion call fails
|
||||
default_model = user_config["general"].get("default_model", None) # route all requests to this model.
|
||||
|
||||
if user_model is None: # `litellm --model <model-name>` takes precedence over default_model.
|
||||
user_model = default_model
|
||||
|
||||
## load model config - to set this run `litellm --config`
|
||||
model_config = None
|
||||
|
|
|
@ -11,6 +11,8 @@
|
|||
[general]
|
||||
# add_function_to_prompt = true # e.g: Ollama doesn't support functions, so add it to the prompt instead
|
||||
# drop_params = true # drop any params not supported by the provider (e.g. Ollama)
|
||||
# default_model = None # route all requests to this model
|
||||
# fallbacks = ["gpt-3.5-turbo", "gpt-4"] # models to fall back to if the completion call fails (remember: add the relevant API keys)
|
||||
|
||||
[model."ollama/llama2"] # run via `litellm --model ollama/llama2`
|
||||
# max_tokens = "" # set max tokens for the model
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue