diff --git a/litellm/__init__.py b/litellm/__init__.py
index 6b14214c6..699c2ef91 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -125,6 +125,12 @@ for key, value in model_cost.items():
     elif value.get('litellm_provider') == 'bedrock':
         bedrock_models.append(key)
 
+# known openai compatible endpoints - we'll eventually move this list to the model_prices_and_context_window.json dictionary
+openai_compatible_endpoints: List = [
+    "api.perplexity.ai"
+]
+
+
 # well supported replicate llms
 replicate_models: List = [
     # llama replicate supported LLMs
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 5d22e7d42..9aa83e657 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 91ca15d40..6cb4ebcbe 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/main.py b/litellm/main.py
index c6d0583a4..c5189ea46 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -253,7 +253,7 @@ def completion(
     if deployment_id != None: # azure llms
         model=deployment_id
         custom_llm_provider="azure"
-    model, custom_llm_provider = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider)
+    model, custom_llm_provider = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider, api_base=api_base)
     model_api_key = get_api_key(llm_provider=custom_llm_provider, dynamic_api_key=api_key) # get the api key from the environment if required for the model
     if model_api_key and "sk-litellm" in model_api_key:
         api_base = "https://proxy.litellm.ai"
diff --git a/litellm/proxy/cost.log b/litellm/proxy/cost.log
index d45f60ee2..88bacc3a7 100644
--- a/litellm/proxy/cost.log
+++ b/litellm/proxy/cost.log
@@ -2629,3 +2629,5 @@
 2023-10-10 12:59:03 - Model claude-2 Cost: $0.00299782
 2023-10-10 12:59:48 - Model claude-2 Cost: $0.00338998
 2023-10-10 13:00:45 - Model claude-2 Cost: $0.00286710
+2023-10-10 19:51:53 - Model claude-2 Cost: $0.01657712
+2023-10-10 20:29:51 - Model claude-2 Cost: $0.00041534
diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py
index a0a0209ee..fe188b385 100644
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@@ -38,8 +38,8 @@ def generate_feedback_box():
 
 generate_feedback_box()
 
-config_filename = ".env.litellm"
-
+config_filename = "litellm.secrets.toml"
+pkg_config_filename = "template.secrets.toml"
 # Using appdirs to determine user-specific config path
 config_dir = appdirs.user_config_dir("litellm")
 user_config_path = os.path.join(config_dir, config_filename)
@@ -50,37 +50,20 @@ def run_ollama_serve():
     with open(os.devnull, 'w') as devnull:
         process = subprocess.Popen(command, stdout=devnull, stderr=devnull)
 
-def load_config():
-    try:
-        if not os.path.exists(user_config_path):
-            # If user's config doesn't exist, copy the default config from the package
-            here = os.path.abspath(os.path.dirname(__file__))
-            parent_dir = os.path.dirname(here)
-            default_config_path = os.path.join(parent_dir, '.env.template')
-            # Ensure the user-specific directory exists
-            os.makedirs(config_dir, exist_ok=True)
-            # Copying the file using shutil.copy
-            shutil.copy(default_config_path, user_config_path)
-        # As the .env file is typically much simpler in structure, we use load_dotenv here directly
-        load_dotenv(dotenv_path=user_config_path)
-    except Exception as e:
-        traceback.print_exc()
-        pass
-
 def open_config():
     # Create the .env file if it doesn't exist
     if not os.path.exists(user_config_path):
         # If user's env doesn't exist, copy the default env from the package
         here = os.path.abspath(os.path.dirname(__file__))
         parent_dir = os.path.dirname(here)
-        default_env_path = os.path.join(parent_dir, '.env.template')
+        default_env_path = os.path.join(parent_dir, pkg_config_filename)
         # Ensure the user-specific directory exists
         os.makedirs(config_dir, exist_ok=True)
         # Copying the file using shutil.copy
        try:
            shutil.copy(default_env_path, user_config_path)
        except Exception as e:
-            print(f"Failed to copy .env.template: {e}")
+            print(f"Failed to copy template.secrets.toml: {e}")
 
     # Open the .env file in the default editor
     if os.name == 'nt': # For Windows
@@ -99,7 +82,6 @@ def open_config():
 @click.option('--max_tokens', default=None, type=int, help='Set max tokens for the model')
 @click.option('--drop_params', is_flag=True, help='Drop any unmapped params')
 @click.option('--add_function_to_prompt', is_flag=True, help='If function passed but unsupported, pass it as prompt')
-@click.option('--max_tokens', default=None, type=int, help='Set max tokens for the model')
 @click.option('--max_budget', default=None, type=float, help='Set max budget for API calls - works for hosted models like OpenAI, TogetherAI, Anthropic, etc.`')
 @click.option('--telemetry', default=True, type=bool, help='Helps us know if people are using this feature. Turn this off by doing `--telemetry False`')
 @click.option('--config', is_flag=True, help='Create and open .env file from .env.template')
@@ -109,7 +91,7 @@ def open_config():
 def run_server(host, port, api_base, model, deploy, debug, temperature, max_tokens, drop_params, add_function_to_prompt, max_budget, telemetry, config, test, local, cost):
     if config:
         open_config()
-
+        return
     if local:
         from proxy_server import app, initialize, deploy_proxy, print_cost_logs
         debug = True
@@ -162,10 +144,8 @@ def run_server(host, port, api_base, model, deploy, debug, temperature, max_toke
             click.echo(f'LiteLLM: streaming response from proxy {chunk}')
         return
     else:
-        load_config()
         initialize(model, api_base, debug, temperature, max_tokens, max_budget, telemetry, drop_params, add_function_to_prompt)
-
 
     try:
         import uvicorn
     except:
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index cdda255be..f79df7e8b 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1,4 +1,6 @@
-import sys, os, platform
+import sys, os, platform, appdirs
+import tomllib
+import shutil, random, traceback
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
@@ -35,6 +37,12 @@ user_debug = False
 user_max_tokens = None
 user_temperature = None
 user_telemetry = False
+user_config = None
+config_filename = "litellm.secrets.toml"
+pkg_config_filename = "template.secrets.toml"
+# Using appdirs to determine user-specific config path
+config_dir = appdirs.user_config_dir("litellm")
+user_config_path = os.path.join(config_dir, config_filename)
 
 #### HELPER FUNCTIONS ####
 def print_verbose(print_statement):
@@ -49,11 +57,95 @@ def usage_telemetry(): # helps us know if people are using this feature. Set `li
     }
     litellm.utils.litellm_telemetry(data=data)
 
+def load_config():
+    try:
+        global user_config, user_api_base, user_max_tokens, user_temperature, user_model
+        if not os.path.exists(user_config_path):
+            # If user's config doesn't exist, copy the default config from the package
+            here = os.path.abspath(os.path.dirname(__file__))
+            parent_dir = os.path.dirname(here)
+            default_config_path = os.path.join(parent_dir, pkg_config_filename)
+            # Ensure the user-specific directory exists
+            os.makedirs(config_dir, exist_ok=True)
+            # Copying the file using shutil.copy
+            shutil.copy(default_config_path, user_config_path)
+        # Load the user-specific TOML config
+        with open(user_config_path, "rb") as f:
+            user_config = tomllib.load(f)
+
+        ## load keys
+        if "keys" in user_config:
+            for key in user_config["keys"]:
+                if key == "HUGGINGFACE_API_KEY":
+                    litellm.huggingface_key = user_config["keys"][key]
+                elif key == "OPENAI_API_KEY":
+                    litellm.openai_key = user_config["keys"][key]
+                elif key == "TOGETHERAI_API_KEY":
+                    litellm.togetherai_api_key = user_config["keys"][key]
+                elif key == "NLP_CLOUD_API_KEY":
+                    litellm.nlp_cloud_key = user_config["keys"][key]
+                elif key == "ANTHROPIC_API_KEY":
+                    litellm.anthropic_key = user_config["keys"][key]
+                elif key == "REPLICATE_API_KEY":
+                    litellm.replicate_key = user_config["keys"][key]
+
+        ## settings
+        litellm.add_function_to_prompt = user_config["general"].get("add_function_to_prompt", True) # by default add function to prompt if unsupported by provider
+        litellm.drop_params = user_config["general"].get("drop_params", True) # by default drop params if unsupported by provider
+
+        ## load model config - to set this run `litellm --config`
+        model_config = None
+        if user_model == "local":
+            model_config = user_config["local_model"]
+        elif user_model == "hosted":
+            model_config = user_config["hosted_model"]
+            litellm.max_budget = model_config.get("max_budget", None) # check if user set a budget for hosted model - e.g. gpt-4
+
+        print_verbose(f"user_config: {user_config}")
+        if model_config is None:
+            return
+
+        user_model = model_config["model_name"] # raise an error if this isn't set when user runs either `litellm --model local_model` or `litellm --model hosted_model`
+        print_verbose(f"user_model: {user_model}")
+
+
+        user_max_tokens = model_config.get("max_tokens", None)
+        user_temperature = model_config.get("temperature", None)
+        user_api_base = model_config.get("api_base", None)
+
+        ## custom prompt template
+        if "prompt_template" in model_config:
+            model_prompt_template = model_config["prompt_template"]
+            if len(model_prompt_template.keys()) > 0: # if user has initialized this at all
+                litellm.register_prompt_template(
+                    model=user_model,
+                    initial_prompt_value=model_prompt_template.get("MODEL_PRE_PROMPT", ""),
+                    roles={
+                        "system": {
+                            "pre_message": model_prompt_template.get("MODEL_SYSTEM_MESSAGE_START_TOKEN", ""),
+                            "post_message": model_prompt_template.get("MODEL_SYSTEM_MESSAGE_END_TOKEN", ""),
+                        },
+                        "user": {
+                            "pre_message": model_prompt_template.get("MODEL_USER_MESSAGE_START_TOKEN", ""),
+                            "post_message": model_prompt_template.get("MODEL_USER_MESSAGE_END_TOKEN", ""),
+                        },
+                        "assistant": {
+                            "pre_message": model_prompt_template.get("MODEL_ASSISTANT_MESSAGE_START_TOKEN", ""),
+                            "post_message": model_prompt_template.get("MODEL_ASSISTANT_MESSAGE_END_TOKEN", ""),
+                        }
+                    },
+                    final_prompt_value=model_prompt_template.get("MODEL_POST_PROMPT", ""),
+                )
+    except Exception as e:
+        traceback.print_exc()
+
 def initialize(model, api_base, debug, temperature, max_tokens, max_budget, telemetry, drop_params, add_function_to_prompt):
     global user_model, user_api_base, user_debug, user_max_tokens, user_temperature, user_telemetry
     user_model = model
-    user_api_base = api_base
     user_debug = debug
+
+    load_config()
+    user_api_base = api_base
     user_max_tokens = max_tokens
     user_temperature = temperature
     user_telemetry = telemetry
@@ -65,6 +157,7 @@ def initialize(model, api_base, debug, temperature, max_tokens, max_budget, tele
     if max_budget:
         litellm.max_budget = max_budget
 
+
 def deploy_proxy(model, api_base, debug, temperature, max_tokens, telemetry, deploy):
     import requests
     # Load .env file
diff --git a/litellm/template.secrets.toml b/litellm/template.secrets.toml
new file mode 100644
index 000000000..59d5d0e16
--- /dev/null
+++ b/litellm/template.secrets.toml
@@ -0,0 +1,50 @@
+[keys]
+# HUGGINGFACE_API_KEY="" # Uncomment to save your Hugging Face API key
+# OPENAI_API_KEY="" # Uncomment to save your OpenAI API Key
+# TOGETHERAI_API_KEY="" # Uncomment to save your TogetherAI API key
+# NLP_CLOUD_API_KEY="" # Uncomment to save your NLP Cloud API key
+# ANTHROPIC_API_KEY="" # Uncomment to save your Anthropic API key
+# REPLICATE_API_KEY="" # Uncomment to save your Replicate API key
+
+[general]
+# add_function_to_prompt = True # e.g: Ollama doesn't support functions, so add it to the prompt instead
+# drop_params = True # drop any params not supported by the provider (e.g. Ollama)
+
+[local_model] # run via `litellm --model local_model`
+# model_name = "ollama/codellama" # Uncomment to set a local model
+# max_tokens = "" # set max tokens for the model
+# temperature = "" # set temperature for the model
+# api_base = "" # set a custom api base for the model
+
+[local_model.prompt_template] # Set a custom prompt template for your local model - docs: https://docs.litellm.ai/docs/completion/prompt_formatting#format-prompt-yourself
+# MODEL_SYSTEM_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
+# MODEL_SYSTEM_MESSAGE_END_TOKEN = "<|endoftext|>" # This does not need to be a token, can be any string
+
+# MODEL_USER_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
+# MODEL_USER_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to user messages. Can be any string.
+
+# MODEL_ASSISTANT_MESSAGE_START_TOKEN = "<|prompter|>" # Applies only to assistant messages. Can be any string.
+# MODEL_ASSISTANT_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to assistant messages. Can be any string.
+
+# MODEL_PRE_PROMPT = "You are a good bot" # Applied at the start of the prompt
+# MODEL_POST_PROMPT = "Now answer as best as you can" # Applied at the end of the prompt
+
+[hosted_model] # run via `litellm --model hosted_model`
+# model_name = "gpt-4"
+# max_tokens = "" # set max tokens for the model
+# temperature = "" # set temperature for the model
+# api_base = "" # set a custom api base for the model
+# max_budget = 100 # sets a max budget of $100 for your hosted model
+
+[hosted_model.prompt_template] # Set a custom prompt template for your hosted model - docs: https://docs.litellm.ai/docs/completion/prompt_formatting#format-prompt-yourself
+# MODEL_SYSTEM_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
+# MODEL_SYSTEM_MESSAGE_END_TOKEN = "<|endoftext|>" # This does not need to be a token, can be any string
+
+# MODEL_USER_MESSAGE_START_TOKEN = "<|prompter|>" # This does not need to be a token, can be any string
+# MODEL_USER_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to user messages. Can be any string.
+
+# MODEL_ASSISTANT_MESSAGE_START_TOKEN = "<|prompter|>" # Applies only to assistant messages. Can be any string.
+# MODEL_ASSISTANT_MESSAGE_END_TOKEN = "<|endoftext|>" # Applies only to assistant messages. Can be any string.
+
+# MODEL_PRE_PROMPT = "You are a good bot" # Applied at the start of the prompt
+# MODEL_POST_PROMPT = "Now answer as best as you can" # Applied at the end of the prompt
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index 127cdcad0..7331d7ec5 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1358,7 +1358,7 @@ def get_optional_params( # use the openai defaults
             optional_params[k] = passed_params[k]
     return optional_params
 
-def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None):
+def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None, api_base: Optional[str] = None):
     try:
         # check if llm provider provided
         if custom_llm_provider:
@@ -1370,6 +1370,13 @@ def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None):
             model = model.split("/", 1)[1]
             return model, custom_llm_provider
 
+        # check if api base is a known openai compatible endpoint
+        if api_base:
+            for endpoint in litellm.openai_compatible_endpoints:
+                if endpoint in api_base:
+                    custom_llm_provider = "openai"
+                    return model, custom_llm_provider
+
         # check if model in known model provider list -> for huggingface models, raise exception as they don't have a fixed provider (can be togetherai, anyscale, baseten, runpod, et.)
         ## openai - chatcompletion + text completion
         if model in litellm.open_ai_chat_completion_models:
@@ -1429,6 +1436,7 @@ def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None):
     except Exception as e:
         raise e
 
+
 def get_api_key(llm_provider: str, dynamic_api_key: Optional[str]):
     api_key = (dynamic_api_key or litellm.api_key)
     # openai
@@ -1503,6 +1511,7 @@ def get_api_key(llm_provider: str, dynamic_api_key: Optional[str]):
             get_secret("TOGETHER_AI_TOKEN")
         )
     return api_key
+
 def get_max_tokens(model: str):
     try:
         return litellm.model_cost[model]
@@ -2183,6 +2192,7 @@ def register_prompt_template(model: str, roles: dict, initial_prompt_value: str
         )
     ```
     """
+    model, _ = get_llm_provider(model=model)
     litellm.custom_prompt_dict[model] = {
         "roles": roles,
         "initial_prompt_value": initial_prompt_value,
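
A minimal, self-contained sketch of the api_base routing this diff adds to get_llm_provider(): it mirrors the new check against litellm.openai_compatible_endpoints without importing litellm, and the model name in the example call is hypothetical, not taken from the diff.

# Standalone sketch of the api_base check added to get_llm_provider() in this diff.
# The endpoint list mirrors litellm.openai_compatible_endpoints; the model name
# used in the example call below is a hypothetical placeholder.
from typing import Optional, Tuple

openai_compatible_endpoints = ["api.perplexity.ai"]

def resolve_provider(model: str, custom_llm_provider: Optional[str] = None,
                     api_base: Optional[str] = None) -> Tuple[str, Optional[str]]:
    # an explicitly passed provider always wins, as in the existing code path
    if custom_llm_provider:
        return model, custom_llm_provider
    # new behavior: a known OpenAI-compatible api_base forces the "openai" provider
    if api_base:
        for endpoint in openai_compatible_endpoints:
            if endpoint in api_base:
                return model, "openai"
    # otherwise fall through to the existing model-name lookups
    return model, None

print(resolve_provider("mistral-7b-instruct", api_base="https://api.perplexity.ai"))
# -> ('mistral-7b-instruct', 'openai')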