import click
import subprocess, traceback
import os, sys, appdirs
from dotenv import load_dotenv

load_dotenv()
from importlib import resources
import shutil, random

list_of_messages = [
    "'The thing I wish you improved is...'",
    "'A feature I really want is...'",
    "'The worst thing about this product is...'",
    "'This product would be better if...'",
    "'I don't like how this works...'",
    "'It would help me if you could add...'",
    "'This feature doesn't meet my needs because...'",
    "'I get frustrated when the product...'",
]


def generate_feedback_box():
    box_width = 60

    # Select a random message
    message = random.choice(list_of_messages)

    print()
    print('\033[1;37m' + '#' + '-' * box_width + '#\033[0m')
    print('\033[1;37m' + '#' + ' ' * box_width + '#\033[0m')
    print('\033[1;37m' + '# {:^59} #\033[0m'.format(message))
    print('\033[1;37m' + '# {:^59} #\033[0m'.format('https://github.com/BerriAI/litellm/issues/new'))
    print('\033[1;37m' + '#' + ' ' * box_width + '#\033[0m')
    print('\033[1;37m' + '#' + '-' * box_width + '#\033[0m')
    print()
    print(' Thank you for using LiteLLM! - Krrish & Ishaan')
    print()
    print()


# generate_feedback_box()

config_filename = "litellm.secrets.toml"
pkg_config_filename = "template.secrets.toml"
# Using appdirs to determine user-specific config path
config_dir = appdirs.user_config_dir("litellm")
user_config_path = os.path.join(config_dir, config_filename)


def run_ollama_serve():
    # Start `ollama serve` in the background, silencing its output
    command = ['ollama', 'serve']

    with open(os.devnull, 'w') as devnull:
        process = subprocess.Popen(command, stdout=devnull, stderr=devnull)


def open_config():
    # Create the user config file if it doesn't exist
    if not os.path.exists(user_config_path):
        # If the user's config doesn't exist, copy the default template shipped with the package
        here = os.path.abspath(os.path.dirname(__file__))
        parent_dir = os.path.dirname(here)
        default_env_path = os.path.join(parent_dir, pkg_config_filename)

        # Ensure the user-specific directory exists
        os.makedirs(config_dir, exist_ok=True)
        # Copying the file using shutil.copy
        try:
            shutil.copy(default_env_path, user_config_path)
        except Exception as e:
            print(f"Failed to copy {pkg_config_filename}: {e}")

    # Open the config file in the default editor
    if os.name == 'nt':  # For Windows
        os.startfile(user_config_path)
    elif os.name == 'posix':  # For macOS and Linux
        opener = 'open' if sys.platform == 'darwin' else 'xdg-open'
        subprocess.call((opener, user_config_path))


@click.command()
@click.option('--host', default='0.0.0.0', help='Host for the server to listen on.')
@click.option('--port', default=8000, help='Port to bind the server to.')
@click.option('--api_base', default=None, help='API base URL.')
@click.option('--model', default=None, help='The model name to pass to litellm.')
@click.option('--deploy', is_flag=True, type=bool, help='Get a deployed proxy endpoint - api.litellm.ai')
@click.option('--debug', is_flag=True, help='To debug the input')
@click.option('--temperature', default=None, type=float, help='Set temperature for the model')
@click.option('--max_tokens', default=None, type=int, help='Set max tokens for the model')
@click.option('--drop_params', is_flag=True, help='Drop any unmapped params')
@click.option('--add_function_to_prompt', is_flag=True, help='If a function is passed but unsupported, pass it as part of the prompt')
@click.option('--max_budget', default=None, type=float, help='Set max budget for API calls - works for hosted models like OpenAI, TogetherAI, Anthropic, etc.')
@click.option('--telemetry', default=True, type=bool, help='Helps us know if people are using this feature. Turn this off by doing `--telemetry False`')
@click.option('--config', is_flag=True, help='Create and open the litellm.secrets.toml config file from the packaged template')
@click.option('--test', flag_value=True, help='Proxy chat completions URL to make a test request to')
@click.option('--local', is_flag=True, default=False, help='For local debugging')
@click.option('--cost', is_flag=True, default=False, help='For viewing cost logs')
def run_server(host, port, api_base, model, deploy, debug, temperature, max_tokens, drop_params, add_function_to_prompt, max_budget, telemetry, config, test, local, cost):
    if config:
        open_config()
        return

    if local:
        from proxy_server import app, initialize, deploy_proxy, print_cost_logs
        debug = True
    else:
        from .proxy_server import app, initialize, deploy_proxy, print_cost_logs

    if deploy == True:
        print(f"\033[32mLiteLLM: Deploying your proxy to api.litellm.ai\033[0m\n")
        print(f"\033[32mLiteLLM: Deploying proxy for model: {model}\033[0m\n")
        url = deploy_proxy(model, api_base, debug, temperature, max_tokens, telemetry, deploy)
        print(f"\033[32mLiteLLM: Deploy Successful\033[0m\n")
        print(f"\033[32mLiteLLM: Your deployed url: {url}\033[0m\n")
        print(f"\033[32mLiteLLM: Test your URL using the following: \"litellm --test {url}\"\033[0m")
        return

    if model and "ollama" in model:
        run_ollama_serve()

    if cost == True:
        print_cost_logs()
        return

    if test != False:
        click.echo('LiteLLM: Making a test ChatCompletions request to your proxy')
        import openai
        if test == True:  # flag value set, test the local proxy
            api_base = f"http://{host}:{port}"
        else:  # a URL was passed in
            api_base = test
        openai.api_base = api_base
        openai.api_key = "temp-key"
        print(openai.api_base)

        response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=[
            {
                "role": "user",
                "content": "this is a test request, acknowledge that you got it"
            }
        ])
        click.echo(f'LiteLLM: response from proxy {response}')

        click.echo('LiteLLM: making a streaming ChatCompletions request to your proxy')
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "role": "user",
                    "content": "this is a test request, acknowledge that you got it"
                }
            ],
            stream=True,
        )
        for chunk in response:
            click.echo(f'LiteLLM: streaming response from proxy {chunk}')
        return
    else:
        initialize(model, api_base, debug, temperature, max_tokens, max_budget, telemetry, drop_params, add_function_to_prompt)
        try:
            import uvicorn
        except ImportError:
            raise ImportError("Uvicorn needs to be imported. Run - `pip install uvicorn`")
        print(f"\033[32mLiteLLM: Deployed Proxy Locally\033[0m\n")
        print(f"\033[32mLiteLLM: Test your local endpoint with: \"litellm --test\" [In a new terminal tab]\033[0m\n")
        print(f"\033[32mLiteLLM: Deploy your proxy using the following: \"litellm --model claude-instant-1 --deploy\" Get an https://api.litellm.ai/chat/completions endpoint \033[0m\n")
        uvicorn.run(app, host=host, port=port)


if __name__ == "__main__":
    run_server()