diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
index 32ca51041..efd2f42d5 100644
Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 6f74501e5..be268cfe4 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 6f9ecddb4..1f1eab4bb 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py
index c732fcbe7..373577580 100644
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@@ -69,11 +69,12 @@ def open_config():
 @click.option('--drop_params', is_flag=True, help='Drop any unmapped params')
 @click.option('--add_function_to_prompt', is_flag=True, help='If function passed but unsupported, pass it as prompt')
 @click.option('--max_tokens', default=None, type=int, help='Set max tokens for the model')
+@click.option('--max_budget', default=None, type=float, help='Set max budget for API calls - works for hosted models like OpenAI, TogetherAI, Anthropic, etc.')
 @click.option('--telemetry', default=True, type=bool, help='Helps us know if people are using this feature. Turn this off by doing `--telemetry False`')
 @click.option('--config', is_flag=True, help='Create and open .env file from .env.template')
 @click.option('--test', flag_value=True, help='proxy chat completions url to make a test request to')
 @click.option('--local', is_flag=True, default=False, help='for local debugging')
-def run_server(host, port, api_base, model, deploy, debug, temperature, max_tokens, drop_params, add_function_to_prompt, telemetry, config, test, local):
+def run_server(host, port, api_base, model, deploy, debug, temperature, max_tokens, drop_params, add_function_to_prompt, max_budget, telemetry, config, test, local):
     if config:
         open_config()
 
@@ -127,7 +128,7 @@ def run_server(host, port, api_base, model, deploy, debug, temperature, max_toke
         return
     else:
         load_config()
-        initialize(model, api_base, debug, temperature, max_tokens, telemetry, drop_params, add_function_to_prompt)
+        initialize(model, api_base, debug, temperature, max_tokens, max_budget, telemetry, drop_params, add_function_to_prompt)
 
 
     try:
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 8d7f0859a..e0a8ae68f 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -46,7 +46,7 @@ def usage_telemetry(): # helps us know if people are using this feature. Set `li
     }
     litellm.utils.litellm_telemetry(data=data)
 
-def initialize(model, api_base, debug, temperature, max_tokens, telemetry, drop_params, add_function_to_prompt):
+def initialize(model, api_base, debug, temperature, max_tokens, max_budget, telemetry, drop_params, add_function_to_prompt):
     global user_model, user_api_base, user_debug, user_max_tokens, user_temperature, user_telemetry
     user_model = model
     user_api_base = api_base
@@ -59,6 +59,8 @@ def initialize(model, api_base, debug, temperature, max_tokens, telemetry, drop_
         litellm.drop_params = True
     if add_function_to_prompt == True:
         litellm.add_function_to_prompt = True
+    if max_budget:
+        litellm.max_budget = max_budget
 
 def deploy_proxy(model, api_base, debug, temperature, max_tokens, telemetry, deploy):
     import requests
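
For context, a minimal sketch of the behavior this diff wires through: passing `--max_budget` to the proxy CLI now hands the value to `initialize()`, which assigns it to `litellm.max_budget`. The sketch assumes the CLI entry point is `litellm` and that the library raises `litellm.BudgetExceededError` once cumulative spend crosses the cap; neither name is established by this diff, so treat them as assumptions rather than the confirmed API.

# Rough library-level equivalent of `litellm --model gpt-3.5-turbo --max_budget 0.01`
# (CLI entry-point name assumed). The proxy's initialize() performs the assignment
# below; BudgetExceededError is an assumed exception name.
import litellm

litellm.max_budget = 0.01  # cap cumulative spend (USD) for this process

messages = [{"role": "user", "content": "hello"}]
try:
    for _ in range(100):
        litellm.completion(model="gpt-3.5-turbo", messages=messages)
except litellm.BudgetExceededError as err:
    # expected once accumulated cost exceeds the cap set above
    print(f"budget exceeded: {err}")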