mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 18:54:30 +00:00)
(feat) add --cost as a flag to the proxy server cli
parent 70720c255e
commit ba754a07a3

2 changed files with 15 additions and 9 deletions
@@ -74,15 +74,16 @@ def open_config():
 @click.option('--config', is_flag=True, help='Create and open .env file from .env.template')
 @click.option('--test', flag_value=True, help='proxy chat completions url to make a test request to')
 @click.option('--local', is_flag=True, default=False, help='for local debugging')
-def run_server(host, port, api_base, model, deploy, debug, temperature, max_tokens, drop_params, add_function_to_prompt, max_budget, telemetry, config, test, local):
+@click.option('--cost', is_flag=True, default=False, help='for viewing cost logs')
+def run_server(host, port, api_base, model, deploy, debug, temperature, max_tokens, drop_params, add_function_to_prompt, max_budget, telemetry, config, test, local, cost):
     if config:
         open_config()
 
     if local:
-        from proxy_server import app, initialize, deploy_proxy
+        from proxy_server import app, initialize, deploy_proxy, print_cost_logs
         debug = True
     else:
-        from .proxy_server import app, initialize, deploy_proxy
+        from .proxy_server import app, initialize, deploy_proxy, print_cost_logs
 
     if deploy == True:
         print(f"\033[32mLiteLLM: Deploying your proxy to api.litellm.ai\033[0m\n")
@@ -95,6 +96,9 @@ def run_server(host, port, api_base, model, deploy, debug, temperature, max_toke
         return
     if model and "ollama" in model:
         run_ollama_serve()
+    if cost == True:
+        print_cost_logs()
+        return
     if test != False:
         click.echo('LiteLLM: Making a test ChatCompletions request to your proxy')
         import openai
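
For orientation, here is a minimal, self-contained sketch of the pattern these two hunks introduce: a click boolean flag (is_flag=True) that, when passed, prints the cost log and returns before the server is started. The command name cli and the stub print_cost_logs below are illustrative stand-ins, not the proxy's actual entry point or helper.

import click


def print_cost_logs():
    # Stand-in for the proxy helper of the same name: dump cost.log to the
    # terminal wrapped in ANSI green. Assumes cost.log exists in the cwd.
    with open('cost.log', 'r') as f:
        print("\033[1;32m" + f.read() + "\033[0m")


@click.command()
@click.option('--cost', is_flag=True, default=False, help='for viewing cost logs')
def cli(cost):
    if cost:
        # Short-circuit: show accumulated cost logs and exit without
        # starting anything else.
        print_cost_logs()
        return
    click.echo('starting server...')  # placeholder for the real startup path


if __name__ == '__main__':
    cli()

Run with --cost to print the accumulated log and exit; without the flag, execution falls through to the normal startup path.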
@@ -215,16 +215,18 @@ async def track_cost(response):
             format='%(asctime)s - %(message)s',
             datefmt='%Y-%m-%d %H:%M:%S'
         )
-        import datetime
 
         response_cost = litellm.completion_cost(completion_response=response)
-        logging.info(f"Model {response.model} Cost: {response_cost:.8f}")
+        logging.info(f"Model {response.model} Cost: ${response_cost:.8f}")
 
     except:
         pass
 
+def print_cost_logs():
+    with open('cost.log', 'r') as f:
+        # print this in green
+        print("\033[1;32m")
+        print(f.read())
+        print("\033[0m")
+    return
+
 @router.get("/ollama_logs")
 async def retrieve_server_log(request: Request):
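
The figure written to cost.log comes from litellm.completion_cost, as seen in the hunk above. As a rough, standalone sketch of that call outside the proxy (the model name is illustrative and a valid provider API key is assumed to be set in the environment):

import litellm

# Illustrative only: gpt-3.5-turbo is an arbitrary choice and requires
# OPENAI_API_KEY to be set; any litellm-supported model works the same way.
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
)

# The same helper track_cost uses above to price a completion response.
response_cost = litellm.completion_cost(completion_response=response)
print(f"Model {response.model} Cost: ${response_cost:.8f}")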