diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py
index 373577580..663befe04 100644
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@@ -74,15 +74,16 @@ def open_config():
 @click.option('--config', is_flag=True, help='Create and open .env file from .env.template')
 @click.option('--test', flag_value=True, help='proxy chat completions url to make a test request to')
 @click.option('--local', is_flag=True, default=False, help='for local debugging')
-def run_server(host, port, api_base, model, deploy, debug, temperature, max_tokens, drop_params, add_function_to_prompt, max_budget, telemetry, config, test, local):
+@click.option('--cost', is_flag=True, default=False, help='for viewing cost logs')
+def run_server(host, port, api_base, model, deploy, debug, temperature, max_tokens, drop_params, add_function_to_prompt, max_budget, telemetry, config, test, local, cost):
     if config:
         open_config()
 
     if local:
-        from proxy_server import app, initialize, deploy_proxy
+        from proxy_server import app, initialize, deploy_proxy, print_cost_logs
         debug = True
     else:
-        from .proxy_server import app, initialize, deploy_proxy
+        from .proxy_server import app, initialize, deploy_proxy, print_cost_logs
 
     if deploy == True:
         print(f"\033[32mLiteLLM: Deploying your proxy to api.litellm.ai\033[0m\n")
@@ -95,6 +96,9 @@ def run_server(host, port, api_base, model, deploy, debug, temperature, max_toke
         return
     if model and "ollama" in model:
         run_ollama_serve()
+    if cost == True:
+        print_cost_logs()
+        return
     if test != False:
         click.echo('LiteLLM: Making a test ChatCompletions request to your proxy')
         import openai
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 28bd22223..d4ad0834d 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -215,16 +215,18 @@ async def track_cost(response):
             format='%(asctime)s - %(message)s',
             datefmt='%Y-%m-%d %H:%M:%S'
         )
 
-        import datetime
-
         response_cost = litellm.completion_cost(completion_response=response)
-
-        logging.info(f"Model {response.model} Cost: {response_cost:.8f}")
-
+        logging.info(f"Model {response.model} Cost: ${response_cost:.8f}")
     except:
         pass
-
+def print_cost_logs():
+    with open('cost.log', 'r') as f:
+        # print this in green
+        print("\033[1;32m")
+        print(f.read())
+        print("\033[0m")
+    return
 
 @router.get("/ollama_logs")
 async def retrieve_server_log(request: Request):
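
For reviewers: a minimal sketch of what the new --cost flag ends up doing once this patch is applied. The packaged import path below is an assumption based on the repo layout (litellm/proxy/proxy_server.py), not something the diff adds, and it presumes a cost.log file already written by track_cost; the CLI itself falls back to a relative import when started with --local.

    # Rough equivalent of `litellm --cost` after this patch (illustration only).
    from litellm.proxy.proxy_server import print_cost_logs

    # Reads ./cost.log (populated by track_cost via logging.basicConfig) and
    # prints its contents wrapped in ANSI green escape codes.
    print_cost_logs()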