diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py
index fa75d9bc16..fe6536ba97 100644
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@@ -75,18 +75,17 @@ def is_port_in_use(port):
 @click.option('--logs', flag_value=False, type=int, help='Gets the "n" most recent logs. By default gets most recent log.')
 @click.option('--test', flag_value=True, help='proxy chat completions url to make a test request to')
 @click.option('--local', is_flag=True, default=False, help='for local debugging')
-@click.option('--cost', is_flag=True, default=False, help='for viewing cost logs')
-def run_server(host, port, api_base, api_version, model, alias, add_key, headers, save, debug, temperature, max_tokens, request_timeout, drop_params, create_proxy, add_function_to_prompt, config, file, max_budget, telemetry, logs, test, local, cost, num_workers):
+def run_server(host, port, api_base, api_version, model, alias, add_key, headers, save, debug, temperature, max_tokens, request_timeout, drop_params, create_proxy, add_function_to_prompt, config, file, max_budget, telemetry, logs, test, local, num_workers):
     global feature_telemetry
     args = locals()
     if local:
-        from proxy_server import app, save_worker_config, print_cost_logs, usage_telemetry, add_keys_to_config
+        from proxy_server import app, save_worker_config, usage_telemetry, add_keys_to_config
         debug = True
     else:
         try:
-            from .proxy_server import app, save_worker_config, print_cost_logs, usage_telemetry, add_keys_to_config
+            from .proxy_server import app, save_worker_config, usage_telemetry, add_keys_to_config
         except ImportError as e:
-            from proxy_server import app, save_worker_config, print_cost_logs, usage_telemetry, add_keys_to_config
+            from proxy_server import app, save_worker_config, usage_telemetry, add_keys_to_config
     feature_telemetry = usage_telemetry
     if create_proxy == True:
         repo_url = 'https://github.com/BerriAI/litellm'
@@ -125,9 +124,6 @@ def run_server(host, port, api_base, api_version, model, alias, add_key, headers
     if model and "ollama" in model:
         print(f"ollama called")
         run_ollama_serve()
-    if cost == True:
-        print_cost_logs()
-        return
     if test != False:
         click.echo('LiteLLM: Making a test ChatCompletions request to your proxy')
         import openai
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 9a783c0ce0..a26f638115 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -401,110 +401,6 @@ def initialize(
     user_telemetry = telemetry
     usage_telemetry(feature="local_proxy_server")
 
-
-def track_cost_callback(
-    kwargs,  # kwargs to completion
-    completion_response,  # response from completion
-    start_time,
-    end_time,  # start/end time
-):
-    # track cost like this
-    # {
-    #     "Oct12": {
-    #         "gpt-4": 10,
-    #         "claude-2": 12.01,
-    #     },
-    #     "Oct 15": {
-    #         "ollama/llama2": 0.0,
-    #         "gpt2": 1.2
-    #     }
-    # }
-    try:
-        # for streaming responses
-        if "complete_streaming_response" in kwargs:
-            # for tracking streaming cost we pass the "messages" and the output_text to litellm.completion_cost
-            completion_response = kwargs["complete_streaming_response"]
-            input_text = kwargs["messages"]
-            output_text = completion_response["choices"][0]["message"]["content"]
-            response_cost = litellm.completion_cost(
-                model=kwargs["model"], messages=input_text, completion=output_text
-            )
-            model = kwargs["model"]
-
-        # for non streaming responses
-        else:
-            # we pass the completion_response obj
-            if kwargs["stream"] != True:
-                response_cost = litellm.completion_cost(
-                    completion_response=completion_response
-                )
-                model = completion_response["model"]
-
-        # read/write from json for storing daily model costs
-        cost_data = {}
-        try:
-            with open("costs.json") as f:
-                cost_data = json.load(f)
-        except FileNotFoundError:
-            cost_data = {}
-        import datetime
-
-        date = datetime.datetime.now().strftime("%b-%d-%Y")
-        if date not in cost_data:
-            cost_data[date] = {}
-
-        if kwargs["model"] in cost_data[date]:
-            cost_data[date][kwargs["model"]]["cost"] += response_cost
-            cost_data[date][kwargs["model"]]["num_requests"] += 1
-        else:
-            cost_data[date][kwargs["model"]] = {
-                "cost": response_cost,
-                "num_requests": 1,
-            }
-
-        with open("costs.json", "w") as f:
-            json.dump(cost_data, f, indent=2)
-
-    except:
-        pass
-
-
-def logger(
-    kwargs,  # kwargs to completion
-    completion_response=None,  # response from completion
-    start_time=None,
-    end_time=None,  # start/end time
-):
-    log_event_type = kwargs["log_event_type"]
-    try:
-        if log_event_type == "pre_api_call":
-            inference_params = copy.deepcopy(kwargs)
-            timestamp = inference_params.pop("start_time")
-            dt_key = timestamp.strftime("%Y%m%d%H%M%S%f")[:23]
-            log_data = {dt_key: {"pre_api_call": inference_params}}
-
-            try:
-                with open(log_file, "r") as f:
-                    existing_data = json.load(f)
-            except FileNotFoundError:
-                existing_data = {}
-
-            existing_data.update(log_data)
-
-            def write_to_log():
-                with open(log_file, "w") as f:
-                    json.dump(existing_data, f, indent=2)
-
-            thread = threading.Thread(target=write_to_log, daemon=True)
-            thread.start()
-    except:
-        pass
-
-
-litellm.input_callback = [logger]
-litellm.success_callback = [logger]
-litellm.failure_callback = [logger]
-
 # for streaming
 def data_generator(response):
     print_verbose("inside generator")
@@ -605,6 +501,7 @@ async def completion(request: Request, model: Optional[str] = None):
             **data
         )
     except Exception as e:
+        print(f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`")
         error_traceback = traceback.format_exc()
         error_msg = f"{str(e)}\n\n{error_traceback}"
         return {"error": error_msg}
@@ -638,14 +535,6 @@ async def chat_completion(request: Request, model: Optional[str] = None):
         error_msg = f"{str(e)}\n\n{error_traceback}"
         return {"error": error_msg}
 
-def print_cost_logs():
-    with open("costs.json", "r") as f:
-        # print this in green
-        print("\033[1;32m")
-        print(f.read())
-        print("\033[0m")
-        return
-
 @router.get("/ollama_logs")
 async def retrieve_server_log(request: Request):