diff --git a/litellm/proxy/cost.log b/litellm/proxy/cost.log
index 566d7dbbb..71a31fb6d 100644
--- a/litellm/proxy/cost.log
+++ b/litellm/proxy/cost.log
@@ -1,4 +1,6 @@
-2023-10-09 14:46:28 - Model gpt-3.5-turbo-0613 Cost: 6.1e-05
-2023-10-09 14:46:29 - Model gpt-3.5-turbo Cost: 0.0
-2023-10-09 14:48:18 - Model gpt-3.5-turbo-0613 Cost: 0.00004700
-2023-10-09 14:48:18 - Model gpt-3.5-turbo Cost: 0.00000000
+2023-10-10 11:30:40 - Model gpt-3.5-turbo Cost: $0.00126900
+2023-10-10 11:31:02 - Model gpt-3.5-turbo Cost: $0.00131700
+2023-10-10 11:31:57 - Model gpt-3.5-turbo Cost: $0.00132450
+2023-10-10 11:32:04 - Model gpt-3.5-turbo Cost: $0.00148000
+2023-10-10 11:32:05 - Model gpt-3.5-turbo Cost: $0.00138800
+2023-10-10 11:32:14 - Model gpt-3.5-turbo Cost: $0.00160450
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 252ccafa7..13b007521 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -118,36 +118,45 @@ def data_generator(response):
         print_verbose(f"returned chunk: {chunk}")
         yield f"data: {json.dumps(chunk)}\n\n"
 
-def custom_callback(
+def track_cost_callback(
     kwargs,               # kwargs to completion
     completion_response,  # response from completion
     start_time, end_time  # start/end time
 ):
-    # Your custom code here
-    print("LITELLM: in custom callback function")
-    # print("kwargs", kwargs)
-    # print("start_time", start_time)
-    # print("end_time", end_time)
-    if "complete_streaming_response" in kwargs:
-        print("GOT COMPLETE STREAMING RESPINSE", kwargs["complete_streaming_response"])
-        response_cost = litellm.completion_cost(
-            completion_response=kwargs["complete_streaming_response"]
+    try:
+        # init logging config
+        logging.basicConfig(
+            filename='cost.log',
+            level=logging.INFO,
+            format='%(asctime)s - %(message)s',
+            datefmt='%Y-%m-%d %H:%M:%S'
         )
-        print("response_cost", response_cost)
-    else:
-        print("completion_response", completion_response)
-        response_cost = litellm.completion_cost(completion_response=completion_response)
-    logging.basicConfig(
-        filename='cost.log',
-        level=logging.INFO,
-        format='%(asctime)s - %(message)s',
-        datefmt='%Y-%m-%d %H:%M:%S'
-    )
-    logging.info(f"Model {completion_response.model} Cost: ${response_cost:.8f}")
+        # check if it has collected an entire stream response
+        if "complete_streaming_response" in kwargs:
+            # for tracking streaming cost we pass the "messages" and the output_text to litellm.completion_cost
+            completion_response = kwargs["complete_streaming_response"]
+            input_text = kwargs["messages"]
+            output_text = completion_response["choices"][0]["message"]["content"]
+            response_cost = litellm.completion_cost(
+                model=kwargs["model"],
+                messages=input_text,
+                completion=output_text
+            )
+            print("streaming response_cost", response_cost)
+            logging.info(f"Model {kwargs['model']} Cost: ${response_cost:.8f}")
+        # for non-streaming responses
+        else:
+            # we pass the completion_response obj
+            if kwargs["stream"] != True:
+                response_cost = litellm.completion_cost(completion_response=completion_response)
+                print("regular response_cost", response_cost)
+                logging.info(f"Model {completion_response.model} Cost: ${response_cost:.8f}")
+    except:
+        pass
 
-litellm.success_callback = [custom_callback]
+litellm.success_callback = [track_cost_callback]
 
 def litellm_completion(data, type):
     try:
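
For reference, a minimal usage sketch (not part of the patch) that exercises track_cost_callback through the litellm SDK, assuming the function above is in scope and an API key is configured; the model name and prompt here are illustrative:

    import litellm

    # Register the callback exactly as the patch does.
    litellm.success_callback = [track_cost_callback]

    # Non-streaming call: the callback receives the full response object and
    # computes cost via litellm.completion_cost(completion_response=...).
    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello!"}],
    )

    # Streaming call: litellm only adds "complete_streaming_response" to the
    # callback kwargs once the stream is fully consumed, so drain the iterator.
    for _ in litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello!"}],
        stream=True,
    ):
        pass

Each successful call should then append a line like those in cost.log above, since the callback recomputes cost per request (token counts for streams, the response object otherwise) and logs it at INFO level.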