forked from phoenix/litellm-mirror
(feat) proxy_server: add track_cost_callback for streaming cost tracking
This commit is contained in:
parent
68b655df51
commit
7496afdf64
2 changed files with 37 additions and 26 deletions
|
@ -1,4 +1,6 @@
|
||||||
2023-10-09 14:46:28 - Model gpt-3.5-turbo-0613 Cost: 6.1e-05
|
2023-10-10 11:30:40 - Model gpt-3.5-turbo Cost: $0.00126900
|
||||||
2023-10-09 14:46:29 - Model gpt-3.5-turbo Cost: 0.0
|
2023-10-10 11:31:02 - Model gpt-3.5-turbo Cost: $0.00131700
|
||||||
2023-10-09 14:48:18 - Model gpt-3.5-turbo-0613 Cost: 0.00004700
|
2023-10-10 11:31:57 - Model gpt-3.5-turbo Cost: $0.00132450
|
||||||
2023-10-09 14:48:18 - Model gpt-3.5-turbo Cost: 0.00000000
|
2023-10-10 11:32:04 - Model gpt-3.5-turbo Cost: $0.00148000
|
||||||
|
2023-10-10 11:32:05 - Model gpt-3.5-turbo Cost: $0.00138800
|
||||||
|
2023-10-10 11:32:14 - Model gpt-3.5-turbo Cost: $0.00160450
|
||||||
|
|
|
@ -118,36 +118,45 @@ def data_generator(response):
|
||||||
print_verbose(f"returned chunk: {chunk}")
|
print_verbose(f"returned chunk: {chunk}")
|
||||||
yield f"data: {json.dumps(chunk)}\n\n"
|
yield f"data: {json.dumps(chunk)}\n\n"
|
||||||
|
|
||||||
def track_cost_callback(
    kwargs,                 # kwargs passed to completion
    completion_response,    # response object from completion
    start_time, end_time    # start/end time of the call
):
    """litellm success callback: compute and log the cost of a completed call.

    Appends one ``Model <name> Cost: $<amount>`` line to ``cost.log`` for both
    streaming and non-streaming completions. Cost tracking is best-effort:
    failures are logged and swallowed so they never break the request path.
    """
    try:
        # Configure file logging for cost entries. basicConfig is a no-op once
        # the root logger already has handlers, so repeated invocations from
        # this callback are harmless — the first call wins.
        logging.basicConfig(
            filename='cost.log',
            level=logging.INFO,
            format='%(asctime)s - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        )

        # Streaming: litellm collects the full stream into
        # kwargs["complete_streaming_response"]; cost is computed from the
        # input messages plus the generated output text.
        if "complete_streaming_response" in kwargs:
            streamed_response = kwargs["complete_streaming_response"]
            input_messages = kwargs["messages"]
            output_text = streamed_response["choices"][0]["message"]["content"]
            response_cost = litellm.completion_cost(
                model=kwargs["model"],
                messages=input_messages,
                completion=output_text
            )
            print("streaming response_cost", response_cost)
            logging.info(f"Model {kwargs['model']} Cost: ${response_cost:.8f}")
        # Non-streaming: the completion_response object carries everything
        # completion_cost needs. Use .get() — the original kwargs["stream"]
        # raised KeyError when the key was absent (masked by a bare except),
        # so a missing key is treated as "not streaming".
        elif kwargs.get("stream") is not True:
            response_cost = litellm.completion_cost(completion_response=completion_response)
            print("regular response_cost", response_cost)
            logging.info(f"Model {completion_response.model} Cost: ${response_cost:.8f}")
    except Exception:
        # Best-effort: never let cost tracking break the caller, but record
        # the failure instead of silently discarding it (was `except: pass`).
        logging.exception("track_cost_callback failed")


litellm.success_callback = [track_cost_callback]
|
||||||
def litellm_completion(data, type):
|
def litellm_completion(data, type):
|
||||||
try:
|
try:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue