mirror of https://github.com/BerriAI/litellm.git
(feat) proxy_server: begin using callback for tracking costs
commit e19b4fc114 (parent a27d9ad6bc)
1 changed file with 31 additions and 14 deletions
@@ -118,6 +118,37 @@ def data_generator(response):
         print_verbose(f"returned chunk: {chunk}")
         yield f"data: {json.dumps(chunk)}\n\n"
 
+def custom_callback(
+    kwargs,                 # kwargs to completion
+    completion_response,    # response from completion
+    start_time, end_time    # start/end time
+):
+    # Your custom code here
+    print("LITELLM: in custom callback function")
+    # print("kwargs", kwargs)
+    # print("start_time", start_time)
+    # print("end_time", end_time)
+    if "complete_streaming_response" in kwargs:
+        print("GOT COMPLETE STREAMING RESPONSE", kwargs["complete_streaming_response"])
+        response_cost = litellm.completion_cost(
+            completion_response=kwargs["complete_streaming_response"]
+        )
+        print("response_cost", response_cost)
+    else:
+        print("completion_response", completion_response)
+        response_cost = litellm.completion_cost(completion_response=completion_response)
+
+    logging.basicConfig(
+        filename='cost.log',
+        level=logging.INFO,
+        format='%(asctime)s - %(message)s',
+        datefmt='%Y-%m-%d %H:%M:%S'
+    )
+    logging.info(f"Model {completion_response.model} Cost: ${response_cost:.8f}")
+
+
+litellm.success_callback = [custom_callback]
+
 def litellm_completion(data, type):
     try:
         if user_model:
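For context, a minimal sketch of how a callback registered this way fires. litellm calls each function in litellm.success_callback after a completion succeeds, passing the original request kwargs, the response object, and the call's start/end times; cost_logger is a hypothetical stand-in for the custom_callback above, and the model name and prompt are illustrative:

import litellm

def cost_logger(kwargs, completion_response, start_time, end_time):
    # Same signature as custom_callback above: original call kwargs,
    # the final response object, and wall-clock start/end times.
    cost = litellm.completion_cost(completion_response=completion_response)
    print(f"response cost: ${cost:.8f}")

litellm.success_callback = [cost_logger]

# Any successful completion now triggers the registered callback.
litellm.completion(
    model="gpt-3.5-turbo",  # illustrative model name
    messages=[{"role": "user", "content": "hi"}],
)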
@@ -203,22 +234,8 @@ async def chat_completion(request: Request):
     data = await request.json()
     print_verbose(f"data passed in: {data}")
     response = litellm_completion(data, type="chat_completion")
-    # track cost of this response, using litellm.completion_cost
-    track_cost(response)
     return response
 
-async def track_cost(response):
-    try:
-        logging.basicConfig(
-            filename='cost.log',
-            level=logging.INFO,
-            format='%(asctime)s - %(message)s',
-            datefmt='%Y-%m-%d %H:%M:%S'
-        )
-        response_cost = litellm.completion_cost(completion_response=response)
-        logging.info(f"Model {response.model} Cost: ${response_cost:.8f}")
-    except:
-        pass
 
 def print_cost_logs():
     with open('cost.log', 'r') as f:
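Two notes on this hunk. The removed track_cost was an async coroutine invoked without await inside chat_completion, so its body never actually ran; moving the cost logging into a success callback sidesteps that. And for stream=True requests, litellm reassembles the streamed chunks and exposes the full response to success callbacks as kwargs["complete_streaming_response"], which is the key the new callback's if branch checks. A minimal sketch of exercising that streaming path, continuing the example above (model name and prompt are illustrative):

response = litellm.completion(
    model="gpt-3.5-turbo",  # illustrative model name
    messages=[{"role": "user", "content": "hi"}],
    stream=True,
)

# Consume the stream; the success callback fires with
# complete_streaming_response once the stream finishes.
for chunk in response:
    pass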