# import litellm
# from litellm import ModelResponse
# from proxy_server import update_verification_token_cost
# from typing import Optional
# from fastapi import HTTPException, status
# import asyncio

# def track_cost_callback(
#     kwargs,                              # kwargs to completion
#     completion_response: ModelResponse,  # response from completion
#     start_time=None,
#     end_time=None,                       # start/end time for completion
# ):
#     try:
#         # pull the api key for this request from the litellm metadata
#         api_key = kwargs["litellm_params"]["metadata"]["api_key"]
#         # check if an entire stream response has been collected
#         if "complete_streaming_response" in kwargs:
#             # for streaming cost we pass the "messages" and the output_text to litellm.completion_cost
#             completion_response = kwargs["complete_streaming_response"]
#             input_text = kwargs["messages"]
#             output_text = completion_response["choices"][0]["message"]["content"]
#             response_cost = litellm.completion_cost(
#                 model=kwargs["model"],
#                 messages=input_text,
#                 completion=output_text,
#             )
#             print(f"LiteLLM Proxy: streaming response_cost: {response_cost} for api_key: {api_key}")
#         # for non-streaming responses we pass the completion_response obj
#         else:
#             if kwargs["stream"] != True:
#                 response_cost = litellm.completion_cost(completion_response=completion_response)
#                 print(f"\nLiteLLM Proxy: regular response_cost: {response_cost} for api_key: {api_key}")
#
#         ########### write costs to the DB api_key / cost map ###########
#         asyncio.run(
#             update_verification_token_cost(token=api_key, additional_cost=response_cost)
#         )
#     except Exception:
#         # cost tracking must never break the request path
#         pass
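
# A minimal usage sketch, assuming litellm's custom success-callback hook,
# which accepts callables with this (kwargs, completion_response,
# start_time, end_time) signature:
#
#   import litellm
#   litellm.success_callback = [track_cost_callback]
#   litellm.completion(
#       model="gpt-3.5-turbo",
#       messages=[{"role": "user", "content": "hi"}],
#   )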