mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
(feat) proxy: cost tracking per completion request
This commit is contained in:
parent
1365092c89
commit
9b53ea4b0f
1 changed files with 37 additions and 4 deletions
|
@ -1,7 +1,40 @@
|
|||
# import threading, time, litellm
|
||||
# import concurrent.futures
|
||||
# """
|
||||
# v1:
|
||||
import litellm
|
||||
from litellm import ModelResponse
|
||||
from proxy_server import llm_model_list
|
||||
|
||||
def track_cost_callback(
    kwargs,  # kwargs to completion
    completion_response: ModelResponse = None,  # response from completion
    start_time=None,
    end_time=None,  # start/end time for completion
):
    """Compute and print the cost of a single completion request.

    Intended to be used as a completion callback. Two paths are handled:

    * Streaming: when ``kwargs`` contains ``"complete_streaming_response"``
      the cost is computed from the input ``kwargs["messages"]`` and the
      text of the first choice of the assembled response.
    * Non-streaming: when ``kwargs["stream"]`` is not ``True`` (or absent)
      the cost is computed directly from ``completion_response``.

    Args:
        kwargs: The kwargs of the completion call. Keys read here:
            ``"model"``, ``"messages"``, ``"stream"`` and
            ``"complete_streaming_response"``.
        completion_response: The response object of the completion call.
        start_time: Start time of the completion call (unused here).
        end_time: End time of the completion call (unused here).

    Returns:
        None. The computed cost is only printed.

    This is best-effort: any error is reported via ``print`` and never
    propagated, so a cost-tracking failure cannot break the request.
    """
    try:
        # init logging config
        print("in custom callback tracking cost", llm_model_list)
        model = kwargs["model"]
        if "azure" in model:
            # TODO: for azure cost tracking, check the provided model list in
            # the config.yaml; we need to map azure/chatgpt-deployment
            # to -> azure/gpt-3.5-turbo. Not implemented yet.
            pass

        # check if it has collected an entire stream response
        if "complete_streaming_response" in kwargs:
            # for tracking streaming cost we pass the "messages" and the
            # output_text to litellm.completion_cost
            completion_response = kwargs["complete_streaming_response"]
            input_text = kwargs["messages"]
            output_text = completion_response["choices"][0]["message"]["content"]
            response_cost = litellm.completion_cost(
                model=model,
                messages=input_text,
                completion=output_text,
            )
            print("streaming response_cost", response_cost)
        # for non streaming responses; a missing "stream" key is treated as
        # non-streaming instead of raising KeyError
        elif kwargs.get("stream", False) is not True:
            # we pass the completion_response obj
            response_cost = litellm.completion_cost(
                completion_response=completion_response
            )
            print("regular response_cost", response_cost)
    except Exception as e:
        # Best-effort callback: report the failure instead of hiding it, but
        # do not re-raise. `Exception` (not bare except) lets
        # KeyboardInterrupt / SystemExit propagate.
        print("error in track_cost_callback:", repr(e))
|
||||
|
||||
# 1. `--experimental_async` starts 2 background threads:
|
||||
# - 1. to check the redis queue:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue