mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-27 11:43:54 +00:00
adding berrispend integration
This commit is contained in:
parent
d3a2ac808a
commit
a6e236459d
10 changed files with 292 additions and 11 deletions
99
litellm/integrations/berrispend.py
Normal file
99
litellm/integrations/berrispend.py
Normal file
|
@ -0,0 +1,99 @@
|
|||
#### What this does ####
|
||||
# On success + failure, log events to aispend.io
|
||||
import dotenv, os
|
||||
import requests
|
||||
dotenv.load_dotenv() # Loading env variables using dotenv
|
||||
import traceback
|
||||
import datetime
|
||||
|
||||
model_cost = {
|
||||
"gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
|
||||
"gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, # azure model name
|
||||
"gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
|
||||
"gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
|
||||
"gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
|
||||
"gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004}, # azure model name
|
||||
"gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
|
||||
"gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
|
||||
"gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
|
||||
"gpt-4-32k": {"max_tokens": 8000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
|
||||
"claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
|
||||
"claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
|
||||
"text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
|
||||
"chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
|
||||
"command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
|
||||
}
|
||||
|
||||
class BerriSpendLogger:
|
||||
# Class variables or attributes
|
||||
def __init__(self):
|
||||
# Instance variables
|
||||
self.account_id = os.getenv("BERRISPEND_ACCOUNT_ID")
|
||||
|
||||
def price_calculator(self, model, response_obj, start_time, end_time):
|
||||
# try and find if the model is in the model_cost map
|
||||
# else default to the average of the costs
|
||||
prompt_tokens_cost_usd_dollar = 0
|
||||
completion_tokens_cost_usd_dollar = 0
|
||||
if model in model_cost:
|
||||
prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
|
||||
completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
|
||||
elif "replicate" in model:
|
||||
# replicate models are charged based on time
|
||||
# llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
|
||||
model_run_time = end_time - start_time # assuming time in seconds
|
||||
cost_usd_dollar = model_run_time * 0.0032
|
||||
prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
|
||||
completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
|
||||
else:
|
||||
# calculate average input cost
|
||||
input_cost_sum = 0
|
||||
output_cost_sum = 0
|
||||
for model in model_cost:
|
||||
input_cost_sum += model_cost[model]["input_cost_per_token"]
|
||||
output_cost_sum += model_cost[model]["output_cost_per_token"]
|
||||
avg_input_cost = input_cost_sum / len(model_cost.keys())
|
||||
avg_output_cost = output_cost_sum / len(model_cost.keys())
|
||||
prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
|
||||
completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
|
||||
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
||||
|
||||
def log_event(self, model, messages, response_obj, start_time, end_time, print_verbose):
|
||||
# Method definition
|
||||
try:
|
||||
print_verbose(f"BerriSpend Logging - Enters logging function for model {model}")
|
||||
|
||||
url = f"https://berrispend.berri.ai/spend"
|
||||
headers = {
|
||||
'Content-Type': 'application/json'
|
||||
}
|
||||
|
||||
prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = self.price_calculator(model, response_obj, start_time, end_time)
|
||||
total_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
|
||||
|
||||
response_time = (end_time-start_time).total_seconds()
|
||||
if "response" in response_obj:
|
||||
data = [{
|
||||
"response_time": response_time,
|
||||
"model_id": response_obj["model"],
|
||||
"total_cost": total_cost,
|
||||
"messages": messages,
|
||||
"response": response_obj['choices'][0]['message']['content'],
|
||||
"account_id": self.account_id
|
||||
}]
|
||||
elif "error" in response_obj:
|
||||
data = [{
|
||||
"response_time": response_time,
|
||||
"model_id": response_obj["model"],
|
||||
"total_cost": total_cost,
|
||||
"messages": messages,
|
||||
"error": response_obj['error'],
|
||||
"account_id": self.account_id
|
||||
}]
|
||||
|
||||
print_verbose(f"BerriSpend Logging - final data object: {data}")
|
||||
response = requests.post(url, headers=headers, json=data)
|
||||
except:
|
||||
# traceback.print_exc()
|
||||
print_verbose(f"BerriSpend Logging Error - {traceback.format_exc()}")
|
||||
pass
|
Loading…
Add table
Add a link
Reference in a new issue