adding berrispend integration

Krrish Dholakia 2023-08-05 16:11:45 -07:00
parent c03c4a4871
commit a0ae1d6a18
10 changed files with 292 additions and 11 deletions

litellm/integrations/aispend.py

@@ -0,0 +1,94 @@
#### What this does ####
# On success + failure, log events to aispend.io
import dotenv, os
import requests
dotenv.load_dotenv() # Loading env variables using dotenv
import traceback
import datetime
model_cost = {
    "gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
    "gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, # azure model name
    "gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
    "gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
    "gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
    "gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004}, # azure model name
    "gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
    "gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006},
    "gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006},
    "gpt-4-32k": {"max_tokens": 32000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
    "claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
    "claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
    "text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
    "chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
    "command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
}
class AISpendLogger:
    # Class variables or attributes
    def __init__(self):
        # Instance variables
        self.account_id = os.getenv("AISPEND_ACCOUNT_ID")
        self.api_key = os.getenv("AISPEND_API_KEY")
    def price_calculator(self, model, response_obj, start_time, end_time):
        # try to find the model in the model_cost map;
        # else default to the average of the known costs
        prompt_tokens_cost_usd_dollar = 0
        completion_tokens_cost_usd_dollar = 0
        if model in model_cost:
            prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
            completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
        elif "replicate" in model:
            # replicate models are charged based on time
            # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
            model_run_time = (end_time - start_time).total_seconds() # start_time/end_time are datetimes
            cost_usd_dollar = model_run_time * 0.0032
            prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
            completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
        else:
            # calculate the average input and output cost across all known models
            # (a distinct loop variable avoids shadowing the `model` argument)
            input_cost_sum = 0
            output_cost_sum = 0
            for cost_model in model_cost:
                input_cost_sum += model_cost[cost_model]["input_cost_per_token"]
                output_cost_sum += model_cost[cost_model]["output_cost_per_token"]
            avg_input_cost = input_cost_sum / len(model_cost.keys())
            avg_output_cost = output_cost_sum / len(model_cost.keys())
            prompt_tokens_cost_usd_dollar = avg_input_cost * response_obj["usage"]["prompt_tokens"]
            completion_tokens_cost_usd_dollar = avg_output_cost * response_obj["usage"]["completion_tokens"]
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
    def log_event(self, model, response_obj, start_time, end_time, print_verbose):
        # Method definition
        try:
            print_verbose(f"AISpend Logging - Enters logging function for model {model}")
            url = f"https://aispend.io/api/v1/accounts/{self.account_id}/data"
            headers = {
                'Authorization': f'Bearer {self.api_key}',
                'Content-Type': 'application/json'
            }
            response_timestamp = datetime.datetime.fromtimestamp(int(response_obj["created"])).strftime('%Y-%m-%d')
            prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = self.price_calculator(model, response_obj, start_time, end_time)
            prompt_tokens_cost_usd_cent = prompt_tokens_cost_usd_dollar * 100
            completion_tokens_cost_usd_cent = completion_tokens_cost_usd_dollar * 100
            data = [{
                "requests": 1,
                "requests_context": 1,
                "context_tokens": response_obj["usage"]["prompt_tokens"],
                "requests_generated": 1,
                "generated_tokens": response_obj["usage"]["completion_tokens"],
                "recorded_date": response_timestamp,
                "model_id": response_obj["model"],
                "generated_tokens_cost_usd_cent": completion_tokens_cost_usd_cent,
                "context_tokens_cost_usd_cent": prompt_tokens_cost_usd_cent
            }]
            print_verbose(f"AISpend Logging - final data object: {data}")
            response = requests.post(url, headers=headers, json=data) # send the usage data to aispend
        except:
            # traceback.print_exc()
            print_verbose(f"AISpend Logging Error - {traceback.format_exc()}")
            pass
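To make the cost math concrete, here is a small usage sketch (not part of the commit; the model name and token counts are illustrative, and the equal timestamps are fine because the timed replicate branch is not taken):

example_response = {"usage": {"prompt_tokens": 100, "completion_tokens": 50}}
now = datetime.datetime.now()
logger = AISpendLogger()
prompt_cost, completion_cost = logger.price_calculator("gpt-3.5-turbo", example_response, now, now)
# prompt: 100 * 0.0000015 = 0.00015 USD; completion: 50 * 0.000002 = 0.0001 USD
# a 10-second replicate run would instead cost 10 * 0.0032 = 0.032 USD, split evenly across prompt/completion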

litellm/integrations/berrispend.py

@@ -0,0 +1,99 @@
#### What this does ####
# On success + failure, log events to BerriSpend
import dotenv, os
import requests
dotenv.load_dotenv() # Loading env variables using dotenv
import traceback
import datetime
model_cost = {
    "gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
    "gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}, # azure model name
    "gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
    "gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
    "gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
    "gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004}, # azure model name
    "gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
    "gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006},
    "gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006},
    "gpt-4-32k": {"max_tokens": 32000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
    "claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
    "claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
    "text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
    "chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
    "command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
}
class BerriSpendLogger:
    # Class variables or attributes
    def __init__(self):
        # Instance variables
        self.account_id = os.getenv("BERRISPEND_ACCOUNT_ID")
    def price_calculator(self, model, response_obj, start_time, end_time):
        # try to find the model in the model_cost map;
        # else default to the average of the known costs
        prompt_tokens_cost_usd_dollar = 0
        completion_tokens_cost_usd_dollar = 0
        if model in model_cost:
            prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
            completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
        elif "replicate" in model:
            # replicate models are charged based on time
            # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
            model_run_time = (end_time - start_time).total_seconds() # start_time/end_time are datetimes
            cost_usd_dollar = model_run_time * 0.0032
            prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
            completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
        else:
            # calculate the average input and output cost across all known models
            # (a distinct loop variable avoids shadowing the `model` argument)
            input_cost_sum = 0
            output_cost_sum = 0
            for cost_model in model_cost:
                input_cost_sum += model_cost[cost_model]["input_cost_per_token"]
                output_cost_sum += model_cost[cost_model]["output_cost_per_token"]
            avg_input_cost = input_cost_sum / len(model_cost.keys())
            avg_output_cost = output_cost_sum / len(model_cost.keys())
            prompt_tokens_cost_usd_dollar = avg_input_cost * response_obj["usage"]["prompt_tokens"]
            completion_tokens_cost_usd_dollar = avg_output_cost * response_obj["usage"]["completion_tokens"]
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
    def log_event(self, model, messages, response_obj, start_time, end_time, print_verbose):
        # Method definition
        try:
            print_verbose(f"BerriSpend Logging - Enters logging function for model {model}")
            url = "https://berrispend.berri.ai/spend"
            headers = {
                'Content-Type': 'application/json'
            }
            prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = self.price_calculator(model, response_obj, start_time, end_time)
            total_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
            response_time = (end_time - start_time).total_seconds()
            if "response" in response_obj:
                data = [{
                    "response_time": response_time,
                    "model_id": response_obj["model"],
                    "total_cost": total_cost,
                    "messages": messages,
                    "response": response_obj['choices'][0]['message']['content'],
                    "account_id": self.account_id
                }]
            elif "error" in response_obj:
                data = [{
                    "response_time": response_time,
                    "model_id": response_obj["model"],
                    "total_cost": total_cost,
                    "messages": messages,
                    "error": response_obj['error'],
                    "account_id": self.account_id
                }]
            else:
                # nothing recognizable to log
                return
            print_verbose(f"BerriSpend Logging - final data object: {data}")
            response = requests.post(url, headers=headers, json=data)
        except:
            # traceback.print_exc()
            print_verbose(f"BerriSpend Logging Error - {traceback.format_exc()}")
            pass
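For reference, the success-path payload this would POST to https://berrispend.berri.ai/spend looks roughly like the following (illustrative values only; the field names come from the code above):

data = [{
    "response_time": 1.42,
    "model_id": "gpt-3.5-turbo",
    "total_cost": 0.00025,
    "messages": [{"role": "user", "content": "Hello, how are you?"}],
    "response": "I'm doing well, thanks!",
    "account_id": "<BERRISPEND_ACCOUNT_ID>"
}]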

litellm/tests/test_berrispend_integration.py

@@ -0,0 +1,25 @@
#### What this tests ####
# This tests if logging to the berrispend integration actually works
import sys, os
import traceback
import pytest
sys.path.insert(0, os.path.abspath('../..')) # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion
litellm.success_callback = ["berrispend"]
litellm.failure_callback = ["berrispend"]
litellm.set_verbose = True
user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]
#openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])
#bad request call
response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}])

litellm/utils.py

@@ -2,7 +2,10 @@ import dotenv, json, traceback, threading
import subprocess, os
import litellm, openai
import random, uuid, requests
import datetime, time
from anthropic import Anthropic
import tiktoken
encoding = tiktoken.get_encoding("cl100k_base")
from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
####### ENVIRONMENT VARIABLES ###################
dotenv.load_dotenv() # Loading env variables using dotenv
@@ -13,6 +16,8 @@ posthog = None
slack_app = None
alerts_channel = None
heliconeLogger = None
aispendLogger = None
berrispendLogger = None
callback_list = []
user_logger_fn = None
additional_details = {}
@@ -89,6 +94,7 @@ def client(original_function):
        pass
    def wrapper(*args, **kwargs):
        start_time = None
        try:
            function_setup(*args, **kwargs)
            ## MODEL CALL
@@ -101,7 +107,8 @@ def client(original_function):
            return result
        except Exception as e:
            traceback_exception = traceback.format_exc()
            end_time = datetime.datetime.now()
            my_thread = threading.Thread(target=handle_failure, args=(e, traceback_exception, start_time, end_time, args, kwargs)) # don't interrupt execution of main thread
            my_thread.start()
            raise e
    return wrapper
@@ -153,7 +160,7 @@ def get_optional_params(
    return optional_params
def set_callbacks(callback_list):
    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger, aispendLogger, berrispendLogger
    try:
        for callback in callback_list:
            if callback == "sentry":
@@ -193,14 +200,19 @@ def set_callbacks(callback_list):
print_verbose(f"Initialized Slack App: {slack_app}") print_verbose(f"Initialized Slack App: {slack_app}")
elif callback == "helicone": elif callback == "helicone":
from .integrations.helicone import HeliconeLogger from .integrations.helicone import HeliconeLogger
heliconeLogger = HeliconeLogger() heliconeLogger = HeliconeLogger()
elif callback == "aispend":
from .integrations.aispend import AISpendLogger
aispendLogger = AISpendLogger()
elif callback == "berrispend":
from .integrations.berrispend import BerriSpendLogger
berrispendLogger = BerriSpendLogger()
except: except:
pass pass
def handle_failure(exception, traceback_exception, args, kwargs): def handle_failure(exception, traceback_exception, start_time, end_time, args, kwargs):
global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, aispendLogger, berrispendLogger
try: try:
# print_verbose(f"handle_failure args: {args}") # print_verbose(f"handle_failure args: {args}")
# print_verbose(f"handle_failure kwargs: {kwargs}") # print_verbose(f"handle_failure kwargs: {kwargs}")
@@ -248,6 +260,33 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, kwargs):
                unique_id = str(uuid.uuid4())
                posthog.capture(unique_id, event_name)
                print_verbose(f"successfully logged to PostHog!")
            elif callback == "berrispend":
                print_verbose("reaches berrispend for logging!")
                model = args[0] if len(args) > 0 else kwargs["model"]
                messages = args[1] if len(args) > 1 else kwargs["messages"]
                result = {
                    "model": model,
                    "created": time.time(),
                    "error": traceback_exception,
                    "usage": {
                        "prompt_tokens": prompt_token_calculator(model, messages=messages),
                        "completion_tokens": 0
                    }
                }
                berrispendLogger.log_event(model=model, messages=messages, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose)
            elif callback == "aispend":
                print_verbose("reaches aispend for logging!")
                model = args[0] if len(args) > 0 else kwargs["model"]
                messages = args[1] if len(args) > 1 else kwargs["messages"]
                result = {
                    "model": model,
                    "created": time.time(),
                    "usage": {
                        "prompt_tokens": prompt_token_calculator(model, messages=messages),
                        "completion_tokens": 0
                    }
                }
                aispendLogger.log_event(model=model, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose)
    except:
        print_verbose(f"Error Occurred while logging failure: {traceback.format_exc()}")
        pass
@@ -264,8 +303,21 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, kwargs):
        logging(logger_fn=user_logger_fn, exception=e)
        pass
def prompt_token_calculator(model, messages):
    # use tiktoken or anthropic's tokenizer depending on the model
    text = " ".join(message["content"] for message in messages)
    num_tokens = 0
    if "claude" in model:
        anthropic = Anthropic()
        num_tokens = anthropic.count_tokens(text)
    else:
        num_tokens = len(encoding.encode(text))
    return num_tokens
def handle_success(args, kwargs, result, start_time, end_time):
    global heliconeLogger, aispendLogger, berrispendLogger
    try:
        success_handler = additional_details.pop("success_handler", None)
        failure_handler = additional_details.pop("failure_handler", None)
@@ -293,8 +345,19 @@ def handle_success(args, kwargs, result, start_time, end_time):
                model = args[0] if len(args) > 0 else kwargs["model"]
                messages = args[1] if len(args) > 1 else kwargs["messages"]
                heliconeLogger.log_success(model=model, messages=messages, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose)
            elif callback == "aispend":
                print_verbose("reaches aispend for logging!")
                model = args[0] if len(args) > 0 else kwargs["model"]
                aispendLogger.log_event(model=model, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose)
            elif callback == "berrispend":
                print_verbose("reaches berrispend for logging!")
                model = args[0] if len(args) > 0 else kwargs["model"]
                messages = args[1] if len(args) > 1 else kwargs["messages"]
                berrispendLogger.log_event(model=model, messages=messages, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose)
    except Exception as e:
        ## LOGGING
        logging(logger_fn=user_logger_fn, exception=e)
        print_verbose(f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}")
        pass
    if success_handler and callable(success_handler):
@@ -303,7 +366,7 @@ def handle_success(args, kwargs, result, start_time, end_time):
    except Exception as e:
        ## LOGGING
        logging(logger_fn=user_logger_fn, exception=e)
        print_verbose(f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}")
        pass
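A quick sketch of the new prompt_token_calculator helper in use (the exact count depends on the tokenizer; this only illustrates which code path runs):

msgs = [{"role": "user", "content": "Hello, how are you?"}]
num = prompt_token_calculator("gpt-3.5-turbo", messages=msgs) # uses tiktoken's cl100k_base encoding
# for any model with "claude" in its name, Anthropic's tokenizer is used instead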

pyproject.toml

@@ -1,6 +1,6 @@
[tool.poetry]
name = "litellm"
version = "0.1.344"
description = "Library to easily interface with LLM API providers"
authors = ["BerriAI"]
license = "MIT License"