forked from phoenix/litellm-mirror
adding berrispend integration
This commit is contained in: parent c03c4a4871 · commit a0ae1d6a18
10 changed files with 292 additions and 11 deletions
BIN  litellm/integrations/__pycache__/aispend.cpython-311.pyc  (new file, binary not shown)
BIN  litellm/integrations/__pycache__/berrispend.cpython-311.pyc  (new file, binary not shown)
BIN  litellm/integrations/__pycache__/helicone.cpython-311.pyc  (new file, binary not shown)
94  litellm/integrations/aispend.py  (new file)
@@ -0,0 +1,94 @@
#### What this does ####
# On success + failure, log events to aispend.io
import dotenv, os
import requests
dotenv.load_dotenv()  # Loading env variables using dotenv
import traceback
import datetime

# cost per token in USD; max_tokens is the model's context window
model_cost = {
    "gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
    "gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},  # azure model name
    "gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
    "gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
    "gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
    "gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},  # azure model name
    "gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
    "gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006},
    "gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006},
    "gpt-4-32k": {"max_tokens": 32000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
    "claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
    "claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
    "text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
    "chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
    "command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
}

class AISpendLogger:
    def __init__(self):
        self.account_id = os.getenv("AISPEND_ACCOUNT_ID")
        self.api_key = os.getenv("AISPEND_API_KEY")

    def price_calculator(self, model, response_obj, start_time, end_time):
        # if the model is in the model_cost map use its per-token prices,
        # else default to the average of the known costs
        prompt_tokens_cost_usd_dollar = 0
        completion_tokens_cost_usd_dollar = 0
        if model in model_cost:
            prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
            completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
        elif "replicate" in model:
            # replicate models are charged based on time
            # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
            model_run_time = end_time - start_time  # assuming time in seconds
            cost_usd_dollar = model_run_time * 0.0032
            prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
            completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
        else:
            # fall back to the average input/output cost across all known models
            input_cost_sum = 0
            output_cost_sum = 0
            for cost_model in model_cost:  # don't shadow the `model` argument
                input_cost_sum += model_cost[cost_model]["input_cost_per_token"]
                output_cost_sum += model_cost[cost_model]["output_cost_per_token"]
            avg_input_cost = input_cost_sum / len(model_cost.keys())
            avg_output_cost = output_cost_sum / len(model_cost.keys())
            prompt_tokens_cost_usd_dollar = avg_input_cost * response_obj["usage"]["prompt_tokens"]
            completion_tokens_cost_usd_dollar = avg_output_cost * response_obj["usage"]["completion_tokens"]
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar

    def log_event(self, model, response_obj, start_time, end_time, print_verbose):
        try:
            print_verbose(f"AISpend Logging - Enters logging function for model {model}")

            url = f"https://aispend.io/api/v1/accounts/{self.account_id}/data"
            headers = {
                'Authorization': f'Bearer {self.api_key}',
                'Content-Type': 'application/json'
            }

            response_timestamp = datetime.datetime.fromtimestamp(int(response_obj["created"])).strftime('%Y-%m-%d')

            prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = self.price_calculator(model, response_obj, start_time, end_time)
            prompt_tokens_cost_usd_cent = prompt_tokens_cost_usd_dollar * 100
            completion_tokens_cost_usd_cent = completion_tokens_cost_usd_dollar * 100
            data = [{
                "requests": 1,
                "requests_context": 1,
                "context_tokens": response_obj["usage"]["prompt_tokens"],
                "requests_generated": 1,
                "generated_tokens": response_obj["usage"]["completion_tokens"],
                "recorded_date": response_timestamp,
                "model_id": response_obj["model"],
                "context_tokens_cost_usd_cent": prompt_tokens_cost_usd_cent,
                "generated_tokens_cost_usd_cent": completion_tokens_cost_usd_cent
            }]

            print_verbose(f"AISpend Logging - final data object: {data}")
            response = requests.post(url, headers=headers, json=data)
        except:
            # traceback.print_exc()
            print_verbose(f"AISpend Logging Error - {traceback.format_exc()}")
            pass
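price_calculator covers three cases: known models use their per-token prices, replicate models are billed by runtime, and anything else falls back to the fleet average. A minimal sketch of the fallback path, using a hand-built response payload rather than a real API response:

    # a minimal sketch; the response_obj here is fabricated for illustration
    from litellm.integrations.aispend import AISpendLogger

    logger = AISpendLogger()
    fake_response = {"usage": {"prompt_tokens": 100, "completion_tokens": 50}}
    prompt_cost, completion_cost = logger.price_calculator(
        model="some-unknown-model", response_obj=fake_response,
        start_time=None, end_time=None)
    # unknown models are priced at the average per-token cost across model_cost
    print(prompt_cost, completion_cost)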
99  litellm/integrations/berrispend.py  (new file)
@@ -0,0 +1,99 @@
#### What this does ####
# On success + failure, log events to berrispend.berri.ai
import dotenv, os
import requests
dotenv.load_dotenv()  # Loading env variables using dotenv
import traceback
import datetime

# cost per token in USD; max_tokens is the model's context window
model_cost = {
    "gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
    "gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},  # azure model name
    "gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
    "gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
    "gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
    "gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},  # azure model name
    "gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
    "gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006},
    "gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006},
    "gpt-4-32k": {"max_tokens": 32000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
    "claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
    "claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
    "text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
    "chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
    "command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
}

class BerriSpendLogger:
    def __init__(self):
        self.account_id = os.getenv("BERRISPEND_ACCOUNT_ID")

    def price_calculator(self, model, response_obj, start_time, end_time):
        # if the model is in the model_cost map use its per-token prices,
        # else default to the average of the known costs
        prompt_tokens_cost_usd_dollar = 0
        completion_tokens_cost_usd_dollar = 0
        if model in model_cost:
            prompt_tokens_cost_usd_dollar = model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"]
            completion_tokens_cost_usd_dollar = model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"]
        elif "replicate" in model:
            # replicate models are charged based on time
            # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
            model_run_time = end_time - start_time  # assuming time in seconds
            cost_usd_dollar = model_run_time * 0.0032
            prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
            completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
        else:
            # fall back to the average input/output cost across all known models
            input_cost_sum = 0
            output_cost_sum = 0
            for cost_model in model_cost:  # don't shadow the `model` argument
                input_cost_sum += model_cost[cost_model]["input_cost_per_token"]
                output_cost_sum += model_cost[cost_model]["output_cost_per_token"]
            avg_input_cost = input_cost_sum / len(model_cost.keys())
            avg_output_cost = output_cost_sum / len(model_cost.keys())
            prompt_tokens_cost_usd_dollar = avg_input_cost * response_obj["usage"]["prompt_tokens"]
            completion_tokens_cost_usd_dollar = avg_output_cost * response_obj["usage"]["completion_tokens"]
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar

    def log_event(self, model, messages, response_obj, start_time, end_time, print_verbose):
        try:
            print_verbose(f"BerriSpend Logging - Enters logging function for model {model}")

            url = "https://berrispend.berri.ai/spend"
            headers = {
                'Content-Type': 'application/json'
            }

            prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = self.price_calculator(model, response_obj, start_time, end_time)
            total_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar

            response_time = (end_time - start_time).total_seconds()
            if "response" in response_obj:
                data = [{
                    "response_time": response_time,
                    "model_id": response_obj["model"],
                    "total_cost": total_cost,
                    "messages": messages,
                    "response": response_obj['choices'][0]['message']['content'],
                    "account_id": self.account_id
                }]
            elif "error" in response_obj:
                data = [{
                    "response_time": response_time,
                    "model_id": response_obj["model"],
                    "total_cost": total_cost,
                    "messages": messages,
                    "error": response_obj['error'],
                    "account_id": self.account_id
                }]
            else:
                # neither a response nor an error to report - avoid a NameError below
                return

            print_verbose(f"BerriSpend Logging - final data object: {data}")
            response = requests.post(url, headers=headers, json=data)
        except:
            # traceback.print_exc()
            print_verbose(f"BerriSpend Logging Error - {traceback.format_exc()}")
            pass
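The logger can also be exercised directly. A hedged sketch of a success-shaped call: the response_obj below mirrors the OpenAI response fields log_event indexes into, with illustrative values, and note that this issues a real POST to berrispend.berri.ai:

    import datetime
    from litellm.integrations.berrispend import BerriSpendLogger

    logger = BerriSpendLogger()
    start = datetime.datetime.now()
    end = start + datetime.timedelta(seconds=2)
    response_obj = {
        "model": "gpt-3.5-turbo",
        "response": "Hi there!",  # presence of this key selects the success branch
        "choices": [{"message": {"content": "Hi there!"}}],
        "usage": {"prompt_tokens": 9, "completion_tokens": 3},
    }
    logger.log_event(model="gpt-3.5-turbo",
                     messages=[{"role": "user", "content": "Hello"}],
                     response_obj=response_obj, start_time=start, end_time=end,
                     print_verbose=print)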
25  litellm/tests/test_berrispend_integration.py  (new file)
@@ -0,0 +1,25 @@
#### What this tests ####
# This tests if logging to the berrispend integration actually works

import sys, os
import traceback
import pytest

sys.path.insert(0, os.path.abspath('../..'))  # Adds the parent directory to the system path
import litellm
from litellm import embedding, completion

litellm.success_callback = ["berrispend"]
litellm.failure_callback = ["berrispend"]

litellm.set_verbose = True

user_message = "Hello, how are you?"
messages = [{"content": user_message, "role": "user"}]


# openai call
response = completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}])

# bad request call
response = completion(model="chatgpt-test", messages=[{"role": "user", "content": "Hi 👋 - i'm a bad request"}])
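Both loggers read their configuration from environment variables (names taken from the __init__ methods above); one way to supply them before running this script:

    import os
    # values are placeholders; set these to your real account details
    os.environ["BERRISPEND_ACCOUNT_ID"] = "your-account-id"
    os.environ["AISPEND_ACCOUNT_ID"] = "your-account-id"  # only needed for the aispend callback
    os.environ["AISPEND_API_KEY"] = "your-api-key"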
@@ -2,7 +2,10 @@ import dotenv, json, traceback, threading
 import subprocess, os
 import litellm, openai
 import random, uuid, requests
-import datetime
+import datetime, time
+from anthropic import Anthropic
+import tiktoken
+encoding = tiktoken.get_encoding("cl100k_base")
 from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
 ####### ENVIRONMENT VARIABLES ###################
 dotenv.load_dotenv() # Loading env variables using dotenv
@@ -13,6 +16,8 @@ posthog = None
 slack_app = None
 alerts_channel = None
 heliconeLogger = None
+aispendLogger = None
+berrispendLogger = None
 callback_list = []
 user_logger_fn = None
 additional_details = {}
@@ -89,6 +94,7 @@ def client(original_function):
        pass

    def wrapper(*args, **kwargs):
+        start_time = None
        try:
            function_setup(*args, **kwargs)
            ## MODEL CALL
@@ -101,7 +107,8 @@ def client(original_function):
            return result
        except Exception as e:
            traceback_exception = traceback.format_exc()
-            my_thread = threading.Thread(target=handle_failure, args=(e, traceback_exception, args, kwargs)) # don't interrupt execution of main thread
+            end_time = datetime.datetime.now()
+            my_thread = threading.Thread(target=handle_failure, args=(e, traceback_exception, start_time, end_time, args, kwargs)) # don't interrupt execution of main thread
            my_thread.start()
            raise e
    return wrapper
@@ -153,7 +160,7 @@ def get_optional_params(
    return optional_params

def set_callbacks(callback_list):
-    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger
+    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, heliconeLogger, aispendLogger, berrispendLogger
    try:
        for callback in callback_list:
            if callback == "sentry":
@@ -193,14 +200,19 @@ def set_callbacks(callback_list):
                print_verbose(f"Initialized Slack App: {slack_app}")
            elif callback == "helicone":
                from .integrations.helicone import HeliconeLogger

                heliconeLogger = HeliconeLogger()
+            elif callback == "aispend":
+                from .integrations.aispend import AISpendLogger
+                aispendLogger = AISpendLogger()
+            elif callback == "berrispend":
+                from .integrations.berrispend import BerriSpendLogger
+                berrispendLogger = BerriSpendLogger()
    except:
        pass


-def handle_failure(exception, traceback_exception, args, kwargs):
-    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel
+def handle_failure(exception, traceback_exception, start_time, end_time, args, kwargs):
+    global sentry_sdk_instance, capture_exception, add_breadcrumb, posthog, slack_app, alerts_channel, aispendLogger, berrispendLogger
    try:
        # print_verbose(f"handle_failure args: {args}")
        # print_verbose(f"handle_failure kwargs: {kwargs}")
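set_callbacks lazily imports an integration the first time its name shows up in a callback list, so callers opt in with string names only; a sketch:

    import litellm
    # registering the string names triggers the elif branches above on first use
    litellm.success_callback = ["berrispend", "aispend"]
    litellm.failure_callback = ["berrispend", "aispend"]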
@@ -248,6 +260,33 @@ def handle_failure(exception, traceback_exception, args, kwargs):
                unique_id = str(uuid.uuid4())
                posthog.capture(unique_id, event_name)
                print_verbose(f"successfully logged to PostHog!")
+            elif callback == "berrispend":
+                print_verbose("reaches berrispend for logging!")
+                model = args[0] if len(args) > 0 else kwargs["model"]
+                messages = args[1] if len(args) > 1 else kwargs["messages"]
+                result = {
+                    "model": model,
+                    "created": time.time(),
+                    "error": traceback_exception,
+                    "usage": {
+                        "prompt_tokens": prompt_token_calculator(model, messages=messages),
+                        "completion_tokens": 0
+                    }
+                }
+                berrispendLogger.log_event(model=model, messages=messages, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose)
+            elif callback == "aispend":
+                print_verbose("reaches aispend for logging!")
+                model = args[0] if len(args) > 0 else kwargs["model"]
+                messages = args[1] if len(args) > 1 else kwargs["messages"]
+                result = {
+                    "model": model,
+                    "created": time.time(),
+                    "usage": {
+                        "prompt_tokens": prompt_token_calculator(model, messages=messages),
+                        "completion_tokens": 0
+                    }
+                }
+                aispendLogger.log_event(model=model, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose)
    except:
        print_verbose(f"Error Occurred while logging failure: {traceback.format_exc()}")
        pass
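Since a failed call has no provider response, the hunk above fabricates a response_obj from the traceback and a token count of the prompt. A simplified sketch of how the wrapper hands a failure to this code on a background thread (names from the hunks above; not the verbatim implementation):

    import datetime, threading, traceback

    def call_with_failure_logging(fn, *args, **kwargs):
        # simplified mirror of the client() wrapper's failure path
        start_time = datetime.datetime.now()
        try:
            return fn(*args, **kwargs)
        except Exception as e:
            end_time = datetime.datetime.now()
            threading.Thread(
                target=handle_failure,  # assumed in scope, as in the diff
                args=(e, traceback.format_exc(), start_time, end_time, args, kwargs),
            ).start()
            raise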
@@ -264,8 +303,21 @@ def handle_failure(exception, traceback_exception, args, kwargs):
        logging(logger_fn=user_logger_fn, exception=e)
        pass

+def prompt_token_calculator(model, messages):
+    # use tiktoken or anthropic's tokenizer depending on the model
+    text = " ".join(message["content"] for message in messages)
+    num_tokens = 0
+    if "claude" in model:
+        anthropic = Anthropic()
+        num_tokens = anthropic.count_tokens(text)
+    else:
+        num_tokens = len(encoding.encode(text))
+    return num_tokens
+
+
+
def handle_success(args, kwargs, result, start_time, end_time):
-    global heliconeLogger
+    global heliconeLogger, aispendLogger
    try:
        success_handler = additional_details.pop("success_handler", None)
        failure_handler = additional_details.pop("failure_handler", None)
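prompt_token_calculator picks a tokenizer by model name: anthropic's tokenizer for claude models, tiktoken's cl100k_base for everything else. A usage sketch (exact counts depend on the installed tokenizer versions; count_tokens was exposed on the Anthropic client in SDKs of this era):

    messages = [{"role": "user", "content": "Hello, how are you?"}]
    n_gpt = prompt_token_calculator("gpt-3.5-turbo", messages=messages)  # tiktoken path
    n_claude = prompt_token_calculator("claude-2", messages=messages)    # anthropic path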
@@ -293,8 +345,19 @@ def handle_success(args, kwargs, result, start_time, end_time):
                model = args[0] if len(args) > 0 else kwargs["model"]
                messages = args[1] if len(args) > 1 else kwargs["messages"]
                heliconeLogger.log_success(model=model, messages=messages, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose)
-    except:
-        print_verbose(f"Success Callback Error - {traceback.format_exc()}")
+            elif callback == "aispend":
+                print_verbose("reaches aispend for logging!")
+                model = args[0] if len(args) > 0 else kwargs["model"]
+                aispendLogger.log_event(model=model, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose)
+            elif callback == "berrispend":
+                print_verbose("reaches berrispend for logging!")
+                model = args[0] if len(args) > 0 else kwargs["model"]
+                messages = args[1] if len(args) > 1 else kwargs["messages"]
+                berrispendLogger.log_event(model=model, messages=messages, response_obj=result, start_time=start_time, end_time=end_time, print_verbose=print_verbose)
+    except Exception as e:
+        ## LOGGING
+        logging(logger_fn=user_logger_fn, exception=e)
+        print_verbose(f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}")
        pass

    if success_handler and callable(success_handler):
@@ -303,7 +366,7 @@ def handle_success(args, kwargs, result, start_time, end_time):
    except Exception as e:
        ## LOGGING
        logging(logger_fn=user_logger_fn, exception=e)
-        print_verbose(f"Success Callback Error - {traceback.format_exc()}")
+        print_verbose(f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}")
        pass

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.343"
+version = "0.1.344"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"