mirror of https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00

add helper functions for token usage calculation

commit 39efc57d84
parent ef99c616af

4 changed files with 59 additions and 18 deletions
@@ -10,8 +10,25 @@ azure_key = None
 anthropic_key = None
 replicate_key = None
 cohere_key = None
 hugging_api_token = None

+model_cost = {
+    "gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
+    "gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},  # azure model name
+    "gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
+    "gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
+    "gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
+    "gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},  # azure model name
+    "gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
+    "gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
+    "gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
+    "gpt-4-32k": {"max_tokens": 8000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
+    "claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
+    "claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
+    "text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
+    "chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
+    "command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
+}
 ####### THREAD-SPECIFIC DATA ###################
 class MyLocal(threading.local):
     def __init__(self):
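A quick sanity check of the per-token prices above (a hypothetical worked example, not part of the commit): at gpt-3.5-turbo's rates, a call that uses 1,000 prompt tokens and 500 completion tokens costs 1000 * 0.0000015 + 500 * 0.000002 = 0.0025 USD.

# Hypothetical worked example against the model_cost entries above; not part of the commit.
gpt_35_turbo = {"input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}
prompt_tokens, completion_tokens = 1000, 500
cost_usd = (gpt_35_turbo["input_cost_per_token"] * prompt_tokens
            + gpt_35_turbo["output_cost_per_token"] * completion_tokens)
print(f"{cost_usd:.4f}")  # prints 0.0025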
@@ -1 +0,0 @@
-test 1
@@ -131,6 +131,46 @@ def client(original_function):
             raise e
     return wrapper

+####### USAGE CALCULATOR ################
+
+def prompt_token_calculator(model, messages):
+    # use tiktoken or anthropic's tokenizer depending on the model
+    text = " ".join(message["content"] for message in messages)
+    num_tokens = 0
+    if "claude" in model:
+        install_and_import('anthropic')
+        from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
+        anthropic = Anthropic()
+        num_tokens = anthropic.count_tokens(text)
+    else:
+        num_tokens = len(encoding.encode(text))
+    return num_tokens
+
+
+def cost_per_token(model="gpt-3.5-turbo", prompt_tokens=0, completion_tokens=0):
+    ## given
+    prompt_tokens_cost_usd_dollar = 0
+    completion_tokens_cost_usd_dollar = 0
+    model_cost_ref = litellm.model_cost
+    if model in model_cost_ref:
+        prompt_tokens_cost_usd_dollar = model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
+        completion_tokens_cost_usd_dollar = model_cost_ref[model]["output_cost_per_token"] * completion_tokens
+        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
+    else:
+        # calculate average input cost
+        input_cost_sum = 0
+        output_cost_sum = 0
+        model_cost_ref = litellm.model_cost
+        for model in model_cost_ref:
+            input_cost_sum += model_cost_ref[model]["input_cost_per_token"]
+            output_cost_sum += model_cost_ref[model]["output_cost_per_token"]
+        avg_input_cost = input_cost_sum / len(model_cost_ref.keys())
+        avg_output_cost = output_cost_sum / len(model_cost_ref.keys())
+        prompt_tokens_cost_usd_dollar = avg_input_cost * prompt_tokens
+        completion_tokens_cost_usd_dollar = avg_output_cost * completion_tokens
+        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
+
 ####### HELPER FUNCTIONS ################
 def get_optional_params(
     # 12 optional params
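A minimal sketch of how the two new helpers might compose at a call site (hypothetical usage, not part of the commit; it assumes both functions are in scope and litellm.model_cost is populated as above):

# Hypothetical usage of the new helpers; not part of the commit.
messages = [{"role": "user", "content": "Hello, how are you?"}]
prompt_tokens = prompt_token_calculator("gpt-3.5-turbo", messages)
completion_tokens = 42  # in practice, read from the provider's usage metadata
prompt_cost, completion_cost = cost_per_token(
    model="gpt-3.5-turbo",
    prompt_tokens=prompt_tokens,
    completion_tokens=completion_tokens,
)
total_cost_usd = prompt_cost + completion_cost

Note the fallback: for a model missing from litellm.model_cost, cost_per_token prices tokens at the average input and output cost across all entries in the table instead of raising an error.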
@@ -367,21 +407,6 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k
         logging(logger_fn=user_logger_fn, exception=e)
         pass

-
-def prompt_token_calculator(model, messages):
-    # use tiktoken or anthropic's tokenizer depending on the model
-    text = " ".join(message["content"] for message in messages)
-    num_tokens = 0
-    if "claude" in model:
-        install_and_import('anthropic')
-        from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
-        anthropic = Anthropic()
-        num_tokens = anthropic.count_tokens(text)
-    else:
-        num_tokens = len(encoding.encode(text))
-    return num_tokens
-
-
 def handle_success(args, kwargs, result, start_time, end_time):
     global heliconeLogger, aispendLogger
     try:
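Net effect of this hunk: the earlier copy of prompt_token_calculator, which previously sat after handle_failure, is deleted, leaving only the single definition added in the usage-calculator section above.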
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.365"
+version = "0.1.366"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"