mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
add helper functions for token usage calculation
This commit is contained in:
parent
ef99c616af
commit
39efc57d84
4 changed files with 59 additions and 18 deletions
|
@ -10,8 +10,25 @@ azure_key = None
|
||||||
anthropic_key = None
|
anthropic_key = None
|
||||||
replicate_key = None
|
replicate_key = None
|
||||||
cohere_key = None
|
cohere_key = None
|
||||||
|
|
||||||
hugging_api_token = None
|
hugging_api_token = None
|
||||||
|
|
||||||
|
# Per-model cost table: context-window size plus per-token USD pricing,
# mirroring the providers' published rates (OpenAI, Azure, Anthropic,
# Google PaLM, Cohere). Used by cost_per_token() below.
model_cost = {
    "gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
    "gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},  # azure model name
    "gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
    "gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
    "gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
    "gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},  # azure model name
    "gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
    # NOTE(review): gpt-4 input price was listed as 0.000003 (the 3.5-16k rate);
    # OpenAI's published gpt-4 rate is $0.03/1K tokens = 0.00003/token.
    "gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006},
    "gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006},
    # NOTE(review): 32k-context variant — max_tokens was 8000, corrected to 32000.
    "gpt-4-32k": {"max_tokens": 32000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
    "claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
    "claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
    "text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
    "chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
    "command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
}
|
||||||
####### THREAD-SPECIFIC DATA ###################
|
####### THREAD-SPECIFIC DATA ###################
|
||||||
class MyLocal(threading.local):
|
class MyLocal(threading.local):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
|
|
@ -1 +0,0 @@
|
||||||
test 1
|
|
|
@ -131,6 +131,46 @@ def client(original_function):
|
||||||
raise e
|
raise e
|
||||||
return wrapper
|
return wrapper
|
||||||
|
|
||||||
|
####### USAGE CALCULATOR ################
|
||||||
|
|
||||||
|
def prompt_token_calculator(model, messages):
    """Count the tokens in a list of chat messages for the given model.

    Claude models are counted with Anthropic's own tokenizer; every other
    model falls back to the module-level tiktoken ``encoding``.
    """
    # Flatten the chat history into a single string before tokenizing.
    text = " ".join(message["content"] for message in messages)
    if "claude" in model:
        # Anthropic ships its own tokenizer; install lazily on first use.
        install_and_import('anthropic')
        from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
        anthropic = Anthropic()
        return anthropic.count_tokens(text)
    # Default path: OpenAI-style counting via the module-level `encoding`.
    return len(encoding.encode(text))
|
||||||
|
|
||||||
|
|
||||||
|
def cost_per_token(model="gpt-3.5-turbo", prompt_tokens = 0, completion_tokens = 0):
    """Return the USD cost of a completion call as a pair of floats.

    Args:
        model: model name, looked up in ``litellm.model_cost``.
        prompt_tokens: number of tokens sent in the prompt.
        completion_tokens: number of tokens generated in the response.

    Returns:
        (prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar).
        Unknown models are priced at the average per-token rate across all
        known models, so callers always get a usable estimate.
    """
    model_cost_ref = litellm.model_cost
    if model in model_cost_ref:
        rates = model_cost_ref[model]
        prompt_tokens_cost_usd_dollar = rates["input_cost_per_token"] * prompt_tokens
        completion_tokens_cost_usd_dollar = rates["output_cost_per_token"] * completion_tokens
        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
    # Fallback for unknown models: average input/output price of the table.
    # Fix over the original: iterate with a fresh name instead of
    # `for model in model_cost_ref`, which clobbered the `model` parameter.
    num_models = len(model_cost_ref)
    avg_input_cost = sum(v["input_cost_per_token"] for v in model_cost_ref.values()) / num_models
    avg_output_cost = sum(v["output_cost_per_token"] for v in model_cost_ref.values()) / num_models
    prompt_tokens_cost_usd_dollar = avg_input_cost * prompt_tokens
    completion_tokens_cost_usd_dollar = avg_output_cost * completion_tokens
    return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
||||||
|
|
||||||
|
|
||||||
####### HELPER FUNCTIONS ################
|
####### HELPER FUNCTIONS ################
|
||||||
def get_optional_params(
|
def get_optional_params(
|
||||||
# 12 optional params
|
# 12 optional params
|
||||||
|
@ -367,21 +407,6 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k
|
||||||
logging(logger_fn=user_logger_fn, exception=e)
|
logging(logger_fn=user_logger_fn, exception=e)
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def prompt_token_calculator(model, messages):
    """Token count for `messages`, using the tokenizer that matches `model`.

    Uses Anthropic's tokenizer for claude models and the module-level
    tiktoken ``encoding`` for everything else.
    """
    # Join every message body into one string for counting.
    joined = " ".join(msg["content"] for msg in messages)
    if "claude" not in model:
        # Non-Anthropic models: count with the module-level `encoding`.
        return len(encoding.encode(joined))
    # Claude path: pull in Anthropic's SDK on demand and use its counter.
    install_and_import('anthropic')
    from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
    anthropic = Anthropic()
    return anthropic.count_tokens(joined)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def handle_success(args, kwargs, result, start_time, end_time):
|
def handle_success(args, kwargs, result, start_time, end_time):
|
||||||
global heliconeLogger, aispendLogger
|
global heliconeLogger, aispendLogger
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "litellm"
|
name = "litellm"
|
||||||
version = "0.1.365"
|
version = "0.1.366"
|
||||||
description = "Library to easily interface with LLM API providers"
|
description = "Library to easily interface with LLM API providers"
|
||||||
authors = ["BerriAI"]
|
authors = ["BerriAI"]
|
||||||
license = "MIT License"
|
license = "MIT License"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue