mirror of https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00

add helper functions for token usage calculation

commit 39efc57d84
parent ef99c616af

4 changed files with 59 additions and 18 deletions
@@ -10,8 +10,25 @@ azure_key = None
 anthropic_key = None
 replicate_key = None
 cohere_key = None
 hugging_api_token = None

+model_cost = {
+    "gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
+    "gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},  # azure model name
+    "gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
+    "gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
+    "gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
+    "gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},  # azure model name
+    "gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
+    "gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
+    "gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006},
+    "gpt-4-32k": {"max_tokens": 8000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
+    "claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
+    "claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
+    "text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
+    "chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
+    "command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
+}
 ####### THREAD-SPECIFIC DATA ###################
 class MyLocal(threading.local):
     def __init__(self):
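A quick sanity check of the per-token prices above (a hypothetical worked example, not part of the commit): at gpt-3.5-turbo's rates, a call that uses 1,000 prompt tokens and 500 completion tokens costs 1000 * 0.0000015 + 500 * 0.000002 = 0.0025 USD.

# Hypothetical worked example against the model_cost entries above; not part of the commit.
gpt_35_turbo = {"input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002}
prompt_tokens, completion_tokens = 1000, 500
cost_usd = (gpt_35_turbo["input_cost_per_token"] * prompt_tokens
            + gpt_35_turbo["output_cost_per_token"] * completion_tokens)
print(f"{cost_usd:.4f}")  # prints 0.0025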
@@ -1 +0,0 @@
-test 1
@@ -131,6 +131,46 @@ def client(original_function):
             raise e
     return wrapper

+####### USAGE CALCULATOR ################
+
+def prompt_token_calculator(model, messages):
+    # use tiktoken or anthropic's tokenizer depending on the model
+    text = " ".join(message["content"] for message in messages)
+    num_tokens = 0
+    if "claude" in model:
+        install_and_import('anthropic')
+        from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
+        anthropic = Anthropic()
+        num_tokens = anthropic.count_tokens(text)
+    else:
+        num_tokens = len(encoding.encode(text))
+    return num_tokens
+
+
+def cost_per_token(model="gpt-3.5-turbo", prompt_tokens=0, completion_tokens=0):
+    ## given
+    prompt_tokens_cost_usd_dollar = 0
+    completion_tokens_cost_usd_dollar = 0
+    model_cost_ref = litellm.model_cost
+    if model in model_cost_ref:
+        prompt_tokens_cost_usd_dollar = model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
+        completion_tokens_cost_usd_dollar = model_cost_ref[model]["output_cost_per_token"] * completion_tokens
+        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
+    else:
+        # calculate average input cost
+        input_cost_sum = 0
+        output_cost_sum = 0
+        model_cost_ref = litellm.model_cost
+        for model in model_cost_ref:
+            input_cost_sum += model_cost_ref[model]["input_cost_per_token"]
+            output_cost_sum += model_cost_ref[model]["output_cost_per_token"]
+        avg_input_cost = input_cost_sum / len(model_cost_ref.keys())
+        avg_output_cost = output_cost_sum / len(model_cost_ref.keys())
+        prompt_tokens_cost_usd_dollar = avg_input_cost * prompt_tokens
+        completion_tokens_cost_usd_dollar = avg_output_cost * completion_tokens
+        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
+
 ####### HELPER FUNCTIONS ################
 def get_optional_params(
     # 12 optional params
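A minimal sketch of how the two new helpers might compose at a call site (hypothetical usage, not part of the commit; it assumes both functions are in scope and litellm.model_cost is populated as above):

# Hypothetical usage of the new helpers; not part of the commit.
messages = [{"role": "user", "content": "Hello, how are you?"}]
prompt_tokens = prompt_token_calculator("gpt-3.5-turbo", messages)
completion_tokens = 42  # in practice, read from the provider's usage metadata
prompt_cost, completion_cost = cost_per_token(
    model="gpt-3.5-turbo",
    prompt_tokens=prompt_tokens,
    completion_tokens=completion_tokens,
)
total_cost_usd = prompt_cost + completion_cost

Note the fallback: for a model missing from litellm.model_cost, cost_per_token prices tokens at the average input and output cost across all entries in the table instead of raising an error.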
@@ -367,21 +407,6 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, k
         logging(logger_fn=user_logger_fn, exception=e)
         pass

-
-def prompt_token_calculator(model, messages):
-    # use tiktoken or anthropic's tokenizer depending on the model
-    text = " ".join(message["content"] for message in messages)
-    num_tokens = 0
-    if "claude" in model:
-        install_and_import('anthropic')
-        from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
-        anthropic = Anthropic()
-        num_tokens = anthropic.count_tokens(text)
-    else:
-        num_tokens = len(encoding.encode(text))
-    return num_tokens
-
-
 def handle_success(args, kwargs, result, start_time, end_time):
     global heliconeLogger, aispendLogger
     try:
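Net effect of this hunk: the earlier copy of prompt_token_calculator, which previously sat after handle_failure, is deleted, leaving only the single definition added in the usage-calculator section above.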
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.365"
+version = "0.1.366"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"