From a9186dc40c955680f32a7c02e928a90e0961c92b Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 8 Aug 2023 21:11:06 -0700
Subject: [PATCH] add token usage

---
 docs/token_usage.md | 45 +++++++++++++++++++++++++++++++++++++++++++++
 litellm/__init__.py |  2 +-
 litellm/utils.py    | 24 +++++++++++++++++++++---
 mkdocs.yml          |  2 ++
 pyproject.toml      |  2 +-
 5 files changed, 70 insertions(+), 5 deletions(-)
 create mode 100644 docs/token_usage.md

diff --git a/docs/token_usage.md b/docs/token_usage.md
new file mode 100644
index 000000000..5bf2fbd3d
--- /dev/null
+++ b/docs/token_usage.md
@@ -0,0 +1,45 @@
+# Token Usage
+By default, LiteLLM returns token usage in all completion requests ([see here](https://litellm.readthedocs.io/en/latest/output/)).
+
+However, we also expose 3 public helper functions to calculate token usage across providers:
+
+- `token_counter`: This returns the number of tokens for a given input - it uses the tokenizer based on the model, and defaults to tiktoken if no model-specific tokenizer is available.
+
+- `cost_per_token`: This returns the cost (in USD) for prompt (input) and completion (output) tokens. It utilizes our model_cost map, which can be found in `__init__.py` and also as a [community resource](https://github.com/BerriAI/litellm/blob/main/cookbook/community-resources/max_tokens.json).
+
+- `completion_cost`: This returns the overall cost (in USD) for a given LLM API call. It combines `token_counter` and `cost_per_token` to return the cost for that query (counting both the input and the output).
+
+## Example Usage
+
+1. `token_counter`
+
+```python
+from litellm import token_counter
+
+text = "Hey, how's it going"
+print(token_counter(model="gpt-3.5-turbo", text=text))
+```
+
+2. `cost_per_token`
+
+```python
+from litellm import cost_per_token
+
+prompt_tokens = 5
+completion_tokens = 10
+prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = cost_per_token(model="gpt-3.5-turbo", prompt_tokens=prompt_tokens, completion_tokens=completion_tokens)
+
+print(prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar)
+```
+
+3. `completion_cost`
+
+```python
+from litellm import completion_cost
+
+prompt = "Hey, how's it going"
+completion = "Hi, I'm gpt - I am doing well"
+cost_of_query = completion_cost(model="gpt-3.5-turbo", prompt=prompt, completion=completion)
+
+print(cost_of_query)
+```
diff --git a/litellm/__init__.py b/litellm/__init__.py
index 4c18d0e63..9b0154dda 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -89,7 +89,7 @@ open_ai_embedding_models = [
     'text-embedding-ada-002'
 ]
 from .timeout import timeout
-from .utils import client, logging, exception_type, get_optional_params, modify_integration
+from .utils import client, logging, exception_type, get_optional_params, modify_integration, token_counter, cost_per_token, completion_cost
 from .main import *  # Import all the symbols from main.py
 from .integrations import *
 from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index b47e08271..b81e9bc0d 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -133,9 +133,8 @@ def client(original_function):
 
 ####### USAGE CALCULATOR ################
 
-def prompt_token_calculator(model, messages):
+def token_counter(model, text):
     # use tiktoken or anthropic's tokenizer depending on the model
-    text = " ".join(message["content"] for message in messages)
     num_tokens = 0
     if "claude" in model:
         install_and_import('anthropic')
@@ -168,9 +167,15 @@ def cost_per_token(model="gpt-3.5-turbo", prompt_tokens = 0, completion_tokens =
     avg_output_cost = output_cost_sum / len(model_cost_ref.keys())
     prompt_tokens_cost_usd_dollar = avg_input_cost * prompt_tokens
     completion_tokens_cost_usd_dollar = avg_output_cost * completion_tokens
-    return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
+    return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
+def completion_cost(model="gpt-3.5-turbo", prompt="", completion=""):
+    prompt_tokens = token_counter(model=model, text=prompt)
+    completion_tokens = token_counter(model=model, text=completion)
+    prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = cost_per_token(model=model, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens)
+    return prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
+
 
 
 ####### HELPER FUNCTIONS ################
 
 def get_optional_params(  # 12 optional params
@@ -466,6 +471,19 @@ def handle_success(args, kwargs, result, start_time, end_time):
     print_verbose(f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}")
     pass
 
+def prompt_token_calculator(model, messages):
+    # use tiktoken or anthropic's tokenizer depending on the model
+    text = " ".join(message["content"] for message in messages)
+    num_tokens = 0
+    if "claude" in model:
+        install_and_import('anthropic')
+        from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
+        anthropic = Anthropic()
+        num_tokens = anthropic.count_tokens(text)
+    else:
+        num_tokens = len(encoding.encode(text))
+    return num_tokens
+
 # integration helper function
 def modify_integration(integration_name, integration_params):
     global supabaseClient
diff --git a/mkdocs.yml b/mkdocs.yml
index e7326d0d6..97ed0d9ed 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -6,6 +6,8 @@ nav:
     - Input - Request Body: input.md
     - Output - Response Object: output.md
     - Streaming & Async Calls: stream.md
+  - Token Usage:
+    - Helper Functions: token_usage.md
   - 🤖 Supported LLM APIs:
     - Supported Completion & Chat APIs: supported.md
     - Supported Embedding APIs: supported_embedding.md
diff --git a/pyproject.toml b/pyproject.toml
index dc608b841..0600035ca 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.366"
+version = "0.1.367"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
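
For context, the sketch below shows how the helpers added in this patch could be combined with a live `completion()` call. It is a hypothetical usage example rather than part of the patch: it assumes an OpenAI-style `usage` block on the response (as described in the Output docs linked in `token_usage.md`) and a provider API key already configured in the environment.

```python
# Hypothetical sketch: price a real completion call using the helpers
# added in this patch. Assumes the response carries an OpenAI-style
# `usage` block and that the provider API key (e.g. OPENAI_API_KEY) is set.
from litellm import completion, cost_per_token

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hey, how's it going"}],
)

# Token counts as reported by the provider for this request
prompt_tokens = response["usage"]["prompt_tokens"]
completion_tokens = response["usage"]["completion_tokens"]

# Convert the token counts into USD using the model_cost map
prompt_cost, completion_cost_usd = cost_per_token(
    model="gpt-3.5-turbo",
    prompt_tokens=prompt_tokens,
    completion_tokens=completion_tokens,
)
print(f"total cost (USD): {prompt_cost + completion_cost_usd}")
```

Calling `completion_cost(model=..., prompt=..., completion=...)` on the raw prompt and completion strings gives a similar estimate without needing the provider-reported `usage` block.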