From a9186dc40c955680f32a7c02e928a90e0961c92b Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 8 Aug 2023 21:11:06 -0700
Subject: [PATCH] add token usage

---
 docs/token_usage.md | 45 +++++++++++++++++++++++++++++++++++++++++++++
 litellm/__init__.py |  2 +-
 litellm/utils.py    | 24 +++++++++++++++++++++---
 mkdocs.yml          |  2 ++
 pyproject.toml      |  2 +-
 5 files changed, 70 insertions(+), 5 deletions(-)
 create mode 100644 docs/token_usage.md

diff --git a/docs/token_usage.md b/docs/token_usage.md
new file mode 100644
index 000000000..5bf2fbd3d
--- /dev/null
+++ b/docs/token_usage.md
@@ -0,0 +1,45 @@
+# Token Usage
+By default, LiteLLM returns token usage in all completion requests ([see here](https://litellm.readthedocs.io/en/latest/output/)).
+
+However, we also expose 3 public helper functions to calculate token usage across providers:
+
+- `token_counter`: This returns the number of tokens for a given input - it uses the tokenizer based on the model, and defaults to tiktoken if no model-specific tokenizer is available.
+
+- `cost_per_token`: This returns the cost (in USD) for prompt (input) and completion (output) tokens. It utilizes our model_cost map, which can be found in `__init__.py` and also as a [community resource](https://github.com/BerriAI/litellm/blob/main/cookbook/community-resources/max_tokens.json).
+
+- `completion_cost`: This returns the overall cost (in USD) for a given LLM API call. It combines `token_counter` and `cost_per_token` to return the cost for that query (counting both the input and the output).
+
+## Example Usage
+
+1. `token_counter`
+
+```python
+from litellm import token_counter
+
+text = "Hey, how's it going"
+print(token_counter(model="gpt-3.5-turbo", text=text))
+```
+
+2. `cost_per_token`
+
+```python
+from litellm import cost_per_token
+
+prompt_tokens = 5
+completion_tokens = 10
+prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = cost_per_token(model="gpt-3.5-turbo", prompt_tokens=prompt_tokens, completion_tokens=completion_tokens)
+
+print(prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar)
+```
+
+3. `completion_cost`
+
+```python
+from litellm import completion_cost
+
+prompt = "Hey, how's it going"
+completion = "Hi, I'm gpt - I am doing well"
+cost_of_query = completion_cost(model="gpt-3.5-turbo", prompt=prompt, completion=completion)
+
+print(cost_of_query)
+```
diff --git a/litellm/__init__.py b/litellm/__init__.py
index 4c18d0e63..9b0154dda 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -89,7 +89,7 @@ open_ai_embedding_models = [
     'text-embedding-ada-002'
 ]
 from .timeout import timeout
-from .utils import client, logging, exception_type, get_optional_params, modify_integration
+from .utils import client, logging, exception_type, get_optional_params, modify_integration, token_counter, cost_per_token, completion_cost
 from .main import *  # Import all the symbols from main.py
 from .integrations import *
 from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index b47e08271..b81e9bc0d 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -133,9 +133,8 @@ def client(original_function):
 
 ####### USAGE CALCULATOR ################
 
-def prompt_token_calculator(model, messages):
+def token_counter(model, text):
     # use tiktoken or anthropic's tokenizer depending on the model
-    text = " ".join(message["content"] for message in messages)
     num_tokens = 0
     if "claude" in model:
         install_and_import('anthropic')
@@ -168,9 +167,15 @@ def cost_per_token(model="gpt-3.5-turbo", prompt_tokens = 0, completion_tokens =
     avg_output_cost = output_cost_sum / len(model_cost_ref.keys())
     prompt_tokens_cost_usd_dollar = avg_input_cost * prompt_tokens
     completion_tokens_cost_usd_dollar = avg_output_cost * completion_tokens
-    return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
+    return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
+def completion_cost(model="gpt-3.5-turbo", prompt="", completion=""):
+    prompt_tokens = token_counter(model=model, text=prompt)
+    completion_tokens = token_counter(model=model, text=completion)
+    prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = cost_per_token(model=model, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens)
+    return prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
+
 
 
 ####### HELPER FUNCTIONS ################
 
 def get_optional_params(  # 12 optional params
@@ -466,6 +471,19 @@ def handle_success(args, kwargs, result, start_time, end_time):
     print_verbose(f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}")
     pass
 
+def prompt_token_calculator(model, messages):
+    # use tiktoken or anthropic's tokenizer depending on the model
+    text = " ".join(message["content"] for message in messages)
+    num_tokens = 0
+    if "claude" in model:
+        install_and_import('anthropic')
+        from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
+        anthropic = Anthropic()
+        num_tokens = anthropic.count_tokens(text)
+    else:
+        num_tokens = len(encoding.encode(text))
+    return num_tokens
+
 # integration helper function
 def modify_integration(integration_name, integration_params):
     global supabaseClient
diff --git a/mkdocs.yml b/mkdocs.yml
index e7326d0d6..97ed0d9ed 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -6,6 +6,8 @@ nav:
     - Input - Request Body: input.md
     - Output - Response Object: output.md
     - Streaming & Async Calls: stream.md
+  - Token Usage:
+    - Helper Functions: token_usage.md
   - 🤖 Supported LLM APIs:
     - Supported Completion & Chat APIs: supported.md
     - Supported Embedding APIs: supported_embedding.md
diff --git a/pyproject.toml b/pyproject.toml
index dc608b841..0600035ca 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.366"
+version = "0.1.367"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
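
For context, the sketch below shows how the helpers added in this patch could be combined with a live `completion()` call. It is a hypothetical usage example rather than part of the patch: it assumes an OpenAI-style `usage` block on the response (as described in the Output docs linked in `token_usage.md`) and a provider API key already configured in the environment.

```python
# Hypothetical sketch: price a real completion call using the helpers
# added in this patch. Assumes the response carries an OpenAI-style
# `usage` block and that the provider API key (e.g. OPENAI_API_KEY) is set.
from litellm import completion, cost_per_token

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hey, how's it going"}],
)

# Token counts as reported by the provider for this request
prompt_tokens = response["usage"]["prompt_tokens"]
completion_tokens = response["usage"]["completion_tokens"]

# Convert the token counts into USD using the model_cost map
prompt_cost, completion_cost_usd = cost_per_token(
    model="gpt-3.5-turbo",
    prompt_tokens=prompt_tokens,
    completion_tokens=completion_tokens,
)
print(f"total cost (USD): {prompt_cost + completion_cost_usd}")
```

Calling `completion_cost(model=..., prompt=..., completion=...)` on the raw prompt and completion strings gives a similar estimate without needing the provider-reported `usage` block.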