forked from phoenix/litellm-mirror
add token usage
parent 39efc57d84
commit a9186dc40c
5 changed files with 70 additions and 5 deletions
docs/token_usage.md (new file)

@@ -0,0 +1,45 @@
# Token Usage
By default LiteLLM returns token usage in all completion requests ([see here](https://litellm.readthedocs.io/en/latest/output/)).
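
For instance, here is a minimal sketch of reading that usage block off a response (assuming the OpenAI-style response schema described in the output docs linked above):

```python
from litellm import completion

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hey, how's it going"}],
)
# usage is reported alongside the model's choices, e.g.:
# {"prompt_tokens": ..., "completion_tokens": ..., "total_tokens": ...}
print(response["usage"])
```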
However, we also expose 3 public helper functions to calculate token usage across providers:
- `token_counter`: This returns the number of tokens for a given input - it uses the tokenizer based on the model, and defaults to tiktoken if no model-specific tokenizer is available.
- `cost_per_token`: This returns the cost (in USD) for prompt (input) and completion (output) tokens. It utilizes our model_cost map, which can be found in `__init__.py` and also as a [community resource](https://github.com/BerriAI/litellm/blob/main/cookbook/community-resources/max_tokens.json) (see the sketch after this list for the shape of an entry).
- `completion_cost`: This returns the overall cost (in USD) for a given LLM API call. It combines `token_counter` and `cost_per_token` to return the cost for that query (counting the cost of both input and output).
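
For illustration, an entry in that map might be shaped like the sketch below. The field names mirror the community resource linked above; the numbers are placeholders, not real prices.

```python
# Hypothetical model_cost entry (placeholder values, for illustration only):
model_cost = {
    "gpt-3.5-turbo": {
        "max_tokens": 4000,                  # context window (placeholder)
        "input_cost_per_token": 0.0000015,   # USD per prompt token (placeholder)
        "output_cost_per_token": 0.000002,   # USD per completion token (placeholder)
    },
}
```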
## Example Usage
1. `token_counter`

```python
from litellm import token_counter

text = "Hey, how's it going"
print(token_counter(model="gpt-3.5-turbo", text=text))
```

2. `cost_per_token`

```python
from litellm import cost_per_token

prompt_tokens = 5
completion_tokens = 10
prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = cost_per_token(model="gpt-3.5-turbo", prompt_tokens=prompt_tokens, completion_tokens=completion_tokens)

print(prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar)
```
3. `completion_cost`

```python
from litellm import completion_cost

prompt = "Hey, how's it going"
completion = "Hi, I'm gpt - I am doing well"
cost_of_query = completion_cost(model="gpt-3.5-turbo", prompt=prompt, completion=completion)

print(cost_of_query)
```
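
Since `completion_cost` simply combines `token_counter` and `cost_per_token`, the following sketch (assuming the signatures shown above) should print the same total twice:

```python
from litellm import token_counter, cost_per_token, completion_cost

prompt = "Hey, how's it going"
completion = "Hi, I'm gpt - I am doing well"

# Count tokens for each side, price them, then compare with the one-call helper.
prompt_tokens = token_counter(model="gpt-3.5-turbo", text=prompt)
completion_tokens = token_counter(model="gpt-3.5-turbo", text=completion)
prompt_cost, completion_cost_usd = cost_per_token(
    model="gpt-3.5-turbo",
    prompt_tokens=prompt_tokens,
    completion_tokens=completion_tokens,
)

print(prompt_cost + completion_cost_usd)
print(completion_cost(model="gpt-3.5-turbo", prompt=prompt, completion=completion))
```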
litellm/__init__.py

@@ -89,7 +89,7 @@ open_ai_embedding_models = [
     'text-embedding-ada-002'
 ]
 from .timeout import timeout
-from .utils import client, logging, exception_type, get_optional_params, modify_integration
+from .utils import client, logging, exception_type, get_optional_params, modify_integration, token_counter, cost_per_token, completion_cost
 from .main import * # Import all the symbols from main.py
 from .integrations import *
 from openai.error import AuthenticationError, InvalidRequestError, RateLimitError, ServiceUnavailableError, OpenAIError
litellm/utils.py

@@ -133,9 +133,8 @@ def client(original_function):

 ####### USAGE CALCULATOR ################

-def prompt_token_calculator(model, messages):
+def token_counter(model, text):
     # use tiktoken or anthropic's tokenizer depending on the model
-    text = " ".join(message["content"] for message in messages)
     num_tokens = 0
     if "claude" in model:
         install_and_import('anthropic')
@@ -168,9 +167,15 @@ def cost_per_token(model="gpt-3.5-turbo", prompt_tokens = 0, completion_tokens =
         avg_output_cost = output_cost_sum / len(model_cost_ref.keys())
         prompt_tokens_cost_usd_dollar = avg_input_cost * prompt_tokens
         completion_tokens_cost_usd_dollar = avg_output_cost * completion_tokens
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
+
+
+def completion_cost(model="gpt-3.5-turbo", prompt="", completion=""):
+    prompt_tokens = token_counter(model=model, text=prompt)
+    completion_tokens = token_counter(model=model, text=completion)
+    prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = cost_per_token(model=model, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens)
+    return prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar


 ####### HELPER FUNCTIONS ################
 def get_optional_params(
     # 12 optional params
@@ -466,6 +471,19 @@ def handle_success(args, kwargs, result, start_time, end_time):
         print_verbose(f"[Non-Blocking] Success Callback Error - {traceback.format_exc()}")
         pass
+
+def prompt_token_calculator(model, messages):
+    # use tiktoken or anthropic's tokenizer depending on the model
+    text = " ".join(message["content"] for message in messages)
+    num_tokens = 0
+    if "claude" in model:
+        install_and_import('anthropic')
+        from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
+        anthropic = Anthropic()
+        num_tokens = anthropic.count_tokens(text)
+    else:
+        num_tokens = len(encoding.encode(text))
+    return num_tokens

 # integration helper function
 def modify_integration(integration_name, integration_params):
     global supabaseClient
mkdocs.yml

@@ -6,6 +6,8 @@ nav:
   - Input - Request Body: input.md
   - Output - Response Object: output.md
   - Streaming & Async Calls: stream.md
+  - Token Usage:
+      - Helper Functions: token_usage.md
   - 🤖 Supported LLM APIs:
       - Supported Completion & Chat APIs: supported.md
       - Supported Embedding APIs: supported_embedding.md
pyproject.toml

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.366"
+version = "0.1.367"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"