rename max_tokens.json

ishaan-jaff 2023-09-06 16:28:17 -07:00
parent fbd67bc24c
commit bab27634a8
6 changed files with 8 additions and 6 deletions


@@ -5,7 +5,7 @@ However, we also expose 3 public helper functions to calculate token usage across
 - `token_counter`: This returns the number of tokens for a given input - it uses the tokenizer based on the model, and defaults to tiktoken if no model-specific tokenizer is available.
-- `cost_per_token`: This returns the cost (in USD) for prompt (input) and completion (output) tokens. It utilizes our model_cost map which can be found in `__init__.py` and also as a [community resource](https://github.com/BerriAI/litellm/blob/main/cookbook/community-resources/max_tokens.json).
+- `cost_per_token`: This returns the cost (in USD) for prompt (input) and completion (output) tokens. It utilizes our model_cost map which can be found in `__init__.py` and also as a [community resource](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json).
 - `completion_cost`: This returns the overall cost (in USD) for a given LLM API Call. It combines `token_counter` and `cost_per_token` to return the cost for that query (counting both cost of input and output).


@@ -5,7 +5,7 @@ However, we also expose 3 public helper functions to calculate token usage across
 - `token_counter`: This returns the number of tokens for a given input - it uses the tokenizer based on the model, and defaults to tiktoken if no model-specific tokenizer is available.
-- `cost_per_token`: This returns the cost (in USD) for prompt (input) and completion (output) tokens. It utilizes our model_cost map which can be found in `__init__.py` and also as a [community resource](https://github.com/BerriAI/litellm/blob/main/cookbook/community-resources/max_tokens.json).
+- `cost_per_token`: This returns the cost (in USD) for prompt (input) and completion (output) tokens. It utilizes our model_cost map which can be found in `__init__.py` and also as a [community resource](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json).
 - `completion_cost`: This returns the overall cost (in USD) for a given LLM API Call. It combines `token_counter` and `cost_per_token` to return the cost for that query (counting both cost of input and output).
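For reference, here is a minimal sketch of how the three helpers described in the docs above might be called. The signatures mirror what is visible in this diff; the model name and inputs are illustrative only, and exact pricing support depends on the installed litellm version.

```python
# Minimal sketch based on the helper signatures shown in this diff; assumes `pip install litellm`.
import litellm

# Count tokens for a string with the model's tokenizer (falls back to tiktoken).
tokens = litellm.token_counter(model="gpt-3.5-turbo", text="Hey, how's it going?")

# Convert token counts into USD costs using the model_cost map.
prompt_cost, completion_cost_usd = litellm.cost_per_token(
    model="gpt-3.5-turbo", prompt_tokens=tokens, completion_tokens=20
)

# Or compute the overall cost of a call in one step.
total_cost = litellm.completion_cost(
    model="gpt-3.5-turbo", prompt="Hey, how's it going?", completion="Doing well, thanks!"
)
print(tokens, prompt_cost + completion_cost_usd, total_cost)
```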


@@ -36,7 +36,7 @@ caching_with_models = False # if you want the caching key to be model + prompt
 cache: Optional[Cache] = None # cache object
 model_alias_map: Dict[str, str] = {}
 def get_model_cost_map():
-    url = "https://raw.githubusercontent.com/BerriAI/litellm/main/cookbook/community-resources/max_tokens.json"
+    url = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
     try:
         response = requests.get(url)
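The `get_model_cost_map()` change above points the library at the renamed JSON. As a hedged sketch, the same file can be fetched and inspected directly; the key names used below (`max_tokens`, `input_cost_per_token`, `output_cost_per_token`) reflect the published JSON around the time of this commit and may differ in later revisions.

```python
# Hedged sketch: fetch the hosted cost map directly and inspect one entry.
import requests

url = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
model_cost = requests.get(url, timeout=10).json()

# Key names are assumptions based on the published file, not guaranteed stable.
entry = model_cost.get("gpt-3.5-turbo", {})
print(entry.get("max_tokens"), entry.get("input_cost_per_token"), entry.get("output_cost_per_token"))
```

The try/except around `requests.get` in the hunk suggests the library tolerates the fetch failing (for example, offline use) rather than hard-erroring at import time.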


@@ -16,7 +16,7 @@
 # print(completion_cost(
-#     model="togethercomputer/llama-2-70b-chat",
+#     model="togethercomputer/llama-2-2b-chat",
 #     prompt="gm",
 #     completion="hello"
 # ))


@@ -652,7 +652,9 @@ def completion_cost(
     prompt_tokens = token_counter(model=model, text=prompt)
     completion_tokens = token_counter(model=model, text=completion)
     if "togethercomputer" in model:
-        together_catgeory = get_model_params_and_category(model)
+        # together ai prices based on size of llm
+        # get_model_params_and_category takes a model name and returns the category of LLM size it is in model_prices_and_context_window.json
+        model = get_model_params_and_category(model)
         # print(together_catgeory)
     prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = cost_per_token(
         model=model, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens
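The comments added in this hunk state that Together AI prices by model size and that `get_model_params_and_category` maps a model name to a size category keyed in model_prices_and_context_window.json. The sketch below only illustrates that idea; the function name is real, but the parsing, bucket labels, and thresholds here are hypothetical, not litellm's actual values.

```python
# Hypothetical illustration of size-bucketing a Together AI model name.
# Bucket labels and boundaries are invented for this sketch.
import re

def params_to_category(model_name: str) -> str:
    # Pull a parameter count like "7b" or "70b" out of the model name.
    match = re.search(r"(\d+(?:\.\d+)?)b", model_name.lower())
    if not match:
        return model_name  # no size found; fall back to the raw name
    params = float(match.group(1))
    if params <= 7:
        return "together-ai-up-to-7b"
    elif params <= 20:
        return "together-ai-7.1b-20b"
    return "together-ai-20.1b-and-up"

print(params_to_category("togethercomputer/llama-2-70b-chat"))  # -> together-ai-20.1b-and-up
```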
@@ -875,7 +877,7 @@ def get_max_tokens(model: str):
     try:
         return litellm.model_cost[model]
     except:
-        raise Exception("This model isn't mapped yet. Add it here - https://raw.githubusercontent.com/BerriAI/litellm/main/cookbook/community-resources/max_tokens.json")
+        raise Exception("This model isn't mapped yet. Add it here - https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json")
 def load_test_model(
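For completeness, a small usage sketch of `get_max_tokens` as shown in the hunk above. It assumes the helper is exported at the package level and that, at this point in litellm's history, it returns the model's full cost-map entry rather than a bare token count.

```python
# Hedged usage sketch for get_max_tokens; assumes the model is present in litellm.model_cost.
import litellm

info = litellm.get_max_tokens("gpt-3.5-turbo")
print(info)  # expected: the cost-map entry, e.g. max_tokens plus per-token prices

try:
    litellm.get_max_tokens("some-unmapped-model")
except Exception as e:
    # The error message points to model_prices_and_context_window.json for adding new models.
    print(e)
```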