From bab27634a842ec38f0078d159ffb783facd9eb2d Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Wed, 6 Sep 2023 16:28:17 -0700
Subject: [PATCH] rename max_tokens.json

---
 docs/my-website/docs/token_usage.md      | 2 +-
 docs/my-website/src/pages/token_usage.md | 2 +-
 litellm/__init__.py                      | 2 +-
 .../model_prices_and_context_window.json | 0
 litellm/tests/test_completion_cost.py    | 2 +-
 litellm/utils.py                         | 6 ++++--
 6 files changed, 8 insertions(+), 6 deletions(-)
 rename cookbook/community-resources/max_tokens.json => litellm/model_prices_and_context_window.json (100%)

diff --git a/docs/my-website/docs/token_usage.md b/docs/my-website/docs/token_usage.md
index 5bf2fbd3df..b7fc0b083d 100644
--- a/docs/my-website/docs/token_usage.md
+++ b/docs/my-website/docs/token_usage.md
@@ -5,7 +5,7 @@ However, we also expose 3 public helper functions to calculate token usage acros
 
 - `token_counter`: This returns the number of tokens for a given input - it uses the tokenizer based on the model, and defaults to tiktoken if no model-specific tokenizer is available.
 
-- `cost_per_token`: This returns the cost (in USD) for prompt (input) and completion (output) tokens. It utilizes our model_cost map which can be found in `__init__.py` and also as a [community resource](https://github.com/BerriAI/litellm/blob/main/cookbook/community-resources/max_tokens.json).
+- `cost_per_token`: This returns the cost (in USD) for prompt (input) and completion (output) tokens. It utilizes our model_cost map which can be found in `__init__.py` and also as a [community resource](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json).
 
 - `completion_cost`: This returns the overall cost (in USD) for a given LLM API Call. It combines `token_counter` and `cost_per_token` to return the cost for that query (counting both cost of input and output).

diff --git a/docs/my-website/src/pages/token_usage.md b/docs/my-website/src/pages/token_usage.md
index 5bf2fbd3df..b7fc0b083d 100644
--- a/docs/my-website/src/pages/token_usage.md
+++ b/docs/my-website/src/pages/token_usage.md
@@ -5,7 +5,7 @@ However, we also expose 3 public helper functions to calculate token usage acros
 
 - `token_counter`: This returns the number of tokens for a given input - it uses the tokenizer based on the model, and defaults to tiktoken if no model-specific tokenizer is available.
 
-- `cost_per_token`: This returns the cost (in USD) for prompt (input) and completion (output) tokens. It utilizes our model_cost map which can be found in `__init__.py` and also as a [community resource](https://github.com/BerriAI/litellm/blob/main/cookbook/community-resources/max_tokens.json).
+- `cost_per_token`: This returns the cost (in USD) for prompt (input) and completion (output) tokens. It utilizes our model_cost map which can be found in `__init__.py` and also as a [community resource](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json).
 
 - `completion_cost`: This returns the overall cost (in USD) for a given LLM API Call. It combines `token_counter` and `cost_per_token` to return the cost for that query (counting both cost of input and output).
diff --git a/litellm/__init__.py b/litellm/__init__.py
index c8e87c2431..31b167561f 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -36,7 +36,7 @@ caching_with_models = False # if you want the caching key to be model + prompt
 cache: Optional[Cache] = None # cache object
 model_alias_map: Dict[str, str] = {}
 
 def get_model_cost_map():
-    url = "https://raw.githubusercontent.com/BerriAI/litellm/main/cookbook/community-resources/max_tokens.json"
+    url = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
     try:
         response = requests.get(url)
diff --git a/cookbook/community-resources/max_tokens.json b/litellm/model_prices_and_context_window.json
similarity index 100%
rename from cookbook/community-resources/max_tokens.json
rename to litellm/model_prices_and_context_window.json
diff --git a/litellm/tests/test_completion_cost.py b/litellm/tests/test_completion_cost.py
index aea3acba52..3828ef7997 100644
--- a/litellm/tests/test_completion_cost.py
+++ b/litellm/tests/test_completion_cost.py
@@ -16,7 +16,7 @@
 
 
 # print(completion_cost(
-#     model="togethercomputer/llama-2-70b-chat",
+#     model="togethercomputer/llama-2-2b-chat",
#     prompt="gm",
 #     completion="hello"
 # ))
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index 31def38865..782d0ac4a9 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -652,7 +652,9 @@ def completion_cost(
     prompt_tokens = token_counter(model=model, text=prompt)
     completion_tokens = token_counter(model=model, text=completion)
     if "togethercomputer" in model:
-        together_catgeory = get_model_params_and_category(model)
+        # together ai prices based on the size of the llm
+        # get_model_params_and_category takes a model name and returns the LLM size category it maps to in model_prices_and_context_window.json
+        model = get_model_params_and_category(model)
         # print(together_catgeory)
     prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = cost_per_token(
         model=model, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens
@@ -875,7 +877,7 @@ def get_max_tokens(model: str):
     try:
         return litellm.model_cost[model]
     except:
-        raise Exception("This model isn't mapped yet. Add it here - https://raw.githubusercontent.com/BerriAI/litellm/main/cookbook/community-resources/max_tokens.json")
+        raise Exception("This model isn't mapped yet. Add it here - https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json")


def load_test_model(
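
For reviewers unfamiliar with the three helpers the renamed JSON feeds, here is a
minimal usage sketch. It assumes litellm is installed (`pip install litellm`); the
exact dollar figures depend on whichever version of
model_prices_and_context_window.json ships with the installed package. The call
signatures below mirror the ones visible in this patch.

from litellm import completion_cost, cost_per_token, token_counter

prompt = "gm"
completion = "hello"

# token_counter picks a tokenizer based on the model, falling back to tiktoken
prompt_tokens = token_counter(model="gpt-3.5-turbo", text=prompt)
completion_tokens = token_counter(model="gpt-3.5-turbo", text=completion)

# cost_per_token returns a (prompt_cost_usd, completion_cost_usd) pair
prompt_cost, completion_cost_usd = cost_per_token(
    model="gpt-3.5-turbo",
    prompt_tokens=prompt_tokens,
    completion_tokens=completion_tokens,
)

# completion_cost combines both steps into one total for the call
total_usd = completion_cost(model="gpt-3.5-turbo", prompt=prompt, completion=completion)
print(prompt_tokens, completion_tokens, prompt_cost + completion_cost_usd, total_usd)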
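
On the utils.py hunk: reassigning `model` to the return value of
get_model_params_and_category means the subsequent cost_per_token lookup keys into
together.ai's size-bucket entries in model_prices_and_context_window.json instead
of the raw model name. The sketch below only illustrates that idea; the bucket
names, thresholds, and parsing are assumptions, not litellm's actual
implementation.

import re

def params_to_category(model_name: str) -> str:
    # Parse a parameter count like "7b" or "70b" out of the model name;
    # together.ai names embed the size, e.g. togethercomputer/llama-2-70b-chat.
    match = re.search(r"(\d+(?:\.\d+)?)b", model_name.lower())
    if match is None:
        return model_name  # no size in the name, fall through to a direct lookup
    params = float(match.group(1))
    # Hypothetical bucket keys, mirroring the kind of entries the JSON holds
    if params <= 3:
        return "together-ai-up-to-3b"
    if params <= 7:
        return "together-ai-3.1b-7b"
    if params <= 20:
        return "together-ai-7.1b-20b"
    if params <= 40:
        return "together-ai-20.1b-40b"
    return "together-ai-40.1b-70b"

print(params_to_category("togethercomputer/llama-2-70b-chat"))  # together-ai-40.1b-70b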