From bab27634a842ec38f0078d159ffb783facd9eb2d Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Wed, 6 Sep 2023 16:28:17 -0700
Subject: [PATCH] rename max_tokens.json

---
 docs/my-website/docs/token_usage.md      | 2 +-
 docs/my-website/src/pages/token_usage.md | 2 +-
 litellm/__init__.py                      | 2 +-
 .../model_prices_and_context_window.json | 0
 litellm/tests/test_completion_cost.py    | 2 +-
 litellm/utils.py                         | 6 ++++--
 6 files changed, 8 insertions(+), 6 deletions(-)
 rename cookbook/community-resources/max_tokens.json => litellm/model_prices_and_context_window.json (100%)

diff --git a/docs/my-website/docs/token_usage.md b/docs/my-website/docs/token_usage.md
index 5bf2fbd3df..b7fc0b083d 100644
--- a/docs/my-website/docs/token_usage.md
+++ b/docs/my-website/docs/token_usage.md
@@ -5,7 +5,7 @@ However, we also expose 3 public helper functions to calculate token usage acros
 
 - `token_counter`: This returns the number of tokens for a given input - it uses the tokenizer based on the model, and defaults to tiktoken if no model-specific tokenizer is available.
 
-- `cost_per_token`: This returns the cost (in USD) for prompt (input) and completion (output) tokens. It utilizes our model_cost map which can be found in `__init__.py` and also as a [community resource](https://github.com/BerriAI/litellm/blob/main/cookbook/community-resources/max_tokens.json).
+- `cost_per_token`: This returns the cost (in USD) for prompt (input) and completion (output) tokens. It utilizes our model_cost map which can be found in `__init__.py` and also as a [community resource](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json).
 
 - `completion_cost`: This returns the overall cost (in USD) for a given LLM API Call. It combines `token_counter` and `cost_per_token` to return the cost for that query (counting both cost of input and output).

diff --git a/docs/my-website/src/pages/token_usage.md b/docs/my-website/src/pages/token_usage.md
index 5bf2fbd3df..b7fc0b083d 100644
--- a/docs/my-website/src/pages/token_usage.md
+++ b/docs/my-website/src/pages/token_usage.md
@@ -5,7 +5,7 @@ However, we also expose 3 public helper functions to calculate token usage acros
 
 - `token_counter`: This returns the number of tokens for a given input - it uses the tokenizer based on the model, and defaults to tiktoken if no model-specific tokenizer is available.
 
-- `cost_per_token`: This returns the cost (in USD) for prompt (input) and completion (output) tokens. It utilizes our model_cost map which can be found in `__init__.py` and also as a [community resource](https://github.com/BerriAI/litellm/blob/main/cookbook/community-resources/max_tokens.json).
+- `cost_per_token`: This returns the cost (in USD) for prompt (input) and completion (output) tokens. It utilizes our model_cost map which can be found in `__init__.py` and also as a [community resource](https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json).
 
 - `completion_cost`: This returns the overall cost (in USD) for a given LLM API Call. It combines `token_counter` and `cost_per_token` to return the cost for that query (counting both cost of input and output).
diff --git a/litellm/__init__.py b/litellm/__init__.py
index c8e87c2431..31b167561f 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -36,7 +36,7 @@ caching_with_models = False # if you want the caching key to be model + prompt
 cache: Optional[Cache] = None # cache object
 model_alias_map: Dict[str, str] = {}
 
 def get_model_cost_map():
-    url = "https://raw.githubusercontent.com/BerriAI/litellm/main/cookbook/community-resources/max_tokens.json"
+    url = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
     try:
         response = requests.get(url)
diff --git a/cookbook/community-resources/max_tokens.json b/litellm/model_prices_and_context_window.json
similarity index 100%
rename from cookbook/community-resources/max_tokens.json
rename to litellm/model_prices_and_context_window.json
diff --git a/litellm/tests/test_completion_cost.py b/litellm/tests/test_completion_cost.py
index aea3acba52..3828ef7997 100644
--- a/litellm/tests/test_completion_cost.py
+++ b/litellm/tests/test_completion_cost.py
@@ -16,7 +16,7 @@
 
 
 # print(completion_cost(
-#     model="togethercomputer/llama-2-70b-chat",
+#     model="togethercomputer/llama-2-2b-chat",
#     prompt="gm",
 #     completion="hello"
 # ))
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index 31def38865..782d0ac4a9 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -652,7 +652,9 @@ def completion_cost(
     prompt_tokens = token_counter(model=model, text=prompt)
     completion_tokens = token_counter(model=model, text=completion)
     if "togethercomputer" in model:
-        together_catgeory = get_model_params_and_category(model)
+        # together ai prices based on the size of the llm
+        # get_model_params_and_category takes a model name and returns the LLM size category it maps to in model_prices_and_context_window.json
+        model = get_model_params_and_category(model)
         # print(together_catgeory)
     prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = cost_per_token(
         model=model, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens
@@ -875,7 +877,7 @@ def get_max_tokens(model: str):
     try:
         return litellm.model_cost[model]
     except:
-        raise Exception("This model isn't mapped yet. Add it here - https://raw.githubusercontent.com/BerriAI/litellm/main/cookbook/community-resources/max_tokens.json")
+        raise Exception("This model isn't mapped yet. Add it here - https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json")


def load_test_model(
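
For reviewers unfamiliar with the three helpers the renamed JSON feeds, here is a
minimal usage sketch. It assumes litellm is installed (`pip install litellm`); the
exact dollar figures depend on whichever version of
model_prices_and_context_window.json ships with the installed package. The call
signatures below mirror the ones visible in this patch.

from litellm import completion_cost, cost_per_token, token_counter

prompt = "gm"
completion = "hello"

# token_counter picks a tokenizer based on the model, falling back to tiktoken
prompt_tokens = token_counter(model="gpt-3.5-turbo", text=prompt)
completion_tokens = token_counter(model="gpt-3.5-turbo", text=completion)

# cost_per_token returns a (prompt_cost_usd, completion_cost_usd) pair
prompt_cost, completion_cost_usd = cost_per_token(
    model="gpt-3.5-turbo",
    prompt_tokens=prompt_tokens,
    completion_tokens=completion_tokens,
)

# completion_cost combines both steps into one total for the call
total_usd = completion_cost(model="gpt-3.5-turbo", prompt=prompt, completion=completion)
print(prompt_tokens, completion_tokens, prompt_cost + completion_cost_usd, total_usd)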
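
On the utils.py hunk: reassigning `model` to the return value of
get_model_params_and_category means the subsequent cost_per_token lookup keys into
together.ai's size-bucket entries in model_prices_and_context_window.json instead
of the raw model name. The sketch below only illustrates that idea; the bucket
names, thresholds, and parsing are assumptions, not litellm's actual
implementation.

import re

def params_to_category(model_name: str) -> str:
    # Parse a parameter count like "7b" or "70b" out of the model name;
    # together.ai names embed the size, e.g. togethercomputer/llama-2-70b-chat.
    match = re.search(r"(\d+(?:\.\d+)?)b", model_name.lower())
    if match is None:
        return model_name  # no size in the name, fall through to a direct lookup
    params = float(match.group(1))
    # Hypothetical bucket keys, mirroring the kind of entries the JSON holds
    if params <= 3:
        return "together-ai-up-to-3b"
    if params <= 7:
        return "together-ai-3.1b-7b"
    if params <= 20:
        return "together-ai-7.1b-20b"
    if params <= 40:
        return "together-ai-20.1b-40b"
    return "together-ai-40.1b-70b"

print(params_to_category("togethercomputer/llama-2-70b-chat"))  # together-ai-40.1b-70b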