show pricing for TogetherAI completion

This commit is contained in:
ishaan-jaff 2023-09-06 17:10:49 -07:00
parent 04f8b20651
commit 1ba6b6761b
2 changed files with 46 additions and 23 deletions

View file

@@ -1,22 +1,38 @@
# import sys, os import sys, os
# import traceback import traceback
# from dotenv import load_dotenv from dotenv import load_dotenv
# load_dotenv() load_dotenv()
# import os import os
# sys.path.insert( sys.path.insert(
# 0, os.path.abspath("../..") 0, os.path.abspath("../..")
# ) # Adds the parent directory to the system path ) # Adds the parent directory to the system path
# import pytest import pytest
# import litellm import litellm
# from litellm import embedding, completion, text_completion from litellm import embedding, completion, text_completion
# from litellm.utils import completion_cost from litellm.utils import completion_cost
# print(completion_cost( user_message = "Write a short poem about the sky"
# model="togethercomputer/llama-2-2b-chat", messages = [{"content": user_message, "role": "user"}]
# prompt="gm",
# completion="hello"
# )) def test_completion_togetherai_cost():
try:
response = completion(
model="together_ai/togethercomputer/llama-2-70b-chat",
messages=messages,
request_timeout=200,
)
# Add any assertions here to check the response
print(response)
print("Completion Cost:")
cost = completion_cost(completion_response=response)
formatted_string = f"${float(cost):.10f}"
print(formatted_string)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
# test_completion_togetherai_cost()

View file

@@ -563,7 +563,6 @@ def client(original_function):
): # make it easy to get to the debugger logs if you've initialized it ): # make it easy to get to the debugger logs if you've initialized it
e.message += f"\n Check the log in your dashboard - {liteDebuggerClient.dashboard_url}" e.message += f"\n Check the log in your dashboard - {liteDebuggerClient.dashboard_url}"
raise e raise e
return wrapper return wrapper
@@ -571,7 +570,7 @@ def client(original_function):
# Extract the number of billion parameters from the model name # Extract the number of billion parameters from the model name
# together_compute # only used for together_computer LLMs
def get_model_params_and_category(model_name): def get_model_params_and_category(model_name):
import re import re
params_match = re.search(r'(\d+b)', model_name) # catch all decimals like 3b, 70b, etc params_match = re.search(r'(\d+b)', model_name) # catch all decimals like 3b, 70b, etc
@@ -647,15 +646,23 @@ def cost_per_token(model="gpt-3.5-turbo", prompt_tokens=0, completion_tokens=0):
def completion_cost( def completion_cost(
model="gpt-3.5-turbo", model="gpt-3.5-turbo",
prompt="", prompt="",
completion="" completion="",
completion_response=None
): ):
prompt_tokens = 0
completion_tokens = 0
if completion_response != None:
# get input/output tokens from completion_response
prompt_tokens = completion_response['usage']['prompt_tokens']
completion_tokens = completion_response['usage']['completion_tokens']
model = completion_response['model'] # get model from completion_response
else:
prompt_tokens = token_counter(model=model, text=prompt) prompt_tokens = token_counter(model=model, text=prompt)
completion_tokens = token_counter(model=model, text=completion) completion_tokens = token_counter(model=model, text=completion)
if "togethercomputer" in model: if "togethercomputer" in model:
# together ai prices based on size of llm # together ai prices based on size of llm
# get_model_params_and_category takes a model name and returns the category of LLM size it is in model_prices_and_context_window.json # get_model_params_and_category takes a model name and returns the category of LLM size it is in model_prices_and_context_window.json
model = get_model_params_and_category(model) model = get_model_params_and_category(model)
# print(together_catgeory)
prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = cost_per_token( prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = cost_per_token(
model=model, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens model=model, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens
) )