diff --git a/litellm/tests/test_completion_cost.py b/litellm/tests/test_completion_cost.py index 3828ef7997..d759484a6b 100644 --- a/litellm/tests/test_completion_cost.py +++ b/litellm/tests/test_completion_cost.py @@ -1,22 +1,38 @@ -# import sys, os -# import traceback -# from dotenv import load_dotenv +import sys, os +import traceback +from dotenv import load_dotenv -# load_dotenv() -# import os +load_dotenv() +import os -# sys.path.insert( -# 0, os.path.abspath("../..") -# ) # Adds the parent directory to the system path -# import pytest -# import litellm -# from litellm import embedding, completion, text_completion -# from litellm.utils import completion_cost +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path +import pytest +import litellm +from litellm import embedding, completion, text_completion +from litellm.utils import completion_cost -# print(completion_cost( -# model="togethercomputer/llama-2-2b-chat", -# prompt="gm", -# completion="hello" -# )) \ No newline at end of file +user_message = "Write a short poem about the sky" +messages = [{"content": user_message, "role": "user"}] + + +def test_completion_togetherai_cost(): + try: + response = completion( + model="together_ai/togethercomputer/llama-2-70b-chat", + messages=messages, + request_timeout=200, + ) + # Add any assertions here to check the response + print(response) + print("Completion Cost:") + cost = completion_cost(completion_response=response) + formatted_string = f"${float(cost):.10f}" + print(formatted_string) + + except Exception as e: + pytest.fail(f"Error occurred: {e}") +# test_completion_togetherai_cost() \ No newline at end of file diff --git a/litellm/utils.py b/litellm/utils.py index 782d0ac4a9..48e61b184c 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -563,7 +563,6 @@ def client(original_function): ): # make it easy to get to the debugger logs if you've initialized it e.message += f"\n Check the log in your dashboard - 
{liteDebuggerClient.dashboard_url}" raise e - return wrapper @@ -571,7 +570,7 @@ def client(original_function): # Extract the number of billion parameters from the model name -# together_compute +# only used for togethercomputer LLMs def get_model_params_and_category(model_name): import re params_match = re.search(r'(\d+b)', model_name) # catch all decimals like 3b, 70b, etc @@ -647,15 +646,23 @@ def cost_per_token(model="gpt-3.5-turbo", prompt_tokens=0, completion_tokens=0): def completion_cost( model="gpt-3.5-turbo", prompt="", - completion="" + completion="", + completion_response=None ): - prompt_tokens = token_counter(model=model, text=prompt) - completion_tokens = token_counter(model=model, text=completion) + prompt_tokens = 0 + completion_tokens = 0 + if completion_response != None: + # get input/output tokens from completion_response + prompt_tokens = completion_response['usage']['prompt_tokens'] + completion_tokens = completion_response['usage']['completion_tokens'] + model = completion_response['model'] # get model from completion_response + else: + prompt_tokens = token_counter(model=model, text=prompt) + completion_tokens = token_counter(model=model, text=completion) if "togethercomputer" in model: # together ai prices based on size of llm # get_model_params_and_category takes a model name and returns the category of LLM size it is in model_prices_and_context_window.json model = get_model_params_and_category(model) - # print(together_catgeory) prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = cost_per_token( model=model, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens )