mirror of https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00

show pricing for tg ai completion

This commit is contained in:
parent 04f8b20651
commit 1ba6b6761b

2 changed files with 46 additions and 23 deletions
@@ -1,22 +1,38 @@
-# import sys, os
-# import traceback
-# from dotenv import load_dotenv
-
-# load_dotenv()
-# import os
-
-# sys.path.insert(
-#     0, os.path.abspath("../..")
-# )  # Adds the parent directory to the system path
-# import pytest
-# import litellm
-# from litellm import embedding, completion, text_completion
-# from litellm.utils import completion_cost
-
-
-# print(completion_cost(
-#     model="togethercomputer/llama-2-2b-chat",
-#     prompt="gm",
-#     completion="hello"
-# ))
-
+import sys, os
+import traceback
+from dotenv import load_dotenv
+
+load_dotenv()
+import os
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+import pytest
+import litellm
+from litellm import embedding, completion, text_completion
+from litellm.utils import completion_cost
+
+
+user_message = "Write a short poem about the sky"
+messages = [{"content": user_message, "role": "user"}]
+
+
+def test_completion_togetherai_cost():
+    try:
+        response = completion(
+            model="together_ai/togethercomputer/llama-2-70b-chat",
+            messages=messages,
+            request_timeout=200,
+        )
+        # Add any assertions here to check the response
+        print(response)
+        print("Completion Cost:")
+        cost = completion_cost(completion_response=response)
+        formatted_string = f"${float(cost):.10f}"
+        print(formatted_string)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+# test_completion_togetherai_cost()
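Side note (not part of the commit): the f-string in the new test pins the cost, which is usually a fraction of a cent, to ten decimal places. A tiny sketch with an invented value:

    # hypothetical cost value, purely for illustration
    cost = 0.0000379
    print(f"${float(cost):.10f}")  # prints: $0.0000379000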
@@ -563,7 +563,6 @@ def client(original_function):
             ):  # make it easy to get to the debugger logs if you've initialized it
                 e.message += f"\n Check the log in your dashboard - {liteDebuggerClient.dashboard_url}"
             raise e
 
     return wrapper
-
 
@@ -571,7 +570,7 @@ def client(original_function):
 
 
 # Extract the number of billion parameters from the model name
-# together_compute
+# only used for together_computer LLMs
 def get_model_params_and_category(model_name):
     import re
     params_match = re.search(r'(\d+b)', model_name) # catch all decimals like 3b, 70b, etc
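As a quick illustration (not part of the commit) of what that search pulls out of a model name — note that `\d+b` only matches integer sizes like 3b or 70b:

    import re

    # illustrative model name; mirrors the pattern used in get_model_params_and_category
    params_match = re.search(r'(\d+b)', "togethercomputer/llama-2-70b-chat")
    print(params_match.group(1) if params_match else None)  # prints: 70b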
@@ -647,15 +646,23 @@ def cost_per_token(model="gpt-3.5-turbo", prompt_tokens=0, completion_tokens=0):
 def completion_cost(
         model="gpt-3.5-turbo",
         prompt="",
-        completion=""
+        completion="",
+        completion_response=None
     ):
-    prompt_tokens = token_counter(model=model, text=prompt)
-    completion_tokens = token_counter(model=model, text=completion)
+    prompt_tokens = 0
+    completion_tokens = 0
+    if completion_response != None:
+        # get input/output tokens from completion_response
+        prompt_tokens = completion_response['usage']['prompt_tokens']
+        completion_tokens = completion_response['usage']['completion_tokens']
+        model = completion_response['model'] # get model from completion_response
+    else:
+        prompt_tokens = token_counter(model=model, text=prompt)
+        completion_tokens = token_counter(model=model, text=completion)
     if "togethercomputer" in model:
         # together ai prices based on size of llm
         # get_model_params_and_category takes a model name and returns the category of LLM size it is in model_prices_and_context_window.json
         model = get_model_params_and_category(model)
-        # print(together_catgeory)
     prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = cost_per_token(
         model=model, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens
     )
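Taken together with the test above, a sketch of the two call paths the updated completion_cost supports (the response dict below is hand-built to mirror the fields the diff reads; real callers would pass the object returned by completion()):

    from litellm.utils import completion_cost

    # Path 1: cost from raw text; tokens are counted via token_counter
    cost_from_text = completion_cost(
        model="gpt-3.5-turbo", prompt="gm", completion="hello"
    )

    # Path 2: cost from a response object; token counts and the model name
    # come from the response itself (values here are invented)
    fake_response = {
        "model": "togethercomputer/llama-2-70b-chat",
        "usage": {"prompt_tokens": 12, "completion_tokens": 25},
    }
    cost_from_response = completion_cost(completion_response=fake_response)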