Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 10:44:24 +00:00)
Commit: show pricing for tg ai completion

parent 04f8b20651
commit 1ba6b6761b

2 changed files with 46 additions and 23 deletions
@@ -1,22 +1,38 @@
-# import sys, os
-# import traceback
-# from dotenv import load_dotenv
+import sys, os
+import traceback
+from dotenv import load_dotenv

-# load_dotenv()
-# import os
+load_dotenv()
+import os

-# sys.path.insert(
-#     0, os.path.abspath("../..")
-# )  # Adds the parent directory to the system path
-# import pytest
-# import litellm
-# from litellm import embedding, completion, text_completion
-# from litellm.utils import completion_cost
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+import pytest
+import litellm
+from litellm import embedding, completion, text_completion
+from litellm.utils import completion_cost


 # print(completion_cost(
 #     model="togethercomputer/llama-2-2b-chat",
 #     prompt="gm",
 #     completion="hello"
 # ))
+user_message = "Write a short poem about the sky"
+messages = [{"content": user_message, "role": "user"}]


+def test_completion_togetherai_cost():
+    try:
+        response = completion(
+            model="together_ai/togethercomputer/llama-2-70b-chat",
+            messages=messages,
+            request_timeout=200,
+        )
+        # Add any assertions here to check the response
+        print(response)
+        print("Completion Cost:")
+        cost = completion_cost(completion_response=response)
+        formatted_string = f"${float(cost):.10f}"
+        print(formatted_string)
+
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+# test_completion_togetherai_cost()
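The commented-out call kept at the top of the test file shows the older string-based costing path, where tokens are counted locally from the prompt and completion text. A minimal runnable sketch of that path (model name and strings are taken from the comment; the printed dollar amount depends on the pricing table and is not guaranteed here):

from litellm.utils import completion_cost

# String-based path: prompt/completion tokens are counted locally via
# token_counter, then priced for the given model.
cost = completion_cost(
    model="togethercomputer/llama-2-2b-chat",
    prompt="gm",
    completion="hello",
)
print(f"${float(cost):.10f}")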
@@ -563,7 +563,6 @@ def client(original_function):
         ):  # make it easy to get to the debugger logs if you've initialized it
             e.message += f"\n Check the log in your dashboard - {liteDebuggerClient.dashboard_url}"
         raise e

     return wrapper
@@ -571,7 +570,7 @@ def client(original_function):


 # Extract the number of billion parameters from the model name
-# together_compute
+# only used for together_computer LLMs
 def get_model_params_and_category(model_name):
     import re
     params_match = re.search(r'(\d+b)', model_name)  # catch all decimals like 3b, 70b, etc
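Only the regex line of get_model_params_and_category is visible in this hunk. The sketch below illustrates how such a helper might map an extracted parameter count to one of the size-based Together AI pricing keys in model_prices_and_context_window.json; the bucket boundaries and key names are assumptions for illustration, not the commit's actual code:

import re

def get_model_params_and_category_sketch(model_name):
    # Pull a size token like "3b" or "70b" out of the model name.
    params_match = re.search(r"(\d+b)", model_name)
    if params_match is None:
        return model_name  # no size in the name: return it unchanged
    params_billions = int(params_match.group(1)[:-1])  # drop trailing "b"
    # Assumed category names/boundaries (illustrative only); the real
    # categories live in model_prices_and_context_window.json.
    if params_billions <= 3:
        return "together-ai-up-to-3b"
    elif params_billions <= 7:
        return "together-ai-3.1b-7b"
    elif params_billions <= 20:
        return "together-ai-7.1b-20b"
    elif params_billions <= 40:
        return "together-ai-20.1b-40b"
    return "together-ai-40.1b-70b"

print(get_model_params_and_category_sketch("togethercomputer/llama-2-70b-chat"))
# prints: together-ai-40.1b-70b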
@@ -647,15 +646,23 @@ def cost_per_token(model="gpt-3.5-turbo", prompt_tokens=0, completion_tokens=0):
 def completion_cost(
     model="gpt-3.5-turbo",
     prompt="",
-    completion=""
+    completion="",
+    completion_response=None
 ):
-    prompt_tokens = token_counter(model=model, text=prompt)
-    completion_tokens = token_counter(model=model, text=completion)
+    prompt_tokens = 0
+    completion_tokens = 0
+    if completion_response != None:
+        # get input/output tokens from completion_response
+        prompt_tokens = completion_response['usage']['prompt_tokens']
+        completion_tokens = completion_response['usage']['completion_tokens']
+        model = completion_response['model']  # get model from completion_response
+    else:
+        prompt_tokens = token_counter(model=model, text=prompt)
+        completion_tokens = token_counter(model=model, text=completion)
     if "togethercomputer" in model:
         # together ai prices based on size of llm
         # get_model_params_and_category takes a model name and returns the category of LLM size it is in model_prices_and_context_window.json
         model = get_model_params_and_category(model)
         # print(together_catgeory)
     prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = cost_per_token(
         model=model, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens
     )
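With the new signature, a call can be priced straight from its response object. A minimal sketch using a dict that mimics the two fields the new branch reads; the token counts below are made up for illustration:

from litellm.utils import completion_cost

# Mimics the response shape the new branch indexes into:
# completion_response['usage'][...] and completion_response['model'].
mock_response = {
    "model": "togethercomputer/llama-2-70b-chat",
    "usage": {"prompt_tokens": 9, "completion_tokens": 42},
}
# Model name and exact token counts come from the response; because the
# name contains "togethercomputer", the model is first mapped to a
# size-based pricing category before cost_per_token is called.
cost = completion_cost(completion_response=mock_response)
print(f"${float(cost):.10f}")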