diff --git a/litellm/__init__.py b/litellm/__init__.py
index 01559e3d18..4c18d0e63c 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -10,8 +10,25 @@
 azure_key = None
 anthropic_key = None
 replicate_key = None
 cohere_key = None
-hugging_api_token = None
+
+model_cost = {
+    "gpt-3.5-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
+    "gpt-35-turbo": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},  # azure model name
+    "gpt-3.5-turbo-0613": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
+    "gpt-3.5-turbo-0301": {"max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002},
+    "gpt-3.5-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
+    "gpt-35-turbo-16k": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},  # azure model name
+    "gpt-3.5-turbo-16k-0613": {"max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004},
+    "gpt-4": {"max_tokens": 8000, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006},
+    "gpt-4-0613": {"max_tokens": 8000, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006},
+    "gpt-4-32k": {"max_tokens": 32000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012},
+    "claude-instant-1": {"max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551},
+    "claude-2": {"max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268},
+    "text-bison-001": {"max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004},
+    "chat-bison-001": {"max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002},
+    "command-nightly": {"max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015},
+}
 ####### THREAD-SPECIFIC DATA ###################
 class MyLocal(threading.local):
     def __init__(self):
diff --git a/litellm/tests.txt b/litellm/tests.txt
deleted file mode 100644
index 4f67a836c5..0000000000
--- a/litellm/tests.txt
+++ /dev/null
@@ -1 +0,0 @@
-test 1
\ No newline at end of file
diff --git a/litellm/utils.py b/litellm/utils.py
index c92440dce9..b47e082712 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -131,6 +131,46 @@ def client(original_function):
             raise e
     return wrapper
+####### USAGE CALCULATOR ################
+
+def prompt_token_calculator(model, messages):
+    # use tiktoken or anthropic's tokenizer depending on the model
+    text = " ".join(message["content"] for message in messages)
+    num_tokens = 0
+    if "claude" in model:
+        install_and_import('anthropic')
+        from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
+        anthropic = Anthropic()
+        num_tokens = anthropic.count_tokens(text)
+    else:
+        num_tokens = len(encoding.encode(text))
+    return num_tokens
+
+
+def cost_per_token(model="gpt-3.5-turbo", prompt_tokens=0, completion_tokens=0):
+    # given a model and token counts, return (prompt_cost, completion_cost) in USD
+    prompt_tokens_cost_usd_dollar = 0
+    completion_tokens_cost_usd_dollar = 0
+    model_cost_ref = litellm.model_cost
+    if model in model_cost_ref:
+        prompt_tokens_cost_usd_dollar = model_cost_ref[model]["input_cost_per_token"] * prompt_tokens
+        completion_tokens_cost_usd_dollar = model_cost_ref[model]["output_cost_per_token"] * completion_tokens
+        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
+    else:
+        # unknown model: fall back to the average input/output cost across all known models
+        input_cost_sum = 0
+        output_cost_sum = 0
+        model_cost_ref = litellm.model_cost
+        for model_key in model_cost_ref:
+            input_cost_sum += model_cost_ref[model_key]["input_cost_per_token"]
+            output_cost_sum += model_cost_ref[model_key]["output_cost_per_token"]
+        avg_input_cost = input_cost_sum / len(model_cost_ref.keys())
+        avg_output_cost = output_cost_sum / len(model_cost_ref.keys())
+        prompt_tokens_cost_usd_dollar = avg_input_cost * prompt_tokens
+        completion_tokens_cost_usd_dollar = avg_output_cost * completion_tokens
+        return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
+
+
 ####### HELPER FUNCTIONS ################
 def get_optional_params(  # 12 optional params
@@ -367,21 +407,6 @@ def handle_failure(exception, traceback_exception, start_time, end_time, args, kwargs):
             logging(logger_fn=user_logger_fn, exception=e)
             pass
 
-def prompt_token_calculator(model, messages):
-    # use tiktoken or anthropic's tokenizer depending on the model
-    text = " ".join(message["content"] for message in messages)
-    num_tokens = 0
-    if "claude" in model:
-        install_and_import('anthropic')
-        from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
-        anthropic = Anthropic()
-        num_tokens = anthropic.count_tokens(text)
-    else:
-        num_tokens = len(encoding.encode(text))
-    return num_tokens
-
-
-
 def handle_success(args, kwargs, result, start_time, end_time):
     global heliconeLogger, aispendLogger
     try:
diff --git a/pyproject.toml b/pyproject.toml
index 87d67d4fa7..dc608b8411 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "0.1.365"
+version = "0.1.366"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
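For reviewers, a minimal usage sketch of the cost_per_token helper added in litellm/utils.py above. This is not part of the diff; it assumes the package is importable and uses hypothetical token counts, and "some-unlisted-model" is a made-up name chosen to trigger the fallback branch:

from litellm.utils import cost_per_token

# Model present in litellm.model_cost: costs come straight from the map.
prompt_cost, completion_cost = cost_per_token(
    model="gpt-3.5-turbo", prompt_tokens=500, completion_tokens=200
)
# 500 * 0.0000015 = 0.00075 USD prompt; 200 * 0.000002 = 0.0004 USD completion
print(prompt_cost, completion_cost)

# Model missing from the map: falls back to the average per-token
# input/output cost across every entry in litellm.model_cost.
prompt_cost, completion_cost = cost_per_token(
    model="some-unlisted-model", prompt_tokens=500, completion_tokens=200
)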