From 5079909142c11371df707a803bb8cbea9b93d2b1 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Wed, 20 Sep 2023 13:39:19 -0700 Subject: [PATCH] updating model cost map with new models --- litellm/__init__.py | 4 +- model_prices_and_context_window.json | 349 ++++++++++++++++++++++++--- 2 files changed, 324 insertions(+), 29 deletions(-) diff --git a/litellm/__init__.py b/litellm/__init__.py index a4faaf2321..49b8b2668d 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -98,8 +98,8 @@ open_ai_text_completion_models: List = [ "text-curie-001", "text-babbage-001", "text-ada-001", - "text-babbage-002", - "text-davinci-002", + "babbage-002", + "davinci-002", ] cohere_models: List = [ diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index ac6f2678a1..5690ae07c9 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -2,100 +2,395 @@ "gpt-4": { "max_tokens": 8192, "input_cost_per_token": 0.00003, - "output_cost_per_token": 0.00006 + "output_cost_per_token": 0.00006, + "litellm_provider": "openai" + }, + "gpt-4-0314": { + "max_tokens": 8192, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, + "litellm_provider": "openai" }, "gpt-4-0613": { "max_tokens": 8192, "input_cost_per_token": 0.00003, - "output_cost_per_token": 0.00006 + "output_cost_per_token": 0.00006, + "litellm_provider": "openai" }, "gpt-4-32k": { "max_tokens": 32768, "input_cost_per_token": 0.00006, - "output_cost_per_token": 0.00012 + "output_cost_per_token": 0.00012, + "litellm_provider": "openai" + }, + "gpt-4-32k-0314": { + "max_tokens": 32768, + "input_cost_per_token": 0.00006, + "output_cost_per_token": 0.00012, + "litellm_provider": "openai" + }, + "gpt-4-32k-0613": { + "max_tokens": 32768, + "input_cost_per_token": 0.00006, + "output_cost_per_token": 0.00012, + "litellm_provider": "openai" }, "gpt-3.5-turbo": { "max_tokens": 4097, "input_cost_per_token": 0.0000015, - "output_cost_per_token": 0.000002 - }, - "gpt-3.5-turbo-0613": { - "max_tokens": 4097, - "input_cost_per_token": 0.0000015, - "output_cost_per_token": 0.000002 + "output_cost_per_token": 0.000002, + "litellm_provider": "openai" }, "gpt-3.5-turbo-0301": { "max_tokens": 4097, "input_cost_per_token": 0.0000015, - "output_cost_per_token": 0.000002 + "output_cost_per_token": 0.000002, + "litellm_provider": "openai" + }, + "gpt-3.5-turbo-0613": { + "max_tokens": 4097, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, + "litellm_provider": "openai" }, "gpt-3.5-turbo-16k": { "max_tokens": 16385, "input_cost_per_token": 0.000003, - "output_cost_per_token": 0.000004 + "output_cost_per_token": 0.000004, + "litellm_provider": "openai" }, "gpt-3.5-turbo-16k-0613": { "max_tokens": 16385, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004 }, + "text-davinci-003": { + "max_tokens": 4097, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000002, + "litellm_provider": "text-completion-openai" + }, + "text-curie-001": { + "max_tokens": 2049, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000002, + "litellm_provider": "text-completion-openai" + }, + "text-babbage-001": { + "max_tokens": 2049, + "input_cost_per_token": 0.0000004, + "output_cost_per_token": 0.0000004, + "litellm_provider": "text-completion-openai" + }, + "text-ada-001": { + "max_tokens": 2049, + "input_cost_per_token": 0.0000004, + "output_cost_per_token": 0.0000004, + "litellm_provider": "text-completion-openai" + }, + "babbage-002": { + "max_tokens": 16384, + "input_cost_per_token": 0.0000004, + "output_cost_per_token": 0.0000004, + "litellm_provider": "text-completion-openai" + }, + "davinci-002": { + "max_tokens": 16384, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000002, + "litellm_provider": "text-completion-openai" + }, "gpt-3.5-turbo-instruct": { "max_tokens": 8192, "input_cost_per_token": 0.0000015, - "output_cost_per_token": 0.000002 + "output_cost_per_token": 0.000002, + "litellm_provider": "text-completion-openai" }, "claude-instant-1": { "max_tokens": 100000, "input_cost_per_token": 0.00000163, - "output_cost_per_token": 0.00000551 + "output_cost_per_token": 0.00000551, + "litellm_provider": "anthropic" }, "claude-instant-1.2": { "max_tokens": 100000, "input_cost_per_token": 0.00000163, - "output_cost_per_token": 0.00000551 + "output_cost_per_token": 0.00000551, + "litellm_provider": "anthropic" }, "claude-2": { "max_tokens": 100000, "input_cost_per_token": 0.00001102, - "output_cost_per_token": 0.00003268 + "output_cost_per_token": 0.00003268, + "litellm_provider": "anthropic" }, - "text-bison-001": { + "text-bison": { "max_tokens": 8192, - "input_cost_per_token": 0.000004, - "output_cost_per_token": 0.000004 + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-text-models" }, - "chat-bison-001": { + "text-bison@001": { + "max_tokens": 8192, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-text-models" + }, + "chat-bison": { "max_tokens": 4096, - "input_cost_per_token": 0.000002, - "output_cost_per_token": 0.000002 + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-chat-models" + }, + "chat-bison@001": { + "max_tokens": 4096, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-chat-models" + }, + "chat-bison-32k": { + "max_tokens": 32000, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-chat-models" + }, + "code-bison": { + "max_tokens": 6144, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-code-text-models" + }, + "code-bison@001": { + "max_tokens": 6144, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-code-text-models" + }, + "code-gecko@001": { + "max_tokens": 2048, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-chat-models" + }, + "code-gecko@latest": { + "max_tokens": 2048, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-chat-models" + }, + "codechat-bison": { + "max_tokens": 6144, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-code-chat-models" + }, + "codechat-bison@001": { + "max_tokens": 6144, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-code-chat-models" + }, + "codechat-bison-32k": { + "max_tokens": 32000, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-chat-models" }, "command-nightly": { "max_tokens": 4096, "input_cost_per_token": 0.000015, - "output_cost_per_token": 0.000015 + "output_cost_per_token": 0.000015, + "litellm_provider": "cohere" }, "command": { "max_tokens": 4096, "input_cost_per_token": 0.000015, - "output_cost_per_token": 0.000015 + "output_cost_per_token": 0.000015, + "litellm_provider": "cohere" }, "command-light": { "max_tokens": 4096, "input_cost_per_token": 0.000015, - "output_cost_per_token": 0.000015 + "output_cost_per_token": 0.000015, + "litellm_provider": "cohere" }, "command-medium-beta": { "max_tokens": 4096, "input_cost_per_token": 0.000015, - "output_cost_per_token": 0.000015 + "output_cost_per_token": 0.000015, + "litellm_provider": "cohere" }, "command-xlarge-beta": { "max_tokens": 4096, "input_cost_per_token": 0.000015, - "output_cost_per_token": 0.000015 + "output_cost_per_token": 0.000015, + "litellm_provider": "cohere" }, "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1": { - "max_tokens": 4096 + "max_tokens": 4096, + "litellm_provider": "replicate" + }, + "openrouter/openai/gpt-3.5-turbo": { + "max_tokens": 4095, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, + "litellm_provider": "openrouter" + }, + "openrouter/openai/gpt-3.5-turbo-16k": { + "max_tokens": 16383, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000004, + "litellm_provider": "openrouter" + }, + "openrouter/openai/gpt-4": { + "max_tokens": 8192, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, + "litellm_provider": "openrouter" + }, + "openrouter/anthropic/claude-instant-v1": { + "max_tokens": 100000, + "input_cost_per_token": 0.00000163, + "output_cost_per_token": 0.00000551, + "litellm_provider": "openrouter" + }, + "openrouter/anthropic/claude-2": { + "max_tokens": 100000, + "input_cost_per_token": 0.00001102, + "output_cost_per_token": 0.00003268, + "litellm_provider": "openrouter" + }, + "openrouter/google/palm-2-chat-bison": { + "max_tokens": 8000, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000005, + "litellm_provider": "openrouter" + }, + "openrouter/google/palm-2-codechat-bison": { + "max_tokens": 8000, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000005, + "litellm_provider": "openrouter" + }, + "openrouter/meta-llama/llama-2-13b-chat": { + "max_tokens": 4096, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000002, + "litellm_provider": "openrouter" + }, + "openrouter/meta-llama/llama-2-70b-chat": { + "max_tokens": 4096, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.0000015, + "litellm_provider": "openrouter" + }, + "openrouter/meta-llama/codellama-34b-instruct": { + "max_tokens": 8096, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000005, + "litellm_provider": "openrouter" + }, + "openrouter/nousresearch/nous-hermes-llama2-13b": { + "max_tokens": 4096, + "input_cost_per_token": 0.0000002, + "output_cost_per_token": 0.0000002, + "litellm_provider": "openrouter" + }, + "openrouter/mancer/weaver": { + "max_tokens": 8000, + "input_cost_per_token": 0.000005625, + "output_cost_per_token": 0.000005625, + "litellm_provider": "openrouter" + }, + "openrouter/gryphe/mythomax-l2-13b": { + "max_tokens": 8192, + "input_cost_per_token": 0.000001875, + "output_cost_per_token": 0.000001875, + "litellm_provider": "openrouter" + }, + "openrouter/jondurbin/airoboros-l2-70b-2.1": { + "max_tokens": 4096, + "input_cost_per_token": 0.000013875, + "output_cost_per_token": 0.000013875, + "litellm_provider": "openrouter" + }, + "openrouter/undi95/remm-slerp-l2-13b": { + "max_tokens": 6144, + "input_cost_per_token": 0.000001875, + "output_cost_per_token": 0.000001875, + "litellm_provider": "openrouter" + }, + "openrouter/pygmalionai/mythalion-13b": { + "max_tokens": 4096, + "input_cost_per_token": 0.000001875, + "output_cost_per_token": 0.000001875, + "litellm_provider": "openrouter" + }, + "j2-ultra": { + "max_tokens": 8192, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000015, + "litellm_provider": "ai21" + }, + "j2-mid": { + "max_tokens": 8192, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00001, + "litellm_provider": "ai21" + }, + "j2-light": { + "max_tokens": 8192, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000003, + "litellm_provider": "ai21" + }, + "dolphin": { + "max_tokens": 4096, + "input_cost_per_token": 0.00002, + "output_cost_per_token": 0.00002, + "litellm_provider": "nlp_cloud" + }, + "chatdolphin": { + "max_tokens": 4096, + "input_cost_per_token": 0.00002, + "output_cost_per_token": 0.00002, + "litellm_provider": "nlp_cloud" + }, + "luminous-base": { + "max_tokens": 2048, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.000033, + "litellm_provider": "aleph_alpha" + }, + "luminous-base-control": { + "max_tokens": 2048, + "input_cost_per_token": 0.0000375, + "output_cost_per_token": 0.00004125, + "litellm_provider": "aleph_alpha" + }, + "luminous-extended": { + "max_tokens": 2048, + "input_cost_per_token": 0.000045, + "output_cost_per_token": 0.0000495, + "litellm_provider": "aleph_alpha" + }, + "luminous-extended-control": { + "max_tokens": 2048, + "input_cost_per_token": 0.00005625, + "output_cost_per_token": 0.000061875, + "litellm_provider": "aleph_alpha" + }, + "luminous-supreme": { + "max_tokens": 2048, + "input_cost_per_token": 0.000175, + "output_cost_per_token": 0.0001925, + "litellm_provider": "aleph_alpha" + }, + "luminous-supreme-control": { + "max_tokens": 2048, + "input_cost_per_token": 0.00021875, + "output_cost_per_token": 0.000240625, + "litellm_provider": "aleph_alpha" }, "together-ai-up-to-3b": { "input_cost_per_token": 0.0000001,