(fix) update back model prices with latest llms

This commit is contained in:
ishaan-jaff 2023-12-11 10:56:01 -08:00
parent 04f6b976e9
commit 0522ffc4b7

View file

@@ -41,6 +41,20 @@
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat" "mode": "chat"
}, },
"gpt-4-1106-preview": {
"max_tokens": 128000,
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "openai",
"mode": "chat"
},
"gpt-4-vision-preview": {
"max_tokens": 128000,
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "openai",
"mode": "chat"
},
"gpt-3.5-turbo": { "gpt-3.5-turbo": {
"max_tokens": 4097, "max_tokens": 4097,
"input_cost_per_token": 0.0000015, "input_cost_per_token": 0.0000015,
@@ -62,6 +76,13 @@
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat" "mode": "chat"
}, },
"gpt-3.5-turbo-1106": {
"max_tokens": 16385,
"input_cost_per_token": 0.0000010,
"output_cost_per_token": 0.0000020,
"litellm_provider": "openai",
"mode": "chat"
},
"gpt-3.5-turbo-16k": { "gpt-3.5-turbo-16k": {
"max_tokens": 16385, "max_tokens": 16385,
"input_cost_per_token": 0.000003, "input_cost_per_token": 0.000003,
@@ -76,6 +97,62 @@
"litellm_provider": "openai", "litellm_provider": "openai",
"mode": "chat" "mode": "chat"
}, },
"ft:gpt-3.5-turbo": {
"max_tokens": 4097,
"input_cost_per_token": 0.000012,
"output_cost_per_token": 0.000016,
"litellm_provider": "openai",
"mode": "chat"
},
"text-embedding-ada-002": {
"max_tokens": 8191,
"input_cost_per_token": 0.0000001,
"output_cost_per_token": 0.000000,
"litellm_provider": "openai",
"mode": "embedding"
},
"azure/gpt-4-1106-preview": {
"max_tokens": 128000,
"input_cost_per_token": 0.00001,
"output_cost_per_token": 0.00003,
"litellm_provider": "azure",
"mode": "chat"
},
"azure/gpt-4-32k": {
"max_tokens": 8192,
"input_cost_per_token": 0.00006,
"output_cost_per_token": 0.00012,
"litellm_provider": "azure",
"mode": "chat"
},
"azure/gpt-4": {
"max_tokens": 16385,
"input_cost_per_token": 0.00003,
"output_cost_per_token": 0.00006,
"litellm_provider": "azure",
"mode": "chat"
},
"azure/gpt-3.5-turbo-16k": {
"max_tokens": 16385,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000004,
"litellm_provider": "azure",
"mode": "chat"
},
"azure/gpt-3.5-turbo": {
"max_tokens": 4097,
"input_cost_per_token": 0.0000015,
"output_cost_per_token": 0.000002,
"litellm_provider": "azure",
"mode": "chat"
},
"azure/text-embedding-ada-002": {
"max_tokens": 8191,
"input_cost_per_token": 0.0000001,
"output_cost_per_token": 0.000000,
"litellm_provider": "azure",
"mode": "embedding"
},
"text-davinci-003": { "text-davinci-003": {
"max_tokens": 4097, "max_tokens": 4097,
"input_cost_per_token": 0.000002, "input_cost_per_token": 0.000002,
@@ -127,6 +204,7 @@
}, },
"claude-instant-1": { "claude-instant-1": {
"max_tokens": 100000, "max_tokens": 100000,
"max_output_tokens": 8191,
"input_cost_per_token": 0.00000163, "input_cost_per_token": 0.00000163,
"output_cost_per_token": 0.00000551, "output_cost_per_token": 0.00000551,
"litellm_provider": "anthropic", "litellm_provider": "anthropic",
@@ -134,15 +212,25 @@
}, },
"claude-instant-1.2": { "claude-instant-1.2": {
"max_tokens": 100000, "max_tokens": 100000,
"input_cost_per_token": 0.00000163, "max_output_tokens": 8191,
"output_cost_per_token": 0.00000551, "input_cost_per_token": 0.000000163,
"output_cost_per_token": 0.000000551,
"litellm_provider": "anthropic", "litellm_provider": "anthropic",
"mode": "chat" "mode": "chat"
}, },
"claude-2": { "claude-2": {
"max_tokens": 100000, "max_tokens": 100000,
"input_cost_per_token": 0.00001102, "max_output_tokens": 8191,
"output_cost_per_token": 0.00003268, "input_cost_per_token": 0.000008,
"output_cost_per_token": 0.000024,
"litellm_provider": "anthropic",
"mode": "chat"
},
"claude-2.1": {
"max_tokens": 200000,
"max_output_tokens": 8191,
"input_cost_per_token": 0.000008,
"output_cost_per_token": 0.000024,
"litellm_provider": "anthropic", "litellm_provider": "anthropic",
"mode": "chat" "mode": "chat"
}, },
@@ -227,9 +315,51 @@
"max_tokens": 32000, "max_tokens": 32000,
"input_cost_per_token": 0.000000125, "input_cost_per_token": 0.000000125,
"output_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125,
"litellm_provider": "vertex_ai-chat-models", "litellm_provider": "vertex_ai-code-chat-models",
"mode": "chat" "mode": "chat"
}, },
"palm/chat-bison": {
"max_tokens": 4096,
"input_cost_per_token": 0.000000125,
"output_cost_per_token": 0.000000125,
"litellm_provider": "palm",
"mode": "chat"
},
"palm/chat-bison-001": {
"max_tokens": 4096,
"input_cost_per_token": 0.000000125,
"output_cost_per_token": 0.000000125,
"litellm_provider": "palm",
"mode": "chat"
},
"palm/text-bison": {
"max_tokens": 8196,
"input_cost_per_token": 0.000000125,
"output_cost_per_token": 0.000000125,
"litellm_provider": "palm",
"mode": "completion"
},
"palm/text-bison-001": {
"max_tokens": 8196,
"input_cost_per_token": 0.000000125,
"output_cost_per_token": 0.000000125,
"litellm_provider": "palm",
"mode": "completion"
},
"palm/text-bison-safety-off": {
"max_tokens": 8196,
"input_cost_per_token": 0.000000125,
"output_cost_per_token": 0.000000125,
"litellm_provider": "palm",
"mode": "completion"
},
"palm/text-bison-safety-recitation-off": {
"max_tokens": 8196,
"input_cost_per_token": 0.000000125,
"output_cost_per_token": 0.000000125,
"litellm_provider": "palm",
"mode": "completion"
},
"command-nightly": { "command-nightly": {
"max_tokens": 4096, "max_tokens": 4096,
"input_cost_per_token": 0.000015, "input_cost_per_token": 0.000015,
@@ -267,6 +397,8 @@
}, },
"replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1": { "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1": {
"max_tokens": 4096, "max_tokens": 4096,
"input_cost_per_token": 0.0000,
"output_cost_per_token": 0.0000,
"litellm_provider": "replicate", "litellm_provider": "replicate",
"mode": "chat" "mode": "chat"
}, },
@@ -293,6 +425,7 @@
}, },
"openrouter/anthropic/claude-instant-v1": { "openrouter/anthropic/claude-instant-v1": {
"max_tokens": 100000, "max_tokens": 100000,
"max_output_tokens": 8191,
"input_cost_per_token": 0.00000163, "input_cost_per_token": 0.00000163,
"output_cost_per_token": 0.00000551, "output_cost_per_token": 0.00000551,
"litellm_provider": "openrouter", "litellm_provider": "openrouter",
@@ -300,6 +433,7 @@
}, },
"openrouter/anthropic/claude-2": { "openrouter/anthropic/claude-2": {
"max_tokens": 100000, "max_tokens": 100000,
"max_output_tokens": 8191,
"input_cost_per_token": 0.00001102, "input_cost_per_token": 0.00001102,
"output_cost_per_token": 0.00003268, "output_cost_per_token": 0.00003268,
"litellm_provider": "openrouter", "litellm_provider": "openrouter",
@@ -496,20 +630,31 @@
}, },
"anthropic.claude-v1": { "anthropic.claude-v1": {
"max_tokens": 100000, "max_tokens": 100000,
"input_cost_per_token": 0.00001102, "max_output_tokens": 8191,
"output_cost_per_token": 0.00003268, "input_cost_per_token": 0.000008,
"output_cost_per_token": 0.000024,
"litellm_provider": "bedrock", "litellm_provider": "bedrock",
"mode": "chat" "mode": "chat"
}, },
"anthropic.claude-v2": { "anthropic.claude-v2": {
"max_tokens": 100000, "max_tokens": 100000,
"input_cost_per_token": 0.00001102, "max_output_tokens": 8191,
"output_cost_per_token": 0.00003268, "input_cost_per_token": 0.000008,
"output_cost_per_token": 0.000024,
"litellm_provider": "bedrock",
"mode": "chat"
},
"anthropic.claude-v2:1": {
"max_tokens": 200000,
"max_output_tokens": 8191,
"input_cost_per_token": 0.000008,
"output_cost_per_token": 0.000024,
"litellm_provider": "bedrock", "litellm_provider": "bedrock",
"mode": "chat" "mode": "chat"
}, },
"anthropic.claude-instant-v1": { "anthropic.claude-instant-v1": {
"max_tokens": 100000, "max_tokens": 100000,
"max_output_tokens": 8191,
"input_cost_per_token": 0.00000163, "input_cost_per_token": 0.00000163,
"output_cost_per_token": 0.00000551, "output_cost_per_token": 0.00000551,
"litellm_provider": "bedrock", "litellm_provider": "bedrock",
@@ -529,26 +674,80 @@
"litellm_provider": "bedrock", "litellm_provider": "bedrock",
"mode": "chat" "mode": "chat"
}, },
"meta.llama2-70b-chat-v1": {
"max_tokens": 4096,
"input_cost_per_token": 0.00000195,
"output_cost_per_token": 0.00000256,
"litellm_provider": "bedrock",
"mode": "chat"
},
"sagemaker/meta-textgeneration-llama-2-7b": {
"max_tokens": 4096,
"input_cost_per_token": 0.000,
"output_cost_per_token": 0.000,
"litellm_provider": "sagemaker",
"mode": "completion"
},
"sagemaker/meta-textgeneration-llama-2-7b-f": {
"max_tokens": 4096,
"input_cost_per_token": 0.000,
"output_cost_per_token": 0.000,
"litellm_provider": "sagemaker",
"mode": "chat"
},
"sagemaker/meta-textgeneration-llama-2-13b": {
"max_tokens": 4096,
"input_cost_per_token": 0.000,
"output_cost_per_token": 0.000,
"litellm_provider": "sagemaker",
"mode": "completion"
},
"sagemaker/meta-textgeneration-llama-2-13b-f": {
"max_tokens": 4096,
"input_cost_per_token": 0.000,
"output_cost_per_token": 0.000,
"litellm_provider": "sagemaker",
"mode": "chat"
},
"sagemaker/meta-textgeneration-llama-2-70b": {
"max_tokens": 4096,
"input_cost_per_token": 0.000,
"output_cost_per_token": 0.000,
"litellm_provider": "sagemaker",
"mode": "completion"
},
"sagemaker/meta-textgeneration-llama-2-70b-b-f": {
"max_tokens": 4096,
"input_cost_per_token": 0.000,
"output_cost_per_token": 0.000,
"litellm_provider": "sagemaker",
"mode": "chat"
},
"together-ai-up-to-3b": { "together-ai-up-to-3b": {
"input_cost_per_token": 0.0000001, "input_cost_per_token": 0.0000001,
"output_cost_per_token": 0.0000001 "output_cost_per_token": 0.0000001,
"litellm_provider": "together_ai"
}, },
"together-ai-3.1b-7b": { "together-ai-3.1b-7b": {
"input_cost_per_token": 0.0000002, "input_cost_per_token": 0.0000002,
"output_cost_per_token": 0.0000002 "output_cost_per_token": 0.0000002,
"litellm_provider": "together_ai"
}, },
"together-ai-7.1b-20b": { "together-ai-7.1b-20b": {
"max_tokens": 1000, "max_tokens": 1000,
"input_cost_per_token": 0.0000004, "input_cost_per_token": 0.0000004,
"output_cost_per_token": 0.0000004 "output_cost_per_token": 0.0000004,
"litellm_provider": "together_ai"
}, },
"together-ai-20.1b-40b": { "together-ai-20.1b-40b": {
"input_cost_per_token": 0.000001, "input_cost_per_token": 0.0000008,
"output_cost_per_token": 0.000001 "output_cost_per_token": 0.0000008,
"litellm_provider": "together_ai"
}, },
"together-ai-40.1b-70b": { "together-ai-40.1b-70b": {
"input_cost_per_token": 0.000003, "input_cost_per_token": 0.0000009,
"output_cost_per_token": 0.000003 "output_cost_per_token": 0.0000009,
"litellm_provider": "together_ai"
}, },
"ollama/llama2": { "ollama/llama2": {
"max_tokens": 4096, "max_tokens": 4096,
@@ -578,10 +777,38 @@
"litellm_provider": "ollama", "litellm_provider": "ollama",
"mode": "completion" "mode": "completion"
}, },
"ollama/mistral": {
"max_tokens": 8192,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "ollama",
"mode": "completion"
},
"ollama/codellama": {
"max_tokens": 4096,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "ollama",
"mode": "completion"
},
"ollama/orca-mini": {
"max_tokens": 4096,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "ollama",
"mode": "completion"
},
"ollama/vicuna": {
"max_tokens": 2048,
"input_cost_per_token": 0.0,
"output_cost_per_token": 0.0,
"litellm_provider": "ollama",
"mode": "completion"
},
"deepinfra/meta-llama/Llama-2-70b-chat-hf": { "deepinfra/meta-llama/Llama-2-70b-chat-hf": {
"max_tokens": 6144, "max_tokens": 4096,
"input_cost_per_token": 0.000001875, "input_cost_per_token": 0.000000700,
"output_cost_per_token": 0.000001875, "output_cost_per_token": 0.000000950,
"litellm_provider": "deepinfra", "litellm_provider": "deepinfra",
"mode": "chat" "mode": "chat"
}, },
@@ -619,5 +846,103 @@
"output_cost_per_token": 0.00000095, "output_cost_per_token": 0.00000095,
"litellm_provider": "deepinfra", "litellm_provider": "deepinfra",
"mode": "chat" "mode": "chat"
},
"perplexity/pplx-7b-chat": {
"max_tokens": 8192,
"input_cost_per_token": 0.0000000,
"output_cost_per_token": 0.000000,
"litellm_provider": "perplexity",
"mode": "chat"
},
"perplexity/pplx-70b-chat": {
"max_tokens": 4096,
"input_cost_per_token": 0.0000000,
"output_cost_per_token": 0.000000,
"litellm_provider": "perplexity",
"mode": "chat"
},
"perplexity/pplx-7b-online": {
"max_tokens": 4096,
"input_cost_per_token": 0.0000000,
"output_cost_per_token": 0.0005,
"litellm_provider": "perplexity",
"mode": "chat"
},
"perplexity/pplx-70b-online": {
"max_tokens": 4096,
"input_cost_per_token": 0.0000000,
"output_cost_per_token": 0.0005,
"litellm_provider": "perplexity",
"mode": "chat"
},
"perplexity/llama-2-13b-chat": {
"max_tokens": 4096,
"input_cost_per_token": 0.0000000,
"output_cost_per_token": 0.000000,
"litellm_provider": "perplexity",
"mode": "chat"
},
"perplexity/llama-2-70b-chat": {
"max_tokens": 4096,
"input_cost_per_token": 0.0000000,
"output_cost_per_token": 0.000000,
"litellm_provider": "perplexity",
"mode": "chat"
},
"perplexity/mistral-7b-instruct": {
"max_tokens": 4096,
"input_cost_per_token": 0.0000000,
"output_cost_per_token": 0.000000,
"litellm_provider": "perplexity",
"mode": "chat"
},
"perplexity/replit-code-v1.5-3b": {
"max_tokens": 4096,
"input_cost_per_token": 0.0000000,
"output_cost_per_token": 0.000000,
"litellm_provider": "perplexity",
"mode": "chat"
},
"anyscale/mistralai/Mistral-7B-Instruct-v0.1": {
"max_tokens": 16384,
"input_cost_per_token": 0.00000015,
"output_cost_per_token": 0.00000015,
"litellm_provider": "anyscale",
"mode": "chat"
},
"anyscale/HuggingFaceH4/zephyr-7b-beta": {
"max_tokens": 16384,
"input_cost_per_token": 0.00000015,
"output_cost_per_token": 0.00000015,
"litellm_provider": "anyscale",
"mode": "chat"
},
"anyscale/meta-llama/Llama-2-7b-chat-hf": {
"max_tokens": 4096,
"input_cost_per_token": 0.00000015,
"output_cost_per_token": 0.00000015,
"litellm_provider": "anyscale",
"mode": "chat"
},
"anyscale/meta-llama/Llama-2-13b-chat-hf": {
"max_tokens": 4096,
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.00000025,
"litellm_provider": "anyscale",
"mode": "chat"
},
"anyscale/meta-llama/Llama-2-70b-chat-hf": {
"max_tokens": 4096,
"input_cost_per_token": 0.000001,
"output_cost_per_token": 0.000001,
"litellm_provider": "anyscale",
"mode": "chat"
},
"anyscale/codellama/CodeLlama-34b-Instruct-hf": {
"max_tokens": 16384,
"input_cost_per_token": 0.000001,
"output_cost_per_token": 0.000001,
"litellm_provider": "anyscale",
"mode": "chat"
} }
} }