build(model_prices_and_context_window.json): add max input tokens for openai and azure models

Krrish Dholakia 2024-01-09 12:23:57 +05:30
parent 2f377db604
commit 9d45c73475

model_prices_and_context_window.json

@@ -1,6 +1,7 @@
 {
     "gpt-4": {
         "max_tokens": 8192,
+        "max_input_tokens": 8192,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00003,
         "output_cost_per_token": 0.00006,
@@ -9,6 +10,7 @@
     },
     "gpt-4-0314": {
         "max_tokens": 8192,
+        "max_input_tokens": 8192,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00003,
         "output_cost_per_token": 0.00006,
@@ -17,6 +19,7 @@
     },
     "gpt-4-0613": {
         "max_tokens": 8192,
+        "max_input_tokens": 8192,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00003,
         "output_cost_per_token": 0.00006,
@@ -25,6 +28,7 @@
     },
     "gpt-4-32k": {
         "max_tokens": 32768,
+        "max_input_tokens": 32768,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00006,
         "output_cost_per_token": 0.00012,
@@ -33,6 +37,7 @@
     },
     "gpt-4-32k-0314": {
         "max_tokens": 32768,
+        "max_input_tokens": 32768,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00006,
         "output_cost_per_token": 0.00012,
@@ -41,6 +46,7 @@
     },
     "gpt-4-32k-0613": {
         "max_tokens": 32768,
+        "max_input_tokens": 32768,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00006,
         "output_cost_per_token": 0.00012,
@@ -49,6 +55,7 @@
     },
     "gpt-4-1106-preview": {
         "max_tokens": 128000,
+        "max_input_tokens": 128000,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00001,
         "output_cost_per_token": 0.00003,
@@ -57,6 +64,7 @@
     },
     "gpt-4-vision-preview": {
         "max_tokens": 128000,
+        "max_input_tokens": 128000,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00001,
         "output_cost_per_token": 0.00003,
@@ -65,6 +73,7 @@
     },
     "gpt-3.5-turbo": {
         "max_tokens": 4097,
+        "max_input_tokens": 4097,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.0000015,
         "output_cost_per_token": 0.000002,
@@ -73,6 +82,7 @@
     },
     "gpt-3.5-turbo-0301": {
         "max_tokens": 4097,
+        "max_input_tokens": 4097,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.0000015,
         "output_cost_per_token": 0.000002,
@@ -81,6 +91,7 @@
     },
     "gpt-3.5-turbo-0613": {
         "max_tokens": 4097,
+        "max_input_tokens": 4097,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.0000015,
         "output_cost_per_token": 0.000002,
@@ -89,6 +100,7 @@
     },
     "gpt-3.5-turbo-1106": {
         "max_tokens": 16385,
+        "max_input_tokens": 16385,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.0000010,
         "output_cost_per_token": 0.0000020,
@@ -97,6 +109,7 @@
     },
     "gpt-3.5-turbo-16k": {
         "max_tokens": 16385,
+        "max_input_tokens": 16385,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000004,
@@ -105,6 +118,7 @@
     },
     "gpt-3.5-turbo-16k-0613": {
         "max_tokens": 16385,
+        "max_input_tokens": 16385,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000004,
@@ -113,6 +127,7 @@
     },
     "ft:gpt-3.5-turbo": {
         "max_tokens": 4097,
+        "max_input_tokens": 4097,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000012,
         "output_cost_per_token": 0.000016,
@@ -189,6 +204,7 @@
     },
     "azure/gpt-4-1106-preview": {
         "max_tokens": 128000,
+        "max_input_tokens": 128000,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00001,
         "output_cost_per_token": 0.00003,
@@ -197,6 +213,7 @@
     },
     "azure/gpt-4-0613": {
         "max_tokens": 8192,
+        "max_input_tokens": 8192,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00003,
         "output_cost_per_token": 0.00006,
@@ -205,6 +222,7 @@
     },
     "azure/gpt-4-32k-0613": {
         "max_tokens": 32768,
+        "max_input_tokens": 32768,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00006,
         "output_cost_per_token": 0.00012,
@@ -213,6 +231,7 @@
     },
     "azure/gpt-4-32k": {
         "max_tokens": 32768,
+        "max_input_tokens": 32768,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00006,
         "output_cost_per_token": 0.00012,
@@ -221,6 +240,7 @@
     },
     "azure/gpt-4": {
         "max_tokens": 8192,
+        "max_input_tokens": 8192,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00003,
         "output_cost_per_token": 0.00006,
@@ -229,6 +249,7 @@
     },
     "azure/gpt-4-turbo": {
         "max_tokens": 128000,
+        "max_input_tokens": 128000,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00001,
         "output_cost_per_token": 0.00003,
@@ -237,6 +258,7 @@
     },
     "azure/gpt-4-turbo-vision-preview": {
         "max_tokens": 128000,
+        "max_input_tokens": 128000,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.00001,
         "output_cost_per_token": 0.00003,
@@ -245,6 +267,7 @@
     },
     "azure/gpt-35-turbo-16k-0613": {
         "max_tokens": 16385,
+        "max_input_tokens": 16385,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000004,
@@ -253,6 +276,7 @@
     },
     "azure/gpt-35-turbo-1106": {
         "max_tokens": 16384,
+        "max_input_tokens": 16384,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.0000015,
         "output_cost_per_token": 0.000002,
@@ -261,6 +285,7 @@
     },
     "azure/gpt-35-turbo-16k": {
         "max_tokens": 16385,
+        "max_input_tokens": 16385,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000004,
@@ -269,6 +294,7 @@
     },
     "azure/gpt-35-turbo": {
         "max_tokens": 4097,
+        "max_input_tokens": 4097,
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.0000015,
         "output_cost_per_token": 0.000002,
@@ -333,6 +359,8 @@
     },
     "babbage-002": {
         "max_tokens": 16384,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 4096,
         "input_cost_per_token": 0.0000004,
         "output_cost_per_token": 0.0000004,
         "litellm_provider": "text-completion-openai",
@@ -340,6 +368,8 @@
     },
     "davinci-002": {
         "max_tokens": 16384,
+        "max_input_tokens": 16384,
+        "max_output_tokens": 4096,
         "input_cost_per_token": 0.000002,
         "output_cost_per_token": 0.000002,
         "litellm_provider": "text-completion-openai",
@@ -347,6 +377,8 @@
     },
     "gpt-3.5-turbo-instruct": {
         "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 4096,
         "input_cost_per_token": 0.0000015,
         "output_cost_per_token": 0.000002,
         "litellm_provider": "text-completion-openai",