diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index b09703ba63..b749fd9ed0 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -1,6 +1,7 @@ { "gpt-4": { - "max_tokens": 8192, + "max_tokens": 8192, + "max_input_tokens": 8192, "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, @@ -9,6 +10,7 @@ }, "gpt-4-0314": { "max_tokens": 8192, + "max_input_tokens": 8192, "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, @@ -17,6 +19,7 @@ }, "gpt-4-0613": { "max_tokens": 8192, + "max_input_tokens": 8192, "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, @@ -25,6 +28,7 @@ }, "gpt-4-32k": { "max_tokens": 32768, + "max_input_tokens": 32768, "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, @@ -33,6 +37,7 @@ }, "gpt-4-32k-0314": { "max_tokens": 32768, + "max_input_tokens": 32768, "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, @@ -41,6 +46,7 @@ }, "gpt-4-32k-0613": { "max_tokens": 32768, + "max_input_tokens": 32768, "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, @@ -49,6 +55,7 @@ }, "gpt-4-1106-preview": { "max_tokens": 128000, + "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003, @@ -57,6 +64,7 @@ }, "gpt-4-vision-preview": { "max_tokens": 128000, + "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003, @@ -65,6 +73,7 @@ }, "gpt-3.5-turbo": { "max_tokens": 4097, + "max_input_tokens": 4097, "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, @@ -73,6 +82,7 @@ }, "gpt-3.5-turbo-0301": { "max_tokens": 4097, + "max_input_tokens": 4097, "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, @@ -81,6 +91,7 @@ }, "gpt-3.5-turbo-0613": { "max_tokens": 4097, + "max_input_tokens": 4097, "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, @@ -89,6 +100,7 @@ }, "gpt-3.5-turbo-1106": { "max_tokens": 16385, + "max_input_tokens": 16385, "max_output_tokens": 4096, "input_cost_per_token": 0.0000010, "output_cost_per_token": 0.0000020, @@ -97,6 +109,7 @@ }, "gpt-3.5-turbo-16k": { "max_tokens": 16385, + "max_input_tokens": 16385, "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, @@ -105,6 +118,7 @@ }, "gpt-3.5-turbo-16k-0613": { "max_tokens": 16385, + "max_input_tokens": 16385, "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, @@ -113,6 +127,7 @@ }, "ft:gpt-3.5-turbo": { "max_tokens": 4097, + "max_input_tokens": 4097, "max_output_tokens": 4096, "input_cost_per_token": 0.000012, "output_cost_per_token": 0.000016, @@ -189,6 +204,7 @@ }, "azure/gpt-4-1106-preview": { "max_tokens": 128000, + "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003, @@ -197,6 +213,7 @@ }, "azure/gpt-4-0613": { "max_tokens": 8192, + "max_input_tokens": 8192, "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, @@ -205,6 +222,7 @@ }, "azure/gpt-4-32k-0613": { "max_tokens": 32768, + "max_input_tokens": 32768, "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, @@ -213,6 +231,7 @@ }, "azure/gpt-4-32k": { "max_tokens": 32768, + "max_input_tokens": 32768, "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, @@ -221,6 +240,7 @@ }, "azure/gpt-4": { "max_tokens": 8192, + "max_input_tokens": 8192, "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, @@ -229,6 +249,7 @@ }, "azure/gpt-4-turbo": { "max_tokens": 128000, + "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003, @@ -237,6 +258,7 @@ }, "azure/gpt-4-turbo-vision-preview": { "max_tokens": 128000, + "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003, @@ -245,6 +267,7 @@ }, "azure/gpt-35-turbo-16k-0613": { "max_tokens": 16385, + "max_input_tokens": 16385, "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, @@ -253,6 +276,7 @@ }, "azure/gpt-35-turbo-1106": { "max_tokens": 16384, + "max_input_tokens": 16384, "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, @@ -261,6 +285,7 @@ }, "azure/gpt-35-turbo-16k": { "max_tokens": 16385, + "max_input_tokens": 16385, "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, @@ -269,6 +294,7 @@ }, "azure/gpt-35-turbo": { "max_tokens": 4097, + "max_input_tokens": 4097, "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, @@ -333,6 +359,8 @@ }, "babbage-002": { "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000004, "output_cost_per_token": 0.0000004, "litellm_provider": "text-completion-openai", @@ -340,6 +368,8 @@ }, "davinci-002": { "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002, "litellm_provider": "text-completion-openai", @@ -347,6 +377,8 @@ }, "gpt-3.5-turbo-instruct": { "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, "litellm_provider": "text-completion-openai",