diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 9a25d39c4..e3aceb3da 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -1125,30 +1125,30 @@ "max_tokens": 8192, "max_input_tokens": 32760, "max_output_tokens": 8192, - "input_cost_per_token": 0.00000025, - "output_cost_per_token": 0.0000005, + "input_cost_per_token": 0.00000003125, + "output_cost_per_token": 0.00000009375, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, - "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models" }, "gemini-1.0-pro": { "max_tokens": 8192, "max_input_tokens": 32760, "max_output_tokens": 8192, - "input_cost_per_token": 0.00000025, - "output_cost_per_token": 0.0000005, + "input_cost_per_token": 0.00000003125, + "output_cost_per_token": 0.00000009375, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, - "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models" }, "gemini-1.0-pro-001": { "max_tokens": 8192, "max_input_tokens": 32760, "max_output_tokens": 8192, - "input_cost_per_token": 0.00000025, - "output_cost_per_token": 0.0000005, + "input_cost_per_token": 0.00000003125, + "output_cost_per_token": 0.00000009375, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -1158,8 +1158,8 @@ "max_tokens": 8192, "max_input_tokens": 32760, "max_output_tokens": 8192, - "input_cost_per_token": 0.00000025, - "output_cost_per_token": 0.0000005, + "input_cost_per_token": 0.00000003125, + "output_cost_per_token": 0.00000009375, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -1169,8 +1169,16 @@ "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0.000000625, - "output_cost_per_token": 0.000001875, + "input_cost_per_image": 0.001315, + "input_cost_per_audio_per_second": 0.000125, + "input_cost_per_video_per_second": 0.001315, + "input_cost_per_token": 0.0000003125, + "input_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000009375, + "output_cost_per_token_above_128k_tokens": 0.000001875, + "output_cost_per_image": 0.00263, + "output_cost_per_video_per_second": 0.00263, + "output_cost_per_audio_per_second": 0.00025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -1187,8 +1195,16 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_token": 0, - "output_cost_per_token": 0, + "input_cost_per_image": 0.0001315, + "input_cost_per_video_per_second": 0.0001315, + "input_cost_per_audio_per_second": 0.000125, + "input_cost_per_token": 0.00000003125, + "input_cost_per_token_above_128k_tokens": 0.0000000625, + "output_cost_per_token": 0.00000009375, + "output_cost_per_token_above_128k_tokens": 0.0000001875, + "output_cost_per_image": 0.000263, + "output_cost_per_video_per_second": 0.000263, + "output_cost_per_audio_per_second": 0.00025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -1205,8 +1221,16 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_token": 0, - "output_cost_per_token": 0, + "input_cost_per_image": 0.0001315, + "input_cost_per_video_per_second": 0.0001315, + "input_cost_per_audio_per_second": 0.000125, + "input_cost_per_token": 0.00000003125, + "input_cost_per_token_above_128k_tokens": 0.0000000625, + "output_cost_per_token": 0.00000009375, + "output_cost_per_token_above_128k_tokens": 0.0000001875, + "output_cost_per_image": 0.000263, + "output_cost_per_video_per_second": 0.000263, + "output_cost_per_audio_per_second": 0.00025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -1217,8 +1241,16 @@ "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0.000000625, - "output_cost_per_token": 0.000001875, + "input_cost_per_image": 0.001315, + "input_cost_per_audio_per_second": 0.000125, + "input_cost_per_video_per_second": 0.001315, + "input_cost_per_token": 0.0000003125, + "input_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000009375, + "output_cost_per_token_above_128k_tokens": 0.000001875, + "output_cost_per_image": 0.00263, + "output_cost_per_video_per_second": 0.00263, + "output_cost_per_audio_per_second": 0.00025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -1229,8 +1261,16 @@ "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0.000000625, - "output_cost_per_token": 0.000001875, + "input_cost_per_image": 0.001315, + "input_cost_per_audio_per_second": 0.000125, + "input_cost_per_video_per_second": 0.001315, + "input_cost_per_token": 0.0000003125, + "input_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000009375, + "output_cost_per_token_above_128k_tokens": 0.000001875, + "output_cost_per_image": 0.00263, + "output_cost_per_video_per_second": 0.00263, + "output_cost_per_audio_per_second": 0.00025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -1241,8 +1281,16 @@ "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0.000000625, - "output_cost_per_token": 0.000001875, + "input_cost_per_image": 0.001315, + "input_cost_per_audio_per_second": 0.000125, + "input_cost_per_video_per_second": 0.001315, + "input_cost_per_token": 0.0000003125, + "input_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000009375, + "output_cost_per_token_above_128k_tokens": 0.000001875, + "output_cost_per_image": 0.00263, + "output_cost_per_video_per_second": 0.00263, + "output_cost_per_audio_per_second": 0.00025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -1253,8 +1301,16 @@ "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0.000000625, - "output_cost_per_token": 0.000001875, + "input_cost_per_image": 0.001315, + "input_cost_per_audio_per_second": 0.000125, + "input_cost_per_video_per_second": 0.001315, + "input_cost_per_token": 0.0000003125, + "input_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000009375, + "output_cost_per_token_above_128k_tokens": 0.000001875, + "output_cost_per_image": 0.00263, + "output_cost_per_video_per_second": 0.00263, + "output_cost_per_audio_per_second": 0.00025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -1517,12 +1573,10 @@ "max_video_length": 1, "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, - "max_pdf_size_mb": 30, - "input_cost_per_token": 0, - "input_cost_per_token_up_to_128k_tokens": 0.00000035, + "max_pdf_size_mb": 30, + "input_cost_per_token": 0.00000035, "input_cost_per_token_above_128k_tokens": 0.0000007, - "output_cost_per_token": 0, - "output_cost_per_token_up_to_128k_tokens": 0.00000105, + "output_cost_per_token": 0.00000105, "output_cost_per_token_above_128k_tokens": 0.0000021, "litellm_provider": "gemini", "mode": "chat", @@ -1534,11 +1588,9 @@ "max_tokens": 8192, "max_input_tokens": 32760, "max_output_tokens": 8192, - "input_cost_per_token": 0.0, - "input_cost_per_token_up_to_128k_tokens": 0.00000035, + "input_cost_per_token": 0.00000035, "input_cost_per_token_above_128k_tokens": 0.0000007, - "output_cost_per_token": 0.0, - "output_cost_per_token_up_to_128k_tokens": 0.00000105, + "output_cost_per_token": 0.00000105, "output_cost_per_token_above_128k_tokens": 0.0000021, "litellm_provider": "gemini", "mode": "chat", @@ -1549,11 +1601,9 @@ "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0, - "input_cost_per_token_up_to_128k_tokens": 0.00000035, + "input_cost_per_token": 0.00000035, "input_cost_per_token_above_128k_tokens": 0.0000007, - "output_cost_per_token": 0, - "output_cost_per_token_up_to_128k_tokens": 0.00000105, + "output_cost_per_token": 0.00000105, "output_cost_per_token_above_128k_tokens": 0.0000021, "litellm_provider": "gemini", "mode": "chat", @@ -1566,11 +1616,9 @@ "max_tokens": 8192, "max_input_tokens": 1048576, "max_output_tokens": 8192, - "input_cost_per_token": 0, - "input_cost_per_token_up_to_128k_tokens": 0.00000035, + "input_cost_per_token": 0.00000035, "input_cost_per_token_above_128k_tokens": 0.0000007, - "output_cost_per_token": 0, - "output_cost_per_token_up_to_128k_tokens": 0.00000105, + "output_cost_per_token": 0.00000105, "output_cost_per_token_above_128k_tokens": 0.0000021, "litellm_provider": "gemini", "mode": "chat", @@ -1583,11 +1631,9 @@ "max_tokens": 2048, "max_input_tokens": 30720, "max_output_tokens": 2048, - "input_cost_per_token": 0.0, - "input_cost_per_token_up_to_128k_tokens": 0.00000035, + "input_cost_per_token": 0.00000035, "input_cost_per_token_above_128k_tokens": 0.0000007, - "output_cost_per_token": 0.0, - "output_cost_per_token_up_to_128k_tokens": 0.00000105, + "output_cost_per_token": 0.00000105, "output_cost_per_token_above_128k_tokens": 0.0000021, "litellm_provider": "gemini", "mode": "chat", diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 9a25d39c4..e3aceb3da 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -1125,30 +1125,30 @@ "max_tokens": 8192, "max_input_tokens": 32760, "max_output_tokens": 8192, - "input_cost_per_token": 0.00000025, - "output_cost_per_token": 0.0000005, + "input_cost_per_token": 0.00000003125, + "output_cost_per_token": 0.00000009375, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, - "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models" }, "gemini-1.0-pro": { "max_tokens": 8192, "max_input_tokens": 32760, "max_output_tokens": 8192, - "input_cost_per_token": 0.00000025, - "output_cost_per_token": 0.0000005, + "input_cost_per_token": 0.00000003125, + "output_cost_per_token": 0.00000009375, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, - "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" + "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#google_models" }, "gemini-1.0-pro-001": { "max_tokens": 8192, "max_input_tokens": 32760, "max_output_tokens": 8192, - "input_cost_per_token": 0.00000025, - "output_cost_per_token": 0.0000005, + "input_cost_per_token": 0.00000003125, + "output_cost_per_token": 0.00000009375, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -1158,8 +1158,8 @@ "max_tokens": 8192, "max_input_tokens": 32760, "max_output_tokens": 8192, - "input_cost_per_token": 0.00000025, - "output_cost_per_token": 0.0000005, + "input_cost_per_token": 0.00000003125, + "output_cost_per_token": 0.00000009375, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -1169,8 +1169,16 @@ "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0.000000625, - "output_cost_per_token": 0.000001875, + "input_cost_per_image": 0.001315, + "input_cost_per_audio_per_second": 0.000125, + "input_cost_per_video_per_second": 0.001315, + "input_cost_per_token": 0.0000003125, + "input_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000009375, + "output_cost_per_token_above_128k_tokens": 0.000001875, + "output_cost_per_image": 0.00263, + "output_cost_per_video_per_second": 0.00263, + "output_cost_per_audio_per_second": 0.00025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -1187,8 +1195,16 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_token": 0, - "output_cost_per_token": 0, + "input_cost_per_image": 0.0001315, + "input_cost_per_video_per_second": 0.0001315, + "input_cost_per_audio_per_second": 0.000125, + "input_cost_per_token": 0.00000003125, + "input_cost_per_token_above_128k_tokens": 0.0000000625, + "output_cost_per_token": 0.00000009375, + "output_cost_per_token_above_128k_tokens": 0.0000001875, + "output_cost_per_image": 0.000263, + "output_cost_per_video_per_second": 0.000263, + "output_cost_per_audio_per_second": 0.00025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -1205,8 +1221,16 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_token": 0, - "output_cost_per_token": 0, + "input_cost_per_image": 0.0001315, + "input_cost_per_video_per_second": 0.0001315, + "input_cost_per_audio_per_second": 0.000125, + "input_cost_per_token": 0.00000003125, + "input_cost_per_token_above_128k_tokens": 0.0000000625, + "output_cost_per_token": 0.00000009375, + "output_cost_per_token_above_128k_tokens": 0.0000001875, + "output_cost_per_image": 0.000263, + "output_cost_per_video_per_second": 0.000263, + "output_cost_per_audio_per_second": 0.00025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -1217,8 +1241,16 @@ "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0.000000625, - "output_cost_per_token": 0.000001875, + "input_cost_per_image": 0.001315, + "input_cost_per_audio_per_second": 0.000125, + "input_cost_per_video_per_second": 0.001315, + "input_cost_per_token": 0.0000003125, + "input_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000009375, + "output_cost_per_token_above_128k_tokens": 0.000001875, + "output_cost_per_image": 0.00263, + "output_cost_per_video_per_second": 0.00263, + "output_cost_per_audio_per_second": 0.00025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -1229,8 +1261,16 @@ "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0.000000625, - "output_cost_per_token": 0.000001875, + "input_cost_per_image": 0.001315, + "input_cost_per_audio_per_second": 0.000125, + "input_cost_per_video_per_second": 0.001315, + "input_cost_per_token": 0.0000003125, + "input_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000009375, + "output_cost_per_token_above_128k_tokens": 0.000001875, + "output_cost_per_image": 0.00263, + "output_cost_per_video_per_second": 0.00263, + "output_cost_per_audio_per_second": 0.00025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -1241,8 +1281,16 @@ "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0.000000625, - "output_cost_per_token": 0.000001875, + "input_cost_per_image": 0.001315, + "input_cost_per_audio_per_second": 0.000125, + "input_cost_per_video_per_second": 0.001315, + "input_cost_per_token": 0.0000003125, + "input_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000009375, + "output_cost_per_token_above_128k_tokens": 0.000001875, + "output_cost_per_image": 0.00263, + "output_cost_per_video_per_second": 0.00263, + "output_cost_per_audio_per_second": 0.00025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -1253,8 +1301,16 @@ "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0.000000625, - "output_cost_per_token": 0.000001875, + "input_cost_per_image": 0.001315, + "input_cost_per_audio_per_second": 0.000125, + "input_cost_per_video_per_second": 0.001315, + "input_cost_per_token": 0.0000003125, + "input_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.0000009375, + "output_cost_per_token_above_128k_tokens": 0.000001875, + "output_cost_per_image": 0.00263, + "output_cost_per_video_per_second": 0.00263, + "output_cost_per_audio_per_second": 0.00025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -1517,12 +1573,10 @@ "max_video_length": 1, "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, - "max_pdf_size_mb": 30, - "input_cost_per_token": 0, - "input_cost_per_token_up_to_128k_tokens": 0.00000035, + "max_pdf_size_mb": 30, + "input_cost_per_token": 0.00000035, "input_cost_per_token_above_128k_tokens": 0.0000007, - "output_cost_per_token": 0, - "output_cost_per_token_up_to_128k_tokens": 0.00000105, + "output_cost_per_token": 0.00000105, "output_cost_per_token_above_128k_tokens": 0.0000021, "litellm_provider": "gemini", "mode": "chat", @@ -1534,11 +1588,9 @@ "max_tokens": 8192, "max_input_tokens": 32760, "max_output_tokens": 8192, - "input_cost_per_token": 0.0, - "input_cost_per_token_up_to_128k_tokens": 0.00000035, + "input_cost_per_token": 0.00000035, "input_cost_per_token_above_128k_tokens": 0.0000007, - "output_cost_per_token": 0.0, - "output_cost_per_token_up_to_128k_tokens": 0.00000105, + "output_cost_per_token": 0.00000105, "output_cost_per_token_above_128k_tokens": 0.0000021, "litellm_provider": "gemini", "mode": "chat", @@ -1549,11 +1601,9 @@ "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 0, - "input_cost_per_token_up_to_128k_tokens": 0.00000035, + "input_cost_per_token": 0.00000035, "input_cost_per_token_above_128k_tokens": 0.0000007, - "output_cost_per_token": 0, - "output_cost_per_token_up_to_128k_tokens": 0.00000105, + "output_cost_per_token": 0.00000105, "output_cost_per_token_above_128k_tokens": 0.0000021, "litellm_provider": "gemini", "mode": "chat", @@ -1566,11 +1616,9 @@ "max_tokens": 8192, "max_input_tokens": 1048576, "max_output_tokens": 8192, - "input_cost_per_token": 0, - "input_cost_per_token_up_to_128k_tokens": 0.00000035, + "input_cost_per_token": 0.00000035, "input_cost_per_token_above_128k_tokens": 0.0000007, - "output_cost_per_token": 0, - "output_cost_per_token_up_to_128k_tokens": 0.00000105, + "output_cost_per_token": 0.00000105, "output_cost_per_token_above_128k_tokens": 0.0000021, "litellm_provider": "gemini", "mode": "chat", @@ -1583,11 +1631,9 @@ "max_tokens": 2048, "max_input_tokens": 30720, "max_output_tokens": 2048, - "input_cost_per_token": 0.0, - "input_cost_per_token_up_to_128k_tokens": 0.00000035, + "input_cost_per_token": 0.00000035, "input_cost_per_token_above_128k_tokens": 0.0000007, - "output_cost_per_token": 0.0, - "output_cost_per_token_up_to_128k_tokens": 0.00000105, + "output_cost_per_token": 0.00000105, "output_cost_per_token_above_128k_tokens": 0.0000021, "litellm_provider": "gemini", "mode": "chat",