diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index bc3799229..cfc2cef72 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -80,6 +80,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_vision": true, "supports_prompt_caching": true }, @@ -94,6 +95,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_vision": true, "supports_prompt_caching": true }, @@ -475,6 +477,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_vision": true }, "ft:davinci-002": { @@ -762,6 +765,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_vision": true }, "azure/gpt-4o-mini": { @@ -775,6 +779,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_vision": true, "supports_prompt_caching": true }, @@ -789,6 +794,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_vision": true, "supports_prompt_caching": true }, @@ -1113,6 +1119,52 @@ "supports_function_calling": true, "mode": "chat" }, + "azure_ai/mistral-large-2407": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, + "litellm_provider": "azure_ai", + "supports_function_calling": true, + "mode": "chat", + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.mistral-ai-large-2407-offer?tab=Overview" + }, + "azure_ai/ministral-3b": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000004, + "output_cost_per_token": 0.00000004, + "litellm_provider": "azure_ai", + "supports_function_calling": true, + "mode": "chat", + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.ministral-3b-2410-offer?tab=Overview" + }, + "azure_ai/Llama-3.2-11B-Vision-Instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "input_cost_per_token": 0.00000037, + "output_cost_per_token": 0.00000037, + "litellm_provider": "azure_ai", + "supports_function_calling": true, + "supports_vision": true, + "mode": "chat", + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.meta-llama-3-2-11b-vision-instruct-offer?tab=Overview" + }, + "azure_ai/Llama-3.2-90B-Vision-Instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "input_cost_per_token": 0.00000204, + "output_cost_per_token": 0.00000204, + "litellm_provider": "azure_ai", + "supports_function_calling": true, + "supports_vision": true, + "mode": "chat", + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.meta-llama-3-2-90b-vision-instruct-offer?tab=Overview" + }, "azure_ai/Meta-Llama-3-70B-Instruct": { "max_tokens": 8192, "max_input_tokens": 8192, @@ -1152,6 +1204,105 @@ "mode": "chat", "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice" }, + "azure_ai/Phi-3.5-mini-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000052, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3.5-vision-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000052, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": true, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3.5-MoE-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000016, + "output_cost_per_token": 0.00000064, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3-mini-4k-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000052, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3-mini-128k-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000052, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3-small-8k-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.0000006, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3-small-128k-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.0000006, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3-medium-4k-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000017, + "output_cost_per_token": 0.00000068, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3-medium-128k-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000017, + "output_cost_per_token": 0.00000068, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, "azure_ai/cohere-rerank-v3-multilingual": { "max_tokens": 4096, "max_input_tokens": 4096, @@ -2212,16 +2363,16 @@ "input_cost_per_image": 0.00032875, "input_cost_per_audio_per_second": 0.00003125, "input_cost_per_video_per_second": 0.00032875, - "input_cost_per_token": 0.000000078125, - "input_cost_per_character": 0.0000003125, + "input_cost_per_token": 0.00000125, + "input_cost_per_character": 0.0000003125, "input_cost_per_image_above_128k_tokens": 0.0006575, "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, - "input_cost_per_token_above_128k_tokens": 0.00000015625, - "input_cost_per_character_above_128k_tokens": 0.000000625, - "output_cost_per_token": 0.0000003125, + "input_cost_per_token_above_128k_tokens": 0.0000025, + "input_cost_per_character_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.000005, "output_cost_per_character": 0.00000125, - "output_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_token_above_128k_tokens": 0.00001, "output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", @@ -2238,16 +2389,16 @@ "input_cost_per_image": 0.00032875, "input_cost_per_audio_per_second": 0.00003125, "input_cost_per_video_per_second": 0.00032875, - "input_cost_per_token": 0.000000078125, - "input_cost_per_character": 0.0000003125, + "input_cost_per_token": 0.00000125, + "input_cost_per_character": 0.0000003125, "input_cost_per_image_above_128k_tokens": 0.0006575, "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, - "input_cost_per_token_above_128k_tokens": 0.00000015625, - "input_cost_per_character_above_128k_tokens": 0.000000625, - "output_cost_per_token": 0.0000003125, + "input_cost_per_token_above_128k_tokens": 0.0000025, + "input_cost_per_character_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.000005, "output_cost_per_character": 0.00000125, - "output_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_token_above_128k_tokens": 0.00001, "output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", @@ -2264,16 +2415,16 @@ "input_cost_per_image": 0.00032875, "input_cost_per_audio_per_second": 0.00003125, "input_cost_per_video_per_second": 0.00032875, - "input_cost_per_token": 0.000000078125, - "input_cost_per_character": 0.0000003125, + "input_cost_per_token": 0.00000125, + "input_cost_per_character": 0.0000003125, "input_cost_per_image_above_128k_tokens": 0.0006575, "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, - "input_cost_per_token_above_128k_tokens": 0.00000015625, - "input_cost_per_character_above_128k_tokens": 0.000000625, - "output_cost_per_token": 0.0000003125, + "input_cost_per_token_above_128k_tokens": 0.0000025, + "input_cost_per_character_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.000005, "output_cost_per_character": 0.00000125, - "output_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_token_above_128k_tokens": 0.00001, "output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", @@ -2373,17 +2524,17 @@ "input_cost_per_image": 0.00002, "input_cost_per_video_per_second": 0.00002, "input_cost_per_audio_per_second": 0.000002, - "input_cost_per_token": 0.000000004688, + "input_cost_per_token": 0.000000075, "input_cost_per_character": 0.00000001875, "input_cost_per_token_above_128k_tokens": 0.000001, "input_cost_per_character_above_128k_tokens": 0.00000025, "input_cost_per_image_above_128k_tokens": 0.00004, "input_cost_per_video_per_second_above_128k_tokens": 0.00004, "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, - "output_cost_per_token": 0.0000000046875, - "output_cost_per_character": 0.00000001875, - "output_cost_per_token_above_128k_tokens": 0.000000009375, - "output_cost_per_character_above_128k_tokens": 0.0000000375, + "output_cost_per_token": 0.0000003, + "output_cost_per_character": 0.000000075, + "output_cost_per_token_above_128k_tokens": 0.0000006, + "output_cost_per_character_above_128k_tokens": 0.00000015, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -2437,17 +2588,17 @@ "input_cost_per_image": 0.00002, "input_cost_per_video_per_second": 0.00002, "input_cost_per_audio_per_second": 0.000002, - "input_cost_per_token": 0.000000004688, + "input_cost_per_token": 0.000000075, "input_cost_per_character": 0.00000001875, "input_cost_per_token_above_128k_tokens": 0.000001, "input_cost_per_character_above_128k_tokens": 0.00000025, "input_cost_per_image_above_128k_tokens": 0.00004, "input_cost_per_video_per_second_above_128k_tokens": 0.00004, "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, - "output_cost_per_token": 0.0000000046875, - "output_cost_per_character": 0.00000001875, - "output_cost_per_token_above_128k_tokens": 0.000000009375, - "output_cost_per_character_above_128k_tokens": 0.0000000375, + "output_cost_per_token": 0.0000003, + "output_cost_per_character": 0.000000075, + "output_cost_per_token_above_128k_tokens": 0.0000006, + "output_cost_per_character_above_128k_tokens": 0.00000015, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -2469,17 +2620,17 @@ "input_cost_per_image": 0.00002, "input_cost_per_video_per_second": 0.00002, "input_cost_per_audio_per_second": 0.000002, - "input_cost_per_token": 0.000000004688, + "input_cost_per_token": 0.000000075, "input_cost_per_character": 0.00000001875, "input_cost_per_token_above_128k_tokens": 0.000001, "input_cost_per_character_above_128k_tokens": 0.00000025, "input_cost_per_image_above_128k_tokens": 0.00004, "input_cost_per_video_per_second_above_128k_tokens": 0.00004, "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, - "output_cost_per_token": 0.0000000046875, - "output_cost_per_character": 0.00000001875, - "output_cost_per_token_above_128k_tokens": 0.000000009375, - "output_cost_per_character_above_128k_tokens": 0.0000000375, + "output_cost_per_token": 0.0000003, + "output_cost_per_character": 0.000000075, + "output_cost_per_token_above_128k_tokens": 0.0000006, + "output_cost_per_character_above_128k_tokens": 0.00000015, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -2501,7 +2652,7 @@ "input_cost_per_image": 0.00002, "input_cost_per_video_per_second": 0.00002, "input_cost_per_audio_per_second": 0.000002, - "input_cost_per_token": 0.000000004688, + "input_cost_per_token": 0.000000075, "input_cost_per_character": 0.00000001875, "input_cost_per_token_above_128k_tokens": 0.000001, "input_cost_per_character_above_128k_tokens": 0.00000025, @@ -2714,14 +2865,15 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models" }, "vertex_ai/meta/llama-3.2-90b-vision-instruct-maas": { - "max_tokens": 8192, + "max_tokens": 128000, "max_input_tokens": 128000, - "max_output_tokens": 8192, + "max_output_tokens": 2048, "input_cost_per_token": 0.0, "output_cost_per_token": 0.0, "litellm_provider": "vertex_ai-llama_models", "mode": "chat", "supports_system_messages": true, + "supports_vision": true, "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas" }, "vertex_ai/mistral-large@latest": {