forked from phoenix/litellm-mirror
Update several Azure AI models in model cost map (#6655)
* Adding Azure Phi 3/3.5 models to model cost map * Update gpt-4o-mini models * Adding missing Azure Mistral models to model cost map * Adding Azure Llama3.2 models to model cost map * Fix Gemini-1.5-flash pricing * Fix Gemini-1.5-flash output pricing * Fix Gemini-1.5-pro prices * Fix Gemini-1.5-flash output prices * Correct gemini-1.5-pro prices * Correction on Vertex Llama3.2 entry --------- Co-authored-by: Emerson Gomes <emerson.gomes@thalesgroup.com>
This commit is contained in:
parent
eb47117800
commit
d0d29d70de
1 changed files with 188 additions and 36 deletions
|
@ -80,6 +80,7 @@
|
|||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_vision": true,
|
||||
"supports_prompt_caching": true
|
||||
},
|
||||
|
@ -94,6 +95,7 @@
|
|||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_vision": true,
|
||||
"supports_prompt_caching": true
|
||||
},
|
||||
|
@ -475,6 +477,7 @@
|
|||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"ft:davinci-002": {
|
||||
|
@ -762,6 +765,7 @@
|
|||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_vision": true
|
||||
},
|
||||
"azure/gpt-4o-mini": {
|
||||
|
@ -775,6 +779,7 @@
|
|||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_vision": true,
|
||||
"supports_prompt_caching": true
|
||||
},
|
||||
|
@ -789,6 +794,7 @@
|
|||
"mode": "chat",
|
||||
"supports_function_calling": true,
|
||||
"supports_parallel_function_calling": true,
|
||||
"supports_response_schema": true,
|
||||
"supports_vision": true,
|
||||
"supports_prompt_caching": true
|
||||
},
|
||||
|
@ -1113,6 +1119,52 @@
|
|||
"supports_function_calling": true,
|
||||
"mode": "chat"
|
||||
},
|
||||
"azure_ai/mistral-large-2407": {
|
||||
"max_tokens": 128000,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.000002,
|
||||
"output_cost_per_token": 0.000006,
|
||||
"litellm_provider": "azure_ai",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat",
|
||||
"source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.mistral-ai-large-2407-offer?tab=Overview"
|
||||
},
|
||||
"azure_ai/ministral-3b": {
|
||||
"max_tokens": 128000,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.00000004,
|
||||
"output_cost_per_token": 0.00000004,
|
||||
"litellm_provider": "azure_ai",
|
||||
"supports_function_calling": true,
|
||||
"mode": "chat",
|
||||
"source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.ministral-3b-2410-offer?tab=Overview"
|
||||
},
|
||||
"azure_ai/Llama-3.2-11B-Vision-Instruct": {
|
||||
"max_tokens": 128000,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 2048,
|
||||
"input_cost_per_token": 0.00000037,
|
||||
"output_cost_per_token": 0.00000037,
|
||||
"litellm_provider": "azure_ai",
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"mode": "chat",
|
||||
"source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.meta-llama-3-2-11b-vision-instruct-offer?tab=Overview"
|
||||
},
|
||||
"azure_ai/Llama-3.2-90B-Vision-Instruct": {
|
||||
"max_tokens": 128000,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 2048,
|
||||
"input_cost_per_token": 0.00000204,
|
||||
"output_cost_per_token": 0.00000204,
|
||||
"litellm_provider": "azure_ai",
|
||||
"supports_function_calling": true,
|
||||
"supports_vision": true,
|
||||
"mode": "chat",
|
||||
"source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.meta-llama-3-2-90b-vision-instruct-offer?tab=Overview"
|
||||
},
|
||||
"azure_ai/Meta-Llama-3-70B-Instruct": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 8192,
|
||||
|
@ -1152,6 +1204,105 @@
|
|||
"mode": "chat",
|
||||
"source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice"
|
||||
},
|
||||
"azure_ai/Phi-3.5-mini-instruct": {
|
||||
"max_tokens": 128000,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.00000013,
|
||||
"output_cost_per_token": 0.00000052,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_vision": false,
|
||||
"source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
|
||||
},
|
||||
"azure_ai/Phi-3.5-vision-instruct": {
|
||||
"max_tokens": 128000,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.00000013,
|
||||
"output_cost_per_token": 0.00000052,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_vision": true,
|
||||
"source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
|
||||
},
|
||||
"azure_ai/Phi-3.5-MoE-instruct": {
|
||||
"max_tokens": 128000,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.00000016,
|
||||
"output_cost_per_token": 0.00000064,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_vision": false,
|
||||
"source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
|
||||
},
|
||||
"azure_ai/Phi-3-mini-4k-instruct": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 4096,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.00000013,
|
||||
"output_cost_per_token": 0.00000052,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_vision": false,
|
||||
"source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
|
||||
},
|
||||
"azure_ai/Phi-3-mini-128k-instruct": {
|
||||
"max_tokens": 128000,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.00000013,
|
||||
"output_cost_per_token": 0.00000052,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_vision": false,
|
||||
"source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
|
||||
},
|
||||
"azure_ai/Phi-3-small-8k-instruct": {
|
||||
"max_tokens": 8192,
|
||||
"max_input_tokens": 8192,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.00000015,
|
||||
"output_cost_per_token": 0.0000006,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_vision": false,
|
||||
"source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
|
||||
},
|
||||
"azure_ai/Phi-3-small-128k-instruct": {
|
||||
"max_tokens": 128000,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.00000015,
|
||||
"output_cost_per_token": 0.0000006,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_vision": false,
|
||||
"source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
|
||||
},
|
||||
"azure_ai/Phi-3-medium-4k-instruct": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 4096,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.00000017,
|
||||
"output_cost_per_token": 0.00000068,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_vision": false,
|
||||
"source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
|
||||
},
|
||||
"azure_ai/Phi-3-medium-128k-instruct": {
|
||||
"max_tokens": 128000,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 4096,
|
||||
"input_cost_per_token": 0.00000017,
|
||||
"output_cost_per_token": 0.00000068,
|
||||
"litellm_provider": "azure_ai",
|
||||
"mode": "chat",
|
||||
"supports_vision": false,
|
||||
"source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/"
|
||||
},
|
||||
"azure_ai/cohere-rerank-v3-multilingual": {
|
||||
"max_tokens": 4096,
|
||||
"max_input_tokens": 4096,
|
||||
|
@ -2212,16 +2363,16 @@
|
|||
"input_cost_per_image": 0.00032875,
|
||||
"input_cost_per_audio_per_second": 0.00003125,
|
||||
"input_cost_per_video_per_second": 0.00032875,
|
||||
"input_cost_per_token": 0.000000078125,
|
||||
"input_cost_per_character": 0.0000003125,
|
||||
"input_cost_per_token": 0.00000125,
|
||||
"input_cost_per_character": 0.0000003125,
|
||||
"input_cost_per_image_above_128k_tokens": 0.0006575,
|
||||
"input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
|
||||
"input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
|
||||
"input_cost_per_token_above_128k_tokens": 0.00000015625,
|
||||
"input_cost_per_character_above_128k_tokens": 0.000000625,
|
||||
"output_cost_per_token": 0.0000003125,
|
||||
"input_cost_per_token_above_128k_tokens": 0.0000025,
|
||||
"input_cost_per_character_above_128k_tokens": 0.000000625,
|
||||
"output_cost_per_token": 0.000005,
|
||||
"output_cost_per_character": 0.00000125,
|
||||
"output_cost_per_token_above_128k_tokens": 0.000000625,
|
||||
"output_cost_per_token_above_128k_tokens": 0.00001,
|
||||
"output_cost_per_character_above_128k_tokens": 0.0000025,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
|
@ -2238,16 +2389,16 @@
|
|||
"input_cost_per_image": 0.00032875,
|
||||
"input_cost_per_audio_per_second": 0.00003125,
|
||||
"input_cost_per_video_per_second": 0.00032875,
|
||||
"input_cost_per_token": 0.000000078125,
|
||||
"input_cost_per_character": 0.0000003125,
|
||||
"input_cost_per_token": 0.00000125,
|
||||
"input_cost_per_character": 0.0000003125,
|
||||
"input_cost_per_image_above_128k_tokens": 0.0006575,
|
||||
"input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
|
||||
"input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
|
||||
"input_cost_per_token_above_128k_tokens": 0.00000015625,
|
||||
"input_cost_per_character_above_128k_tokens": 0.000000625,
|
||||
"output_cost_per_token": 0.0000003125,
|
||||
"input_cost_per_token_above_128k_tokens": 0.0000025,
|
||||
"input_cost_per_character_above_128k_tokens": 0.000000625,
|
||||
"output_cost_per_token": 0.000005,
|
||||
"output_cost_per_character": 0.00000125,
|
||||
"output_cost_per_token_above_128k_tokens": 0.000000625,
|
||||
"output_cost_per_token_above_128k_tokens": 0.00001,
|
||||
"output_cost_per_character_above_128k_tokens": 0.0000025,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
|
@ -2264,16 +2415,16 @@
|
|||
"input_cost_per_image": 0.00032875,
|
||||
"input_cost_per_audio_per_second": 0.00003125,
|
||||
"input_cost_per_video_per_second": 0.00032875,
|
||||
"input_cost_per_token": 0.000000078125,
|
||||
"input_cost_per_character": 0.0000003125,
|
||||
"input_cost_per_token": 0.00000125,
|
||||
"input_cost_per_character": 0.0000003125,
|
||||
"input_cost_per_image_above_128k_tokens": 0.0006575,
|
||||
"input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
|
||||
"input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
|
||||
"input_cost_per_token_above_128k_tokens": 0.00000015625,
|
||||
"input_cost_per_character_above_128k_tokens": 0.000000625,
|
||||
"output_cost_per_token": 0.0000003125,
|
||||
"input_cost_per_token_above_128k_tokens": 0.0000025,
|
||||
"input_cost_per_character_above_128k_tokens": 0.000000625,
|
||||
"output_cost_per_token": 0.000005,
|
||||
"output_cost_per_character": 0.00000125,
|
||||
"output_cost_per_token_above_128k_tokens": 0.000000625,
|
||||
"output_cost_per_token_above_128k_tokens": 0.00001,
|
||||
"output_cost_per_character_above_128k_tokens": 0.0000025,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
|
@ -2373,17 +2524,17 @@
|
|||
"input_cost_per_image": 0.00002,
|
||||
"input_cost_per_video_per_second": 0.00002,
|
||||
"input_cost_per_audio_per_second": 0.000002,
|
||||
"input_cost_per_token": 0.000000004688,
|
||||
"input_cost_per_token": 0.000000075,
|
||||
"input_cost_per_character": 0.00000001875,
|
||||
"input_cost_per_token_above_128k_tokens": 0.000001,
|
||||
"input_cost_per_character_above_128k_tokens": 0.00000025,
|
||||
"input_cost_per_image_above_128k_tokens": 0.00004,
|
||||
"input_cost_per_video_per_second_above_128k_tokens": 0.00004,
|
||||
"input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
|
||||
"output_cost_per_token": 0.0000000046875,
|
||||
"output_cost_per_character": 0.00000001875,
|
||||
"output_cost_per_token_above_128k_tokens": 0.000000009375,
|
||||
"output_cost_per_character_above_128k_tokens": 0.0000000375,
|
||||
"output_cost_per_token": 0.0000003,
|
||||
"output_cost_per_character": 0.000000075,
|
||||
"output_cost_per_token_above_128k_tokens": 0.0000006,
|
||||
"output_cost_per_character_above_128k_tokens": 0.00000015,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
|
@ -2437,17 +2588,17 @@
|
|||
"input_cost_per_image": 0.00002,
|
||||
"input_cost_per_video_per_second": 0.00002,
|
||||
"input_cost_per_audio_per_second": 0.000002,
|
||||
"input_cost_per_token": 0.000000004688,
|
||||
"input_cost_per_token": 0.000000075,
|
||||
"input_cost_per_character": 0.00000001875,
|
||||
"input_cost_per_token_above_128k_tokens": 0.000001,
|
||||
"input_cost_per_character_above_128k_tokens": 0.00000025,
|
||||
"input_cost_per_image_above_128k_tokens": 0.00004,
|
||||
"input_cost_per_video_per_second_above_128k_tokens": 0.00004,
|
||||
"input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
|
||||
"output_cost_per_token": 0.0000000046875,
|
||||
"output_cost_per_character": 0.00000001875,
|
||||
"output_cost_per_token_above_128k_tokens": 0.000000009375,
|
||||
"output_cost_per_character_above_128k_tokens": 0.0000000375,
|
||||
"output_cost_per_token": 0.0000003,
|
||||
"output_cost_per_character": 0.000000075,
|
||||
"output_cost_per_token_above_128k_tokens": 0.0000006,
|
||||
"output_cost_per_character_above_128k_tokens": 0.00000015,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
|
@ -2469,17 +2620,17 @@
|
|||
"input_cost_per_image": 0.00002,
|
||||
"input_cost_per_video_per_second": 0.00002,
|
||||
"input_cost_per_audio_per_second": 0.000002,
|
||||
"input_cost_per_token": 0.000000004688,
|
||||
"input_cost_per_token": 0.000000075,
|
||||
"input_cost_per_character": 0.00000001875,
|
||||
"input_cost_per_token_above_128k_tokens": 0.000001,
|
||||
"input_cost_per_character_above_128k_tokens": 0.00000025,
|
||||
"input_cost_per_image_above_128k_tokens": 0.00004,
|
||||
"input_cost_per_video_per_second_above_128k_tokens": 0.00004,
|
||||
"input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
|
||||
"output_cost_per_token": 0.0000000046875,
|
||||
"output_cost_per_character": 0.00000001875,
|
||||
"output_cost_per_token_above_128k_tokens": 0.000000009375,
|
||||
"output_cost_per_character_above_128k_tokens": 0.0000000375,
|
||||
"output_cost_per_token": 0.0000003,
|
||||
"output_cost_per_character": 0.000000075,
|
||||
"output_cost_per_token_above_128k_tokens": 0.0000006,
|
||||
"output_cost_per_character_above_128k_tokens": 0.00000015,
|
||||
"litellm_provider": "vertex_ai-language-models",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
|
@ -2501,7 +2652,7 @@
|
|||
"input_cost_per_image": 0.00002,
|
||||
"input_cost_per_video_per_second": 0.00002,
|
||||
"input_cost_per_audio_per_second": 0.000002,
|
||||
"input_cost_per_token": 0.000000004688,
|
||||
"input_cost_per_token": 0.000000075,
|
||||
"input_cost_per_character": 0.00000001875,
|
||||
"input_cost_per_token_above_128k_tokens": 0.000001,
|
||||
"input_cost_per_character_above_128k_tokens": 0.00000025,
|
||||
|
@ -2714,14 +2865,15 @@
|
|||
"source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models"
|
||||
},
|
||||
"vertex_ai/meta/llama-3.2-90b-vision-instruct-maas": {
|
||||
"max_tokens": 8192,
|
||||
"max_tokens": 128000,
|
||||
"max_input_tokens": 128000,
|
||||
"max_output_tokens": 8192,
|
||||
"max_output_tokens": 2048,
|
||||
"input_cost_per_token": 0.0,
|
||||
"output_cost_per_token": 0.0,
|
||||
"litellm_provider": "vertex_ai-llama_models",
|
||||
"mode": "chat",
|
||||
"supports_system_messages": true,
|
||||
"supports_vision": true,
|
||||
"source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas"
|
||||
},
|
||||
"vertex_ai/mistral-large@latest": {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue