Model pricing updates for Azure & VertexAI (#10178)
All checks were successful
Read Version from pyproject.toml / read-version (push) Successful in 23s
Helm unit test / unit-test (push) Successful in 27s

This commit is contained in:
Marty Sullivan 2025-04-20 14:33:45 -04:00 committed by GitHub
parent 1ff7625984
commit 0b63c7a2eb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 192 additions and 0 deletions

View file

@ -1472,6 +1472,73 @@
"litellm_provider": "openai",
"supported_endpoints": ["/v1/audio/speech"]
},
"azure/computer-use-preview": {
"max_tokens": 1024,
"max_input_tokens": 8192,
"max_output_tokens": 1024,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000012,
"litellm_provider": "azure",
"mode": "chat",
"supported_endpoints": ["/v1/responses"],
"supported_modalities": ["text", "image"],
"supported_output_modalities": ["text"],
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_prompt_caching": false,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_native_streaming": false,
"supports_reasoning": true
},
"azure/gpt-4o-audio-preview-2024-12-17": {
"max_tokens": 16384,
"max_input_tokens": 128000,
"max_output_tokens": 16384,
"input_cost_per_token": 0.0000025,
"input_cost_per_audio_token": 0.00004,
"output_cost_per_token": 0.00001,
"output_cost_per_audio_token": 0.00008,
"litellm_provider": "azure",
"mode": "chat",
"supported_endpoints": ["/v1/chat/completions"],
"supported_modalities": ["text", "audio"],
"supported_output_modalities": ["text", "audio"],
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_response_schema": false,
"supports_vision": false,
"supports_prompt_caching": false,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_native_streaming": true,
"supports_reasoning": false
},
"azure/gpt-4o-mini-audio-preview-2024-12-17": {
"max_tokens": 16384,
"max_input_tokens": 128000,
"max_output_tokens": 16384,
"input_cost_per_token": 0.0000025,
"input_cost_per_audio_token": 0.00004,
"output_cost_per_token": 0.00001,
"output_cost_per_audio_token": 0.00008,
"litellm_provider": "azure",
"mode": "chat",
"supported_endpoints": ["/v1/chat/completions"],
"supported_modalities": ["text", "audio"],
"supported_output_modalities": ["text", "audio"],
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_response_schema": false,
"supports_vision": false,
"supports_prompt_caching": false,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_native_streaming": true,
"supports_reasoning": false
},
"azure/gpt-4.1": {
"max_tokens": 32768,
"max_input_tokens": 1047576,
@ -5465,6 +5532,35 @@
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
"supports_tool_choice": true
},
"gemini-2.5-pro-preview-03-25": {
"max_tokens": 65536,
"max_input_tokens": 1048576,
"max_output_tokens": 65536,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_audio_token": 0.00000125,
"input_cost_per_token": 0.00000125,
"input_cost_per_token_above_200k_tokens": 0.0000025,
"output_cost_per_token": 0.00001,
"output_cost_per_token_above_200k_tokens": 0.000015,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_reasoning": true,
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"supports_audio_output": false,
"supports_tool_choice": true,
"supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"],
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text"],
"source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview"
},
"gemini/gemini-2.0-pro-exp-02-05": {
"max_tokens": 8192,
"max_input_tokens": 2097152,

View file

@ -1472,6 +1472,73 @@
"litellm_provider": "openai",
"supported_endpoints": ["/v1/audio/speech"]
},
"azure/computer-use-preview": {
"max_tokens": 1024,
"max_input_tokens": 8192,
"max_output_tokens": 1024,
"input_cost_per_token": 0.000003,
"output_cost_per_token": 0.000012,
"litellm_provider": "azure",
"mode": "chat",
"supported_endpoints": ["/v1/responses"],
"supported_modalities": ["text", "image"],
"supported_output_modalities": ["text"],
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_response_schema": true,
"supports_vision": true,
"supports_prompt_caching": false,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_native_streaming": false,
"supports_reasoning": true
},
"azure/gpt-4o-audio-preview-2024-12-17": {
"max_tokens": 16384,
"max_input_tokens": 128000,
"max_output_tokens": 16384,
"input_cost_per_token": 0.0000025,
"input_cost_per_audio_token": 0.00004,
"output_cost_per_token": 0.00001,
"output_cost_per_audio_token": 0.00008,
"litellm_provider": "azure",
"mode": "chat",
"supported_endpoints": ["/v1/chat/completions"],
"supported_modalities": ["text", "audio"],
"supported_output_modalities": ["text", "audio"],
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_response_schema": false,
"supports_vision": false,
"supports_prompt_caching": false,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_native_streaming": true,
"supports_reasoning": false
},
"azure/gpt-4o-mini-audio-preview-2024-12-17": {
"max_tokens": 16384,
"max_input_tokens": 128000,
"max_output_tokens": 16384,
"input_cost_per_token": 0.0000025,
"input_cost_per_audio_token": 0.00004,
"output_cost_per_token": 0.00001,
"output_cost_per_audio_token": 0.00008,
"litellm_provider": "azure",
"mode": "chat",
"supported_endpoints": ["/v1/chat/completions"],
"supported_modalities": ["text", "audio"],
"supported_output_modalities": ["text", "audio"],
"supports_function_calling": true,
"supports_parallel_function_calling": true,
"supports_response_schema": false,
"supports_vision": false,
"supports_prompt_caching": false,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_native_streaming": true,
"supports_reasoning": false
},
"azure/gpt-4.1": {
"max_tokens": 32768,
"max_input_tokens": 1047576,
@ -5465,6 +5532,35 @@
"source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
"supports_tool_choice": true
},
"gemini-2.5-pro-preview-03-25": {
"max_tokens": 65536,
"max_input_tokens": 1048576,
"max_output_tokens": 65536,
"max_images_per_prompt": 3000,
"max_videos_per_prompt": 10,
"max_video_length": 1,
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_audio_token": 0.00000125,
"input_cost_per_token": 0.00000125,
"input_cost_per_token_above_200k_tokens": 0.0000025,
"output_cost_per_token": 0.00001,
"output_cost_per_token_above_200k_tokens": 0.000015,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_reasoning": true,
"supports_system_messages": true,
"supports_function_calling": true,
"supports_vision": true,
"supports_response_schema": true,
"supports_audio_output": false,
"supports_tool_choice": true,
"supported_endpoints": ["/v1/chat/completions", "/v1/completions", "/v1/batch"],
"supported_modalities": ["text", "image", "audio", "video"],
"supported_output_modalities": ["text"],
"source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview"
},
"gemini/gemini-2.0-pro-exp-02-05": {
"max_tokens": 8192,
"max_input_tokens": 2097152,