diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index a37a431dc..cfc2cef72 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -80,6 +80,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_vision": true, "supports_prompt_caching": true }, @@ -94,6 +95,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_vision": true, "supports_prompt_caching": true }, @@ -108,7 +110,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true, + "supports_vision": false, "supports_prompt_caching": true }, "o1-mini-2024-09-12": { @@ -122,7 +124,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true, + "supports_vision": false, "supports_prompt_caching": true }, "o1-preview": { @@ -136,7 +138,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true, + "supports_vision": false, "supports_prompt_caching": true }, "o1-preview-2024-09-12": { @@ -150,7 +152,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true, + "supports_vision": false, "supports_prompt_caching": true }, "chatgpt-4o-latest": { @@ -190,6 +192,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_vision": true, "supports_prompt_caching": true }, @@ -461,6 +464,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_vision": true }, "ft:gpt-4o-mini-2024-07-18": { @@ -473,6 +477,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_vision": true }, "ft:davinci-002": { @@ -652,7 +657,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true, + "supports_vision": false, "supports_prompt_caching": true }, "azure/o1-mini-2024-09-12": { @@ -666,7 +671,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true, + "supports_vision": false, "supports_prompt_caching": true }, "azure/o1-preview": { @@ -680,7 +685,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true, + "supports_vision": false, "supports_prompt_caching": true }, "azure/o1-preview-2024-09-12": { @@ -694,7 +699,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true, + "supports_vision": false, "supports_prompt_caching": true }, "azure/gpt-4o": { @@ -721,6 +726,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_vision": true }, "azure/gpt-4o-2024-05-13": { @@ -746,6 +752,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_vision": true }, "azure/global-standard/gpt-4o-mini": { @@ -758,6 +765,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_vision": true }, "azure/gpt-4o-mini": { @@ -771,6 +779,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_vision": true, "supports_prompt_caching": true }, @@ -785,6 +794,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, + "supports_response_schema": true, "supports_vision": true, "supports_prompt_caching": true }, @@ -1109,6 +1119,52 @@ "supports_function_calling": true, "mode": "chat" }, + "azure_ai/mistral-large-2407": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, + "litellm_provider": "azure_ai", + "supports_function_calling": true, + "mode": "chat", + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.mistral-ai-large-2407-offer?tab=Overview" + }, + "azure_ai/ministral-3b": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000004, + "output_cost_per_token": 0.00000004, + "litellm_provider": "azure_ai", + "supports_function_calling": true, + "mode": "chat", + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/000-000.ministral-3b-2410-offer?tab=Overview" + }, + "azure_ai/Llama-3.2-11B-Vision-Instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "input_cost_per_token": 0.00000037, + "output_cost_per_token": 0.00000037, + "litellm_provider": "azure_ai", + "supports_function_calling": true, + "supports_vision": true, + "mode": "chat", + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.meta-llama-3-2-11b-vision-instruct-offer?tab=Overview" + }, + "azure_ai/Llama-3.2-90B-Vision-Instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 2048, + "input_cost_per_token": 0.00000204, + "output_cost_per_token": 0.00000204, + "litellm_provider": "azure_ai", + "supports_function_calling": true, + "supports_vision": true, + "mode": "chat", + "source": "https://azuremarketplace.microsoft.com/en/marketplace/apps/metagenai.meta-llama-3-2-90b-vision-instruct-offer?tab=Overview" + }, "azure_ai/Meta-Llama-3-70B-Instruct": { "max_tokens": 8192, "max_input_tokens": 8192, @@ -1148,6 +1204,105 @@ "mode": "chat", "source":"https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice" }, + "azure_ai/Phi-3.5-mini-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000052, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3.5-vision-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000052, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": true, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3.5-MoE-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000016, + "output_cost_per_token": 0.00000064, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3-mini-4k-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000052, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3-mini-128k-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000052, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3-small-8k-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.0000006, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3-small-128k-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.0000006, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3-medium-4k-instruct": { + "max_tokens": 4096, + "max_input_tokens": 4096, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000017, + "output_cost_per_token": 0.00000068, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, + "azure_ai/Phi-3-medium-128k-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00000017, + "output_cost_per_token": 0.00000068, + "litellm_provider": "azure_ai", + "mode": "chat", + "supports_vision": false, + "source": "https://azure.microsoft.com/en-us/pricing/details/phi-3/" + }, "azure_ai/cohere-rerank-v3-multilingual": { "max_tokens": 4096, "max_input_tokens": 4096, @@ -2208,16 +2363,16 @@ "input_cost_per_image": 0.00032875, "input_cost_per_audio_per_second": 0.00003125, "input_cost_per_video_per_second": 0.00032875, - "input_cost_per_token": 0.000000078125, - "input_cost_per_character": 0.0000003125, + "input_cost_per_token": 0.00000125, + "input_cost_per_character": 0.0000003125, "input_cost_per_image_above_128k_tokens": 0.0006575, "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, - "input_cost_per_token_above_128k_tokens": 0.00000015625, - "input_cost_per_character_above_128k_tokens": 0.000000625, - "output_cost_per_token": 0.0000003125, + "input_cost_per_token_above_128k_tokens": 0.0000025, + "input_cost_per_character_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.000005, "output_cost_per_character": 0.00000125, - "output_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_token_above_128k_tokens": 0.00001, "output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", @@ -2234,16 +2389,16 @@ "input_cost_per_image": 0.00032875, "input_cost_per_audio_per_second": 0.00003125, "input_cost_per_video_per_second": 0.00032875, - "input_cost_per_token": 0.000000078125, - "input_cost_per_character": 0.0000003125, + "input_cost_per_token": 0.00000125, + "input_cost_per_character": 0.0000003125, "input_cost_per_image_above_128k_tokens": 0.0006575, "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, - "input_cost_per_token_above_128k_tokens": 0.00000015625, - "input_cost_per_character_above_128k_tokens": 0.000000625, - "output_cost_per_token": 0.0000003125, + "input_cost_per_token_above_128k_tokens": 0.0000025, + "input_cost_per_character_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.000005, "output_cost_per_character": 0.00000125, - "output_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_token_above_128k_tokens": 0.00001, "output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", @@ -2260,16 +2415,16 @@ "input_cost_per_image": 0.00032875, "input_cost_per_audio_per_second": 0.00003125, "input_cost_per_video_per_second": 0.00032875, - "input_cost_per_token": 0.000000078125, - "input_cost_per_character": 0.0000003125, + "input_cost_per_token": 0.00000125, + "input_cost_per_character": 0.0000003125, "input_cost_per_image_above_128k_tokens": 0.0006575, "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, - "input_cost_per_token_above_128k_tokens": 0.00000015625, - "input_cost_per_character_above_128k_tokens": 0.000000625, - "output_cost_per_token": 0.0000003125, + "input_cost_per_token_above_128k_tokens": 0.0000025, + "input_cost_per_character_above_128k_tokens": 0.000000625, + "output_cost_per_token": 0.000005, "output_cost_per_character": 0.00000125, - "output_cost_per_token_above_128k_tokens": 0.000000625, + "output_cost_per_token_above_128k_tokens": 0.00001, "output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", @@ -2369,17 +2524,17 @@ "input_cost_per_image": 0.00002, "input_cost_per_video_per_second": 0.00002, "input_cost_per_audio_per_second": 0.000002, - "input_cost_per_token": 0.000000004688, + "input_cost_per_token": 0.000000075, "input_cost_per_character": 0.00000001875, "input_cost_per_token_above_128k_tokens": 0.000001, "input_cost_per_character_above_128k_tokens": 0.00000025, "input_cost_per_image_above_128k_tokens": 0.00004, "input_cost_per_video_per_second_above_128k_tokens": 0.00004, "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, - "output_cost_per_token": 0.0000000046875, - "output_cost_per_character": 0.00000001875, - "output_cost_per_token_above_128k_tokens": 0.000000009375, - "output_cost_per_character_above_128k_tokens": 0.0000000375, + "output_cost_per_token": 0.0000003, + "output_cost_per_character": 0.000000075, + "output_cost_per_token_above_128k_tokens": 0.0000006, + "output_cost_per_character_above_128k_tokens": 0.00000015, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -2433,17 +2588,17 @@ "input_cost_per_image": 0.00002, "input_cost_per_video_per_second": 0.00002, "input_cost_per_audio_per_second": 0.000002, - "input_cost_per_token": 0.000000004688, + "input_cost_per_token": 0.000000075, "input_cost_per_character": 0.00000001875, "input_cost_per_token_above_128k_tokens": 0.000001, "input_cost_per_character_above_128k_tokens": 0.00000025, "input_cost_per_image_above_128k_tokens": 0.00004, "input_cost_per_video_per_second_above_128k_tokens": 0.00004, "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, - "output_cost_per_token": 0.0000000046875, - "output_cost_per_character": 0.00000001875, - "output_cost_per_token_above_128k_tokens": 0.000000009375, - "output_cost_per_character_above_128k_tokens": 0.0000000375, + "output_cost_per_token": 0.0000003, + "output_cost_per_character": 0.000000075, + "output_cost_per_token_above_128k_tokens": 0.0000006, + "output_cost_per_character_above_128k_tokens": 0.00000015, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -2465,17 +2620,17 @@ "input_cost_per_image": 0.00002, "input_cost_per_video_per_second": 0.00002, "input_cost_per_audio_per_second": 0.000002, - "input_cost_per_token": 0.000000004688, + "input_cost_per_token": 0.000000075, "input_cost_per_character": 0.00000001875, "input_cost_per_token_above_128k_tokens": 0.000001, "input_cost_per_character_above_128k_tokens": 0.00000025, "input_cost_per_image_above_128k_tokens": 0.00004, "input_cost_per_video_per_second_above_128k_tokens": 0.00004, "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, - "output_cost_per_token": 0.0000000046875, - "output_cost_per_character": 0.00000001875, - "output_cost_per_token_above_128k_tokens": 0.000000009375, - "output_cost_per_character_above_128k_tokens": 0.0000000375, + "output_cost_per_token": 0.0000003, + "output_cost_per_character": 0.000000075, + "output_cost_per_token_above_128k_tokens": 0.0000006, + "output_cost_per_character_above_128k_tokens": 0.00000015, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -2497,7 +2652,7 @@ "input_cost_per_image": 0.00002, "input_cost_per_video_per_second": 0.00002, "input_cost_per_audio_per_second": 0.000002, - "input_cost_per_token": 0.000000004688, + "input_cost_per_token": 0.000000075, "input_cost_per_character": 0.00000001875, "input_cost_per_token_above_128k_tokens": 0.000001, "input_cost_per_character_above_128k_tokens": 0.00000025, @@ -2710,14 +2865,15 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models" }, "vertex_ai/meta/llama-3.2-90b-vision-instruct-maas": { - "max_tokens": 8192, + "max_tokens": 128000, "max_input_tokens": 128000, - "max_output_tokens": 8192, + "max_output_tokens": 2048, "input_cost_per_token": 0.0, "output_cost_per_token": 0.0, "litellm_provider": "vertex_ai-llama_models", "mode": "chat", "supports_system_messages": true, + "supports_vision": true, "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas" }, "vertex_ai/mistral-large@latest": { @@ -3790,7 +3946,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true + "supports_vision": false }, "openrouter/openai/o1-mini-2024-09-12": { "max_tokens": 65536, @@ -3802,7 +3958,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true + "supports_vision": false }, "openrouter/openai/o1-preview": { "max_tokens": 32768, @@ -3814,7 +3970,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true + "supports_vision": false }, "openrouter/openai/o1-preview-2024-09-12": { "max_tokens": 32768, @@ -3826,7 +3982,7 @@ "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true + "supports_vision": false }, "openrouter/openai/gpt-4o": { "max_tokens": 4096, diff --git a/pyproject.toml b/pyproject.toml index 8681486e8..c083db2a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.52.1" +version = "1.52.2" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -91,7 +91,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.52.1" +version = "1.52.2" version_files = [ "pyproject.toml:^version" ]