From 930606ad6331646b958c98b77654ea2a257f07a0 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 5 Oct 2024 13:22:06 +0530 Subject: [PATCH] add azure o1 models to model cost map (#6075) --- ...odel_prices_and_context_window_backup.json | 72 +++++++++++++++++-- model_prices_and_context_window.json | 72 +++++++++++++++++-- 2 files changed, 136 insertions(+), 8 deletions(-) diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 4a6e8b691..6332fac25 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -83,11 +83,13 @@ "max_output_tokens": 65536, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000012, + "cache_read_input_token_cost": 0.0000015, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_prompt_caching": true }, "o1-preview": { "max_tokens": 32768, @@ -109,11 +111,13 @@ "max_output_tokens": 32768, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000060, + "cache_read_input_token_cost": 0.0000075, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_prompt_caching": true }, "chatgpt-4o-latest": { "max_tokens": 4096, @@ -582,17 +586,75 @@ "output_cost_per_second": 0.0001, "litellm_provider": "azure" }, + "azure/o1-mini": { + "max_tokens": 65536, + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000012, + "cache_read_input_token_cost": 0.0000015, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_prompt_caching": true + }, + "azure/o1-mini-2024-09-12": { + "max_tokens": 65536, + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000012, + "cache_read_input_token_cost": 0.0000015, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_prompt_caching": true + }, + "azure/o1-preview": { + "max_tokens": 32768, + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000060, + "cache_read_input_token_cost": 0.0000075, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_prompt_caching": true + }, + "azure/o1-preview-2024-09-12": { + "max_tokens": 32768, + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000060, + "cache_read_input_token_cost": 0.0000075, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_prompt_caching": true + }, "azure/gpt-4o": { "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.000005, "output_cost_per_token": 0.000015, + "cache_read_input_token_cost": 0.00000125, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_prompt_caching": true }, "azure/gpt-4o-2024-08-06": { "max_tokens": 16384, @@ -636,11 +698,13 @@ "max_output_tokens": 16384, "input_cost_per_token": 0.000000165, "output_cost_per_token": 0.00000066, + "cache_read_input_token_cost": 0.000000075, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_prompt_caching": true }, "azure/gpt-4-turbo-2024-04-09": { "max_tokens": 4096, diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 4a6e8b691..6332fac25 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -83,11 +83,13 @@ "max_output_tokens": 65536, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000012, + "cache_read_input_token_cost": 0.0000015, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_prompt_caching": true }, "o1-preview": { "max_tokens": 32768, @@ -109,11 +111,13 @@ "max_output_tokens": 32768, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000060, + "cache_read_input_token_cost": 0.0000075, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_prompt_caching": true }, "chatgpt-4o-latest": { "max_tokens": 4096, @@ -582,17 +586,75 @@ "output_cost_per_second": 0.0001, "litellm_provider": "azure" }, + "azure/o1-mini": { + "max_tokens": 65536, + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000012, + "cache_read_input_token_cost": 0.0000015, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_prompt_caching": true + }, + "azure/o1-mini-2024-09-12": { + "max_tokens": 65536, + "max_input_tokens": 128000, + "max_output_tokens": 65536, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000012, + "cache_read_input_token_cost": 0.0000015, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_prompt_caching": true + }, + "azure/o1-preview": { + "max_tokens": 32768, + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000060, + "cache_read_input_token_cost": 0.0000075, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_prompt_caching": true + }, + "azure/o1-preview-2024-09-12": { + "max_tokens": 32768, + "max_input_tokens": 128000, + "max_output_tokens": 32768, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000060, + "cache_read_input_token_cost": 0.0000075, + "litellm_provider": "azure", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true, + "supports_prompt_caching": true + }, "azure/gpt-4o": { "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, "input_cost_per_token": 0.000005, "output_cost_per_token": 0.000015, + "cache_read_input_token_cost": 0.00000125, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_prompt_caching": true }, "azure/gpt-4o-2024-08-06": { "max_tokens": 16384, @@ -636,11 +698,13 @@ "max_output_tokens": 16384, "input_cost_per_token": 0.000000165, "output_cost_per_token": 0.00000066, + "cache_read_input_token_cost": 0.000000075, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, "supports_parallel_function_calling": true, - "supports_vision": true + "supports_vision": true, + "supports_prompt_caching": true }, "azure/gpt-4-turbo-2024-04-09": { "max_tokens": 4096,