From 4a90bd03e883992eeec88ce92884ea0987f48bcd Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Mon, 23 Dec 2024 17:42:58 -0800
Subject: [PATCH] (Feat) Add input_cost_per_token_batches,
 output_cost_per_token_batches for OpenAI cost tracking Batches API (#7391)

* add input_cost_per_token_batches

* input_cost_per_token_batches
---
 cookbook/misc/update_json_caching.py          | 54 +++++++++++++++++++
 ...odel_prices_and_context_window_backup.json | 30 ++++++++++-
 model_prices_and_context_window.json          | 30 ++++++++++-
 3 files changed, 112 insertions(+), 2 deletions(-)
 create mode 100644 cookbook/misc/update_json_caching.py

diff --git a/cookbook/misc/update_json_caching.py b/cookbook/misc/update_json_caching.py
new file mode 100644
index 0000000000..8202d7033f
--- /dev/null
+++ b/cookbook/misc/update_json_caching.py
@@ -0,0 +1,54 @@
+import json
+
+# List of models to update
+models_to_update = [
+    "gpt-4o-mini",
+    "gpt-4o-mini-2024-07-18",
+    "gpt-4o",
+    "gpt-4o-2024-11-20",
+    "gpt-4o-2024-08-06",
+    "gpt-4o-2024-05-13",
+    "text-embedding-3-small",
+    "text-embedding-3-large",
+    "text-embedding-ada-002-v2",
+    "ft:gpt-4o-2024-08-06",
+    "ft:gpt-4o-mini-2024-07-18",
+    "ft:gpt-3.5-turbo",
+    "ft:davinci-002",
+    "ft:babbage-002",
+]
+
+
+def update_model_prices(file_path):
+    # Read the JSON file as text first to preserve number formatting
+    with open(file_path, "r") as file:
+        original_text = file.read()
+        data = json.loads(original_text)
+
+    # Update specified models
+    for model_name in models_to_update:
+        print("finding model", model_name)
+        if model_name in data:
+            print("found model")
+            model = data[model_name]
+            if "input_cost_per_token" in model:
+                # Format new values to match original style
+                model["input_cost_per_token_batches"] = float(
+                    "{:.12f}".format(model["input_cost_per_token"] / 2)
+                )
+            if "output_cost_per_token" in model:
+                model["output_cost_per_token_batches"] = float(
+                    "{:.12f}".format(model["output_cost_per_token"] / 2)
+                )
+            print("new pricing for model=")
+            # Convert all float values to full decimal format before printing
+            formatted_model = {
+                k: "{:.9f}".format(v) if isinstance(v, float) else v
+                for k, v in data[model_name].items()
+            }
+            print(json.dumps(formatted_model, indent=4))
+
+
+# Run the update
+file_path = "model_prices_and_context_window.json"
+update_model_prices(file_path)
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index bf6f78d187..29604251df 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -104,6 +104,8 @@
         "max_output_tokens": 16384,
         "input_cost_per_token": 0.0000025,
         "output_cost_per_token": 0.000010,
+        "input_cost_per_token_batches": 0.00000125,
+        "output_cost_per_token_batches": 0.00000500,
         "cache_read_input_token_cost": 0.00000125,
         "litellm_provider": "openai",
         "mode": "chat",
@@ -168,6 +170,8 @@
         "max_output_tokens": 16384,
         "input_cost_per_token": 0.00000015,
         "output_cost_per_token": 0.00000060,
+        "input_cost_per_token_batches": 0.000000075,
+        "output_cost_per_token_batches": 0.00000030,
         "cache_read_input_token_cost": 0.000000075,
         "litellm_provider": "openai",
         "mode": "chat",
@@ -184,6 +188,8 @@
         "max_output_tokens": 16384,
         "input_cost_per_token": 0.00000015,
         "output_cost_per_token": 0.00000060,
+        "input_cost_per_token_batches": 0.000000075,
+        "output_cost_per_token_batches": 0.00000030,
         "cache_read_input_token_cost": 0.000000075,
         "litellm_provider": "openai",
         "mode": "chat",
@@ -294,6 +300,8 @@
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000005,
         "output_cost_per_token": 0.000015,
+        "input_cost_per_token_batches": 0.0000025,
+        "output_cost_per_token_batches": 0.0000075,
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
@@ -308,6 +316,8 @@
         "max_output_tokens": 16384,
         "input_cost_per_token": 0.0000025,
         "output_cost_per_token": 0.000010,
+        "input_cost_per_token_batches": 0.00000125,
+        "output_cost_per_token_batches": 0.0000050,
         "cache_read_input_token_cost": 0.00000125,
         "litellm_provider": "openai",
         "mode": "chat",
@@ -324,6 +334,8 @@
         "max_output_tokens": 16384,
         "input_cost_per_token": 0.0000025,
         "output_cost_per_token": 0.000010,
+        "input_cost_per_token_batches": 0.00000125,
+        "output_cost_per_token_batches": 0.0000050,
         "cache_read_input_token_cost": 0.00000125,
         "litellm_provider": "openai",
         "mode": "chat",
@@ -658,6 +670,8 @@
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000006,
+        "input_cost_per_token_batches": 0.0000015,
+        "output_cost_per_token_batches": 0.000003,
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_system_messages": true
@@ -710,6 +724,8 @@
         "max_output_tokens": 16384,
         "input_cost_per_token": 0.00000375,
         "output_cost_per_token": 0.000015,
+        "input_cost_per_token_batches": 0.000001875,
+        "output_cost_per_token_batches": 0.000007500,
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
@@ -739,8 +755,10 @@
         "max_input_tokens": 128000,
         "max_output_tokens": 16384,
         "input_cost_per_token": 0.0000003,
-        "cache_read_input_token_cost": 0.00000015,
         "output_cost_per_token": 0.0000012,
+        "input_cost_per_token_batches": 0.000000150,
+        "output_cost_per_token_batches": 0.000000600,
+        "cache_read_input_token_cost": 0.00000015,
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
@@ -756,6 +774,8 @@
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000002,
         "output_cost_per_token": 0.000002,
+        "input_cost_per_token_batches": 0.000001,
+        "output_cost_per_token_batches": 0.000001,
         "litellm_provider": "text-completion-openai",
         "mode": "completion"
     },
@@ -765,6 +785,8 @@
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.0000004,
         "output_cost_per_token": 0.0000004,
+        "input_cost_per_token_batches": 0.0000002,
+        "output_cost_per_token_batches": 0.0000002,
         "litellm_provider": "text-completion-openai",
         "mode": "completion"
     },
@@ -774,6 +796,8 @@
         "output_vector_size": 3072,
         "input_cost_per_token": 0.00000013,
         "output_cost_per_token": 0.000000,
+        "input_cost_per_token_batches": 0.000000065,
+        "output_cost_per_token_batches": 0.000000000,
         "litellm_provider": "openai",
         "mode": "embedding"
     },
@@ -783,6 +807,8 @@
         "output_vector_size": 1536,
         "input_cost_per_token": 0.00000002,
         "output_cost_per_token": 0.000000,
+        "input_cost_per_token_batches": 0.000000010,
+        "output_cost_per_token_batches": 0.000000000,
         "litellm_provider": "openai",
         "mode": "embedding"
     },
@@ -800,6 +826,8 @@
         "max_input_tokens": 8191,
         "input_cost_per_token": 0.0000001,
         "output_cost_per_token": 0.000000,
+        "input_cost_per_token_batches": 0.000000050,
+        "output_cost_per_token_batches": 0.000000000,
         "litellm_provider": "openai",
         "mode": "embedding"
     },
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index bf6f78d187..29604251df 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -104,6 +104,8 @@
         "max_output_tokens": 16384,
         "input_cost_per_token": 0.0000025,
         "output_cost_per_token": 0.000010,
+        "input_cost_per_token_batches": 0.00000125,
+        "output_cost_per_token_batches": 0.00000500,
         "cache_read_input_token_cost": 0.00000125,
         "litellm_provider": "openai",
         "mode": "chat",
@@ -168,6 +170,8 @@
         "max_output_tokens": 16384,
         "input_cost_per_token": 0.00000015,
         "output_cost_per_token": 0.00000060,
+        "input_cost_per_token_batches": 0.000000075,
+        "output_cost_per_token_batches": 0.00000030,
         "cache_read_input_token_cost": 0.000000075,
         "litellm_provider": "openai",
         "mode": "chat",
@@ -184,6 +188,8 @@
         "max_output_tokens": 16384,
         "input_cost_per_token": 0.00000015,
         "output_cost_per_token": 0.00000060,
+        "input_cost_per_token_batches": 0.000000075,
+        "output_cost_per_token_batches": 0.00000030,
         "cache_read_input_token_cost": 0.000000075,
         "litellm_provider": "openai",
         "mode": "chat",
@@ -294,6 +300,8 @@
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000005,
         "output_cost_per_token": 0.000015,
+        "input_cost_per_token_batches": 0.0000025,
+        "output_cost_per_token_batches": 0.0000075,
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
@@ -308,6 +316,8 @@
         "max_output_tokens": 16384,
         "input_cost_per_token": 0.0000025,
         "output_cost_per_token": 0.000010,
+        "input_cost_per_token_batches": 0.00000125,
+        "output_cost_per_token_batches": 0.0000050,
         "cache_read_input_token_cost": 0.00000125,
         "litellm_provider": "openai",
         "mode": "chat",
@@ -324,6 +334,8 @@
         "max_output_tokens": 16384,
         "input_cost_per_token": 0.0000025,
         "output_cost_per_token": 0.000010,
+        "input_cost_per_token_batches": 0.00000125,
+        "output_cost_per_token_batches": 0.0000050,
         "cache_read_input_token_cost": 0.00000125,
         "litellm_provider": "openai",
         "mode": "chat",
@@ -658,6 +670,8 @@
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000003,
         "output_cost_per_token": 0.000006,
+        "input_cost_per_token_batches": 0.0000015,
+        "output_cost_per_token_batches": 0.000003,
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_system_messages": true
@@ -710,6 +724,8 @@
         "max_output_tokens": 16384,
         "input_cost_per_token": 0.00000375,
         "output_cost_per_token": 0.000015,
+        "input_cost_per_token_batches": 0.000001875,
+        "output_cost_per_token_batches": 0.000007500,
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
@@ -739,8 +755,10 @@
         "max_input_tokens": 128000,
         "max_output_tokens": 16384,
         "input_cost_per_token": 0.0000003,
-        "cache_read_input_token_cost": 0.00000015,
         "output_cost_per_token": 0.0000012,
+        "input_cost_per_token_batches": 0.000000150,
+        "output_cost_per_token_batches": 0.000000600,
+        "cache_read_input_token_cost": 0.00000015,
         "litellm_provider": "openai",
         "mode": "chat",
         "supports_function_calling": true,
@@ -756,6 +774,8 @@
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.000002,
         "output_cost_per_token": 0.000002,
+        "input_cost_per_token_batches": 0.000001,
+        "output_cost_per_token_batches": 0.000001,
         "litellm_provider": "text-completion-openai",
         "mode": "completion"
     },
@@ -765,6 +785,8 @@
         "max_output_tokens": 4096,
         "input_cost_per_token": 0.0000004,
         "output_cost_per_token": 0.0000004,
+        "input_cost_per_token_batches": 0.0000002,
+        "output_cost_per_token_batches": 0.0000002,
         "litellm_provider": "text-completion-openai",
         "mode": "completion"
     },
@@ -774,6 +796,8 @@
         "output_vector_size": 3072,
         "input_cost_per_token": 0.00000013,
         "output_cost_per_token": 0.000000,
+        "input_cost_per_token_batches": 0.000000065,
+        "output_cost_per_token_batches": 0.000000000,
         "litellm_provider": "openai",
         "mode": "embedding"
     },
@@ -783,6 +807,8 @@
         "output_vector_size": 1536,
         "input_cost_per_token": 0.00000002,
         "output_cost_per_token": 0.000000,
+        "input_cost_per_token_batches": 0.000000010,
+        "output_cost_per_token_batches": 0.000000000,
         "litellm_provider": "openai",
         "mode": "embedding"
     },
@@ -800,6 +826,8 @@
         "max_input_tokens": 8191,
         "input_cost_per_token": 0.0000001,
         "output_cost_per_token": 0.000000,
+        "input_cost_per_token_batches": 0.000000050,
+        "output_cost_per_token_batches": 0.000000000,
         "litellm_provider": "openai",
         "mode": "embedding"
     },