(Feat) Add input_cost_per_token_batches, output_cost_per_token_batches for OpenAI cost tracking Batches API (#7391)

* add input_cost_per_token_batches

* input_cost_per_token_batches
Ishaan Jaff 2024-12-23 17:42:58 -08:00 committed by GitHub
parent 5ba0fb27d8
commit 4a90bd03e8
3 changed files with 112 additions and 2 deletions

View file

@@ -0,0 +1,54 @@
import json

# List of models to update
models_to_update = [
    "gpt-4o-mini",
    "gpt-4o-mini-2024-07-18",
    "gpt-4o",
    "gpt-4o-2024-11-20",
    "gpt-4o-2024-08-06",
    "gpt-4o-2024-05-13",
    "text-embedding-3-small",
    "text-embedding-3-large",
    "text-embedding-ada-002-v2",
    "ft:gpt-4o-2024-08-06",
    "ft:gpt-4o-mini-2024-07-18",
    "ft:gpt-3.5-turbo",
    "ft:davinci-002",
    "ft:babbage-002",
]


def update_model_prices(file_path):
    # Read the JSON file as text first to preserve number formatting
    with open(file_path, "r") as file:
        original_text = file.read()
    data = json.loads(original_text)

    # Update the specified models: batch pricing is half the regular
    # per-token pricing
    for model_name in models_to_update:
        print("finding model", model_name)
        if model_name in data:
            print("found model")
            model = data[model_name]
            if "input_cost_per_token" in model:
                # Format new values to match the original style
                model["input_cost_per_token_batches"] = float(
                    "{:.12f}".format(model["input_cost_per_token"] / 2)
                )
            if "output_cost_per_token" in model:
                model["output_cost_per_token_batches"] = float(
                    "{:.12f}".format(model["output_cost_per_token"] / 2)
                )
            print("new pricing for model=", model_name)
            # Convert all float values to full decimal format before printing
            formatted_model = {
                k: "{:.9f}".format(v) if isinstance(v, float) else v
                for k, v in data[model_name].items()
            }
            print(json.dumps(formatted_model, indent=4))


# Run the update
file_path = "model_prices_and_context_window.json"
update_model_prices(file_path)
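
As a quick sanity check after running the script, each new key should be exactly half the on-demand rate. A minimal sketch (not part of the commit; assumes the updated model_prices_and_context_window.json is in the working directory):

import json

with open("model_prices_and_context_window.json") as f:
    data = json.load(f)

# Batch pricing should be exactly half the regular per-token pricing.
m = data["gpt-4o-mini"]
assert m["input_cost_per_token_batches"] == m["input_cost_per_token"] / 2
assert m["output_cost_per_token_batches"] == m["output_cost_per_token"] / 2
print("gpt-4o-mini batch pricing OK")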

View file

@@ -104,6 +104,8 @@
     "max_output_tokens": 16384,
     "input_cost_per_token": 0.0000025,
     "output_cost_per_token": 0.000010,
+    "input_cost_per_token_batches": 0.00000125,
+    "output_cost_per_token_batches": 0.00000500,
     "cache_read_input_token_cost": 0.00000125,
     "litellm_provider": "openai",
     "mode": "chat",
@@ -168,6 +170,8 @@
     "max_output_tokens": 16384,
     "input_cost_per_token": 0.00000015,
     "output_cost_per_token": 0.00000060,
+    "input_cost_per_token_batches": 0.000000075,
+    "output_cost_per_token_batches": 0.00000030,
     "cache_read_input_token_cost": 0.000000075,
     "litellm_provider": "openai",
     "mode": "chat",
@@ -184,6 +188,8 @@
     "max_output_tokens": 16384,
     "input_cost_per_token": 0.00000015,
     "output_cost_per_token": 0.00000060,
+    "input_cost_per_token_batches": 0.000000075,
+    "output_cost_per_token_batches": 0.00000030,
     "cache_read_input_token_cost": 0.000000075,
     "litellm_provider": "openai",
     "mode": "chat",
@@ -294,6 +300,8 @@
     "max_output_tokens": 4096,
     "input_cost_per_token": 0.000005,
     "output_cost_per_token": 0.000015,
+    "input_cost_per_token_batches": 0.0000025,
+    "output_cost_per_token_batches": 0.0000075,
     "litellm_provider": "openai",
     "mode": "chat",
     "supports_function_calling": true,
@@ -308,6 +316,8 @@
     "max_output_tokens": 16384,
     "input_cost_per_token": 0.0000025,
     "output_cost_per_token": 0.000010,
+    "input_cost_per_token_batches": 0.00000125,
+    "output_cost_per_token_batches": 0.0000050,
     "cache_read_input_token_cost": 0.00000125,
     "litellm_provider": "openai",
     "mode": "chat",
@@ -324,6 +334,8 @@
     "max_output_tokens": 16384,
     "input_cost_per_token": 0.0000025,
     "output_cost_per_token": 0.000010,
+    "input_cost_per_token_batches": 0.00000125,
+    "output_cost_per_token_batches": 0.0000050,
     "cache_read_input_token_cost": 0.00000125,
     "litellm_provider": "openai",
     "mode": "chat",
@@ -658,6 +670,8 @@
     "max_output_tokens": 4096,
     "input_cost_per_token": 0.000003,
     "output_cost_per_token": 0.000006,
+    "input_cost_per_token_batches": 0.0000015,
+    "output_cost_per_token_batches": 0.000003,
     "litellm_provider": "openai",
     "mode": "chat",
     "supports_system_messages": true
@@ -710,6 +724,8 @@
     "max_output_tokens": 16384,
     "input_cost_per_token": 0.00000375,
     "output_cost_per_token": 0.000015,
+    "input_cost_per_token_batches": 0.000001875,
+    "output_cost_per_token_batches": 0.000007500,
     "litellm_provider": "openai",
     "mode": "chat",
     "supports_function_calling": true,
@@ -739,8 +755,10 @@
     "max_input_tokens": 128000,
     "max_output_tokens": 16384,
     "input_cost_per_token": 0.0000003,
-    "cache_read_input_token_cost": 0.00000015,
     "output_cost_per_token": 0.0000012,
+    "input_cost_per_token_batches": 0.000000150,
+    "output_cost_per_token_batches": 0.000000600,
+    "cache_read_input_token_cost": 0.00000015,
     "litellm_provider": "openai",
     "mode": "chat",
     "supports_function_calling": true,
@@ -756,6 +774,8 @@
     "max_output_tokens": 4096,
     "input_cost_per_token": 0.000002,
     "output_cost_per_token": 0.000002,
+    "input_cost_per_token_batches": 0.000001,
+    "output_cost_per_token_batches": 0.000001,
     "litellm_provider": "text-completion-openai",
     "mode": "completion"
 },
@@ -765,6 +785,8 @@
     "max_output_tokens": 4096,
     "input_cost_per_token": 0.0000004,
     "output_cost_per_token": 0.0000004,
+    "input_cost_per_token_batches": 0.0000002,
+    "output_cost_per_token_batches": 0.0000002,
     "litellm_provider": "text-completion-openai",
     "mode": "completion"
 },
@@ -774,6 +796,8 @@
     "output_vector_size": 3072,
     "input_cost_per_token": 0.00000013,
     "output_cost_per_token": 0.000000,
+    "input_cost_per_token_batches": 0.000000065,
+    "output_cost_per_token_batches": 0.000000000,
     "litellm_provider": "openai",
     "mode": "embedding"
 },
@@ -783,6 +807,8 @@
     "output_vector_size": 1536,
     "input_cost_per_token": 0.00000002,
     "output_cost_per_token": 0.000000,
+    "input_cost_per_token_batches": 0.000000010,
+    "output_cost_per_token_batches": 0.000000000,
     "litellm_provider": "openai",
     "mode": "embedding"
 },
@@ -800,6 +826,8 @@
     "max_input_tokens": 8191,
     "input_cost_per_token": 0.0000001,
     "output_cost_per_token": 0.000000,
+    "input_cost_per_token_batches": 0.000000050,
+    "output_cost_per_token_batches": 0.000000000,
     "litellm_provider": "openai",
     "mode": "embedding"
 },
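
With these keys in place, the per-request cost of a Batches API call can be computed directly from a model's pricing entry. A minimal sketch of that calculation (the helper name and the fallback-to-half behavior are illustrative assumptions, not LiteLLM's actual cost-tracking code):

def batch_cost(model_info: dict, prompt_tokens: int, completion_tokens: int) -> float:
    # Hypothetical helper: price a Batches API request from a pricing
    # entry; if the batch-specific keys are missing, fall back to half
    # the on-demand rate (an assumption, mirroring the 50% Batch API
    # discount applied throughout this commit).
    in_rate = model_info.get(
        "input_cost_per_token_batches",
        model_info.get("input_cost_per_token", 0.0) / 2,
    )
    out_rate = model_info.get(
        "output_cost_per_token_batches",
        model_info.get("output_cost_per_token", 0.0) / 2,
    )
    return prompt_tokens * in_rate + completion_tokens * out_rate


# Example with the gpt-4o batch rates above:
gpt_4o = {
    "input_cost_per_token_batches": 0.00000125,
    "output_cost_per_token_batches": 0.0000050,
}
print(batch_cost(gpt_4o, 10_000, 2_000))  # 0.0125 + 0.01 = ~0.0225 USD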

View file

@@ -104,6 +104,8 @@
     "max_output_tokens": 16384,
     "input_cost_per_token": 0.0000025,
     "output_cost_per_token": 0.000010,
+    "input_cost_per_token_batches": 0.00000125,
+    "output_cost_per_token_batches": 0.00000500,
     "cache_read_input_token_cost": 0.00000125,
     "litellm_provider": "openai",
     "mode": "chat",
@@ -168,6 +170,8 @@
     "max_output_tokens": 16384,
     "input_cost_per_token": 0.00000015,
     "output_cost_per_token": 0.00000060,
+    "input_cost_per_token_batches": 0.000000075,
+    "output_cost_per_token_batches": 0.00000030,
     "cache_read_input_token_cost": 0.000000075,
     "litellm_provider": "openai",
     "mode": "chat",
@@ -184,6 +188,8 @@
     "max_output_tokens": 16384,
     "input_cost_per_token": 0.00000015,
     "output_cost_per_token": 0.00000060,
+    "input_cost_per_token_batches": 0.000000075,
+    "output_cost_per_token_batches": 0.00000030,
     "cache_read_input_token_cost": 0.000000075,
     "litellm_provider": "openai",
     "mode": "chat",
@@ -294,6 +300,8 @@
     "max_output_tokens": 4096,
     "input_cost_per_token": 0.000005,
     "output_cost_per_token": 0.000015,
+    "input_cost_per_token_batches": 0.0000025,
+    "output_cost_per_token_batches": 0.0000075,
     "litellm_provider": "openai",
     "mode": "chat",
     "supports_function_calling": true,
@@ -308,6 +316,8 @@
     "max_output_tokens": 16384,
     "input_cost_per_token": 0.0000025,
     "output_cost_per_token": 0.000010,
+    "input_cost_per_token_batches": 0.00000125,
+    "output_cost_per_token_batches": 0.0000050,
     "cache_read_input_token_cost": 0.00000125,
     "litellm_provider": "openai",
     "mode": "chat",
@@ -324,6 +334,8 @@
     "max_output_tokens": 16384,
     "input_cost_per_token": 0.0000025,
     "output_cost_per_token": 0.000010,
+    "input_cost_per_token_batches": 0.00000125,
+    "output_cost_per_token_batches": 0.0000050,
     "cache_read_input_token_cost": 0.00000125,
     "litellm_provider": "openai",
     "mode": "chat",
@@ -658,6 +670,8 @@
     "max_output_tokens": 4096,
     "input_cost_per_token": 0.000003,
     "output_cost_per_token": 0.000006,
+    "input_cost_per_token_batches": 0.0000015,
+    "output_cost_per_token_batches": 0.000003,
     "litellm_provider": "openai",
     "mode": "chat",
     "supports_system_messages": true
@@ -710,6 +724,8 @@
     "max_output_tokens": 16384,
     "input_cost_per_token": 0.00000375,
     "output_cost_per_token": 0.000015,
+    "input_cost_per_token_batches": 0.000001875,
+    "output_cost_per_token_batches": 0.000007500,
     "litellm_provider": "openai",
     "mode": "chat",
     "supports_function_calling": true,
@@ -739,8 +755,10 @@
     "max_input_tokens": 128000,
     "max_output_tokens": 16384,
     "input_cost_per_token": 0.0000003,
-    "cache_read_input_token_cost": 0.00000015,
     "output_cost_per_token": 0.0000012,
+    "input_cost_per_token_batches": 0.000000150,
+    "output_cost_per_token_batches": 0.000000600,
+    "cache_read_input_token_cost": 0.00000015,
     "litellm_provider": "openai",
     "mode": "chat",
     "supports_function_calling": true,
@@ -756,6 +774,8 @@
     "max_output_tokens": 4096,
     "input_cost_per_token": 0.000002,
     "output_cost_per_token": 0.000002,
+    "input_cost_per_token_batches": 0.000001,
+    "output_cost_per_token_batches": 0.000001,
     "litellm_provider": "text-completion-openai",
     "mode": "completion"
 },
@@ -765,6 +785,8 @@
     "max_output_tokens": 4096,
     "input_cost_per_token": 0.0000004,
     "output_cost_per_token": 0.0000004,
+    "input_cost_per_token_batches": 0.0000002,
+    "output_cost_per_token_batches": 0.0000002,
     "litellm_provider": "text-completion-openai",
     "mode": "completion"
 },
@@ -774,6 +796,8 @@
     "output_vector_size": 3072,
     "input_cost_per_token": 0.00000013,
     "output_cost_per_token": 0.000000,
+    "input_cost_per_token_batches": 0.000000065,
+    "output_cost_per_token_batches": 0.000000000,
     "litellm_provider": "openai",
     "mode": "embedding"
 },
@@ -783,6 +807,8 @@
     "output_vector_size": 1536,
     "input_cost_per_token": 0.00000002,
     "output_cost_per_token": 0.000000,
+    "input_cost_per_token_batches": 0.000000010,
+    "output_cost_per_token_batches": 0.000000000,
     "litellm_provider": "openai",
     "mode": "embedding"
 },
@@ -800,6 +826,8 @@
     "max_input_tokens": 8191,
     "input_cost_per_token": 0.0000001,
     "output_cost_per_token": 0.000000,
+    "input_cost_per_token_batches": 0.000000050,
+    "output_cost_per_token_batches": 0.000000000,
     "litellm_provider": "openai",
     "mode": "embedding"
 },
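
Once this change ships, the new keys surface through LiteLLM's in-memory pricing map. A sketch of reading them (assumes the installed version loads litellm.model_cost from this JSON file):

import litellm

# litellm.model_cost mirrors model_prices_and_context_window.json
info = litellm.model_cost["gpt-4o"]
print(info.get("input_cost_per_token_batches"))   # 1.25e-06
print(info.get("output_cost_per_token_batches"))  # 5e-06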