Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-27 11:43:54 +00:00
(Feat) Add input_cost_per_token_batches, output_cost_per_token_batches for OpenAI cost tracking Batches API (#7391)

* add input_cost_per_token_batches
* input_cost_per_token_batches

parent 5ba0fb27d8 · commit 4a90bd03e8
3 changed files with 112 additions and 2 deletions
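The two new keys record OpenAI's Batch API pricing, which runs at half the synchronous per-token rate, alongside the existing input_cost_per_token / output_cost_per_token fields. As a minimal sketch of how a cost tracker could consume them (batch_cost is a hypothetical helper written for illustration, not litellm's API):

import json

# Load the cost map shipped in this repo.
with open("model_prices_and_context_window.json") as f:
    cost_map = json.load(f)

def batch_cost(model: str, prompt_tokens: int, completion_tokens: int) -> float:
    """Hypothetical helper: price a Batches API call with the new keys,
    falling back to the synchronous rates when a model lacks them."""
    info = cost_map[model]
    in_rate = info.get("input_cost_per_token_batches", info.get("input_cost_per_token", 0.0))
    out_rate = info.get("output_cost_per_token_batches", info.get("output_cost_per_token", 0.0))
    return prompt_tokens * in_rate + completion_tokens * out_rate

# e.g. 1M prompt tokens + 200k completion tokens against gpt-4o-mini
print(batch_cost("gpt-4o-mini", 1_000_000, 200_000))  # 0.135 at the rates in this commit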
cookbook/misc/update_json_caching.py (new file, 54 lines)

@@ -0,0 +1,54 @@
import json

# List of models to update
models_to_update = [
    "gpt-4o-mini",
    "gpt-4o-mini-2024-07-18",
    "gpt-4o",
    "gpt-4o-2024-11-20",
    "gpt-4o-2024-08-06",
    "gpt-4o-2024-05-13",
    "text-embedding-3-small",
    "text-embedding-3-large",
    "text-embedding-ada-002-v2",
    "ft:gpt-4o-2024-08-06",
    "ft:gpt-4o-mini-2024-07-18",
    "ft:gpt-3.5-turbo",
    "ft:davinci-002",
    "ft:babbage-002",
]


def update_model_prices(file_path):
    # Read the JSON file as text first to preserve number formatting
    with open(file_path, "r") as file:
        original_text = file.read()
        data = json.loads(original_text)

    # Update specified models: batch pricing is half the synchronous rate
    for model_name in models_to_update:
        print("finding model", model_name)
        if model_name in data:
            print("found model")
            model = data[model_name]
            if "input_cost_per_token" in model:
                # Format new values to match original style
                model["input_cost_per_token_batches"] = float(
                    "{:.12f}".format(model["input_cost_per_token"] / 2)
                )
            if "output_cost_per_token" in model:
                model["output_cost_per_token_batches"] = float(
                    "{:.12f}".format(model["output_cost_per_token"] / 2)
                )
            print("new pricing for model=")
            # Convert all float values to full decimal format before printing
            formatted_model = {
                k: "{:.9f}".format(v) if isinstance(v, float) else v
                for k, v in data[model_name].items()
            }
            print(json.dumps(formatted_model, indent=4))


# Run the update (the updated entries are printed for review;
# the JSON file itself is not rewritten by this script)
file_path = "model_prices_and_context_window.json"
update_model_prices(file_path)
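A quick consistency check against the updated cost map (a sketch, not part of the commit; assumes the JSON sits in the working directory):

import json

with open("model_prices_and_context_window.json") as f:
    data = json.load(f)

# Spot-check a few updated models: each batch rate should be exactly half
# the synchronous rate. Strict equality is safe here because halving a
# binary float is exact.
for name in ["gpt-4o", "gpt-4o-mini", "text-embedding-3-small"]:
    info = data[name]
    assert info["input_cost_per_token_batches"] == info["input_cost_per_token"] / 2, name
    assert info["output_cost_per_token_batches"] == info["output_cost_per_token"] / 2, name

print("batch rates consistent")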
model_prices_and_context_window.json (additions marked with +)

@@ -104,6 +104,8 @@
 "max_output_tokens": 16384,
 "input_cost_per_token": 0.0000025,
 "output_cost_per_token": 0.000010,
+"input_cost_per_token_batches": 0.00000125,
+"output_cost_per_token_batches": 0.00000500,
 "cache_read_input_token_cost": 0.00000125,
 "litellm_provider": "openai",
 "mode": "chat",

@@ -168,6 +170,8 @@
 "max_output_tokens": 16384,
 "input_cost_per_token": 0.00000015,
 "output_cost_per_token": 0.00000060,
+"input_cost_per_token_batches": 0.000000075,
+"output_cost_per_token_batches": 0.00000030,
 "cache_read_input_token_cost": 0.000000075,
 "litellm_provider": "openai",
 "mode": "chat",

@@ -184,6 +188,8 @@
 "max_output_tokens": 16384,
 "input_cost_per_token": 0.00000015,
 "output_cost_per_token": 0.00000060,
+"input_cost_per_token_batches": 0.000000075,
+"output_cost_per_token_batches": 0.00000030,
 "cache_read_input_token_cost": 0.000000075,
 "litellm_provider": "openai",
 "mode": "chat",

@@ -294,6 +300,8 @@
 "max_output_tokens": 4096,
 "input_cost_per_token": 0.000005,
 "output_cost_per_token": 0.000015,
+"input_cost_per_token_batches": 0.0000025,
+"output_cost_per_token_batches": 0.0000075,
 "litellm_provider": "openai",
 "mode": "chat",
 "supports_function_calling": true,

@@ -308,6 +316,8 @@
 "max_output_tokens": 16384,
 "input_cost_per_token": 0.0000025,
 "output_cost_per_token": 0.000010,
+"input_cost_per_token_batches": 0.00000125,
+"output_cost_per_token_batches": 0.0000050,
 "cache_read_input_token_cost": 0.00000125,
 "litellm_provider": "openai",
 "mode": "chat",

@@ -324,6 +334,8 @@
 "max_output_tokens": 16384,
 "input_cost_per_token": 0.0000025,
 "output_cost_per_token": 0.000010,
+"input_cost_per_token_batches": 0.00000125,
+"output_cost_per_token_batches": 0.0000050,
 "cache_read_input_token_cost": 0.00000125,
 "litellm_provider": "openai",
 "mode": "chat",

@@ -658,6 +670,8 @@
 "max_output_tokens": 4096,
 "input_cost_per_token": 0.000003,
 "output_cost_per_token": 0.000006,
+"input_cost_per_token_batches": 0.0000015,
+"output_cost_per_token_batches": 0.000003,
 "litellm_provider": "openai",
 "mode": "chat",
 "supports_system_messages": true

@@ -710,6 +724,8 @@
 "max_output_tokens": 16384,
 "input_cost_per_token": 0.00000375,
 "output_cost_per_token": 0.000015,
+"input_cost_per_token_batches": 0.000001875,
+"output_cost_per_token_batches": 0.000007500,
 "litellm_provider": "openai",
 "mode": "chat",
 "supports_function_calling": true,

@@ -739,8 +755,10 @@
 "max_input_tokens": 128000,
 "max_output_tokens": 16384,
 "input_cost_per_token": 0.0000003,
-"cache_read_input_token_cost": 0.00000015,
 "output_cost_per_token": 0.0000012,
+"input_cost_per_token_batches": 0.000000150,
+"output_cost_per_token_batches": 0.000000600,
+"cache_read_input_token_cost": 0.00000015,
 "litellm_provider": "openai",
 "mode": "chat",
 "supports_function_calling": true,

@@ -756,6 +774,8 @@
 "max_output_tokens": 4096,
 "input_cost_per_token": 0.000002,
 "output_cost_per_token": 0.000002,
+"input_cost_per_token_batches": 0.000001,
+"output_cost_per_token_batches": 0.000001,
 "litellm_provider": "text-completion-openai",
 "mode": "completion"
 },

@@ -765,6 +785,8 @@
 "max_output_tokens": 4096,
 "input_cost_per_token": 0.0000004,
 "output_cost_per_token": 0.0000004,
+"input_cost_per_token_batches": 0.0000002,
+"output_cost_per_token_batches": 0.0000002,
 "litellm_provider": "text-completion-openai",
 "mode": "completion"
 },

@@ -774,6 +796,8 @@
 "output_vector_size": 3072,
 "input_cost_per_token": 0.00000013,
 "output_cost_per_token": 0.000000,
+"input_cost_per_token_batches": 0.000000065,
+"output_cost_per_token_batches": 0.000000000,
 "litellm_provider": "openai",
 "mode": "embedding"
 },

@@ -783,6 +807,8 @@
 "output_vector_size": 1536,
 "input_cost_per_token": 0.00000002,
 "output_cost_per_token": 0.000000,
+"input_cost_per_token_batches": 0.000000010,
+"output_cost_per_token_batches": 0.000000000,
 "litellm_provider": "openai",
 "mode": "embedding"
 },

@@ -800,6 +826,8 @@
 "max_input_tokens": 8191,
 "input_cost_per_token": 0.0000001,
 "output_cost_per_token": 0.000000,
+"input_cost_per_token_batches": 0.000000050,
+"output_cost_per_token_batches": 0.000000000,
 "litellm_provider": "openai",
 "mode": "embedding"
 },
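Once the cost map is updated, the new keys surface through litellm's in-memory copy of this JSON (a sketch assuming litellm.model_cost, the package's loaded cost-map dict):

import litellm

# gpt-4o-mini after this commit: batch rates at half the synchronous price.
info = litellm.model_cost["gpt-4o-mini"]
print(info["input_cost_per_token"])           # 1.5e-07
print(info["input_cost_per_token_batches"])   # 7.5e-08
print(info["output_cost_per_token_batches"])  # 3e-07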
The same 14 hunks are applied to the commit's remaining changed file: litellm keeps a mirrored copy of the cost map at litellm/model_prices_and_context_window.json, and both copies are updated in lockstep (together with the new script, this accounts for the 112 additions and 2 deletions across 3 files).