fix(utils.py): default to time-based tracking for unmapped replicate models. fix time-based cost calc for replicate

This commit is contained in:
Krrish Dholakia 2024-04-29 08:36:01 -07:00
parent a18844b230
commit 3725732c4d

View file

@ -3641,12 +3641,12 @@ def get_replicate_completion_pricing(completion_response=None, total_time=0.0):
a100_80gb_price_per_second_public = ( a100_80gb_price_per_second_public = (
0.001400 # assume all calls sent to A100 80GB for now 0.001400 # assume all calls sent to A100 80GB for now
) )
if total_time == 0.0: if total_time == 0.0: # total time is in ms
start_time = completion_response["created"] start_time = completion_response["created"]
end_time = completion_response["ended"] end_time = completion_response["ended"]
total_time = end_time - start_time total_time = end_time - start_time
return a100_80gb_price_per_second_public * total_time return a100_80gb_price_per_second_public * total_time / 1000
def _select_tokenizer(model: str): def _select_tokenizer(model: str):
@ -4269,8 +4269,11 @@ def completion_cost(
model = get_model_params_and_category(model) model = get_model_params_and_category(model)
# replicate llms are calculate based on time for request running # replicate llms are calculate based on time for request running
# see https://replicate.com/pricing # see https://replicate.com/pricing
# elif model in litellm.replicate_models or "replicate" in model: elif (
# return get_replicate_completion_pricing(completion_response, total_time) model in litellm.replicate_models or "replicate" in model
) and model not in litellm.model_cost:
# for unmapped replicate model, default to replicate's time tracking logic
return get_replicate_completion_pricing(completion_response, total_time)
( (
prompt_tokens_cost_usd_dollar, prompt_tokens_cost_usd_dollar,