Merge pull request #3354 from BerriAI/litellm_replicate_cost_tracking

fix(utils.py): replicate now also has token based pricing for some models
This commit is contained in:
Krish Dholakia 2024-04-29 09:13:41 -07:00 committed by GitHub
commit ec2510029a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 294 additions and 4 deletions

View file

@@ -3641,12 +3641,12 @@ def get_replicate_completion_pricing(completion_response=None, total_time=0.0):
a100_80gb_price_per_second_public = (
0.001400 # assume all calls sent to A100 80GB for now
)
if total_time == 0.0:
if total_time == 0.0: # total time is in ms
start_time = completion_response["created"]
end_time = completion_response["ended"]
total_time = end_time - start_time
return a100_80gb_price_per_second_public * total_time
return a100_80gb_price_per_second_public * total_time / 1000
def _select_tokenizer(model: str):
@@ -3668,7 +3668,7 @@ def _select_tokenizer(model: str):
tokenizer = Tokenizer.from_str(json_str)
return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
# llama2
elif "llama-2" in model.lower():
elif "llama-2" in model.lower() or "replicate" in model.lower():
tokenizer = Tokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
return {"type": "huggingface_tokenizer", "tokenizer": tokenizer}
# default - tiktoken
@@ -4269,7 +4269,10 @@ def completion_cost(
model = get_model_params_and_category(model)
# replicate llms are calculated based on time for request running
# see https://replicate.com/pricing
elif model in litellm.replicate_models or "replicate" in model:
elif (
model in litellm.replicate_models or "replicate" in model
) and model not in litellm.model_cost:
# for unmapped replicate model, default to replicate's time tracking logic
return get_replicate_completion_pricing(completion_response, total_time)
(