fix(utils.py): default to time-based cost tracking for unmapped Replicate models; fix time-based cost calculation for Replicate (total_time is in ms, price is per second)

2024-04-29 08:36:01 -07:00 · 2024-04-29 08:36:01 -07:00 · 3725732c4d
commit 3725732c4d
parent a18844b230
1 changed files with 7 additions and 4 deletions
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -3641,12 +3641,12 @@ def get_replicate_completion_pricing(completion_response=None, total_time=0.0):
    a100_80gb_price_per_second_public = (
        0.001400  # assume all calls sent to A100 80GB for now
    )
-    if total_time == 0.0:
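+    if total_time == 0.0:  # total_time is in ms; 0.0 (the default) => derive it from the response. NOTE(review): confirm ended - created is also in ms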
        start_time = completion_response["created"]
        end_time = completion_response["ended"]
        total_time = end_time - start_time

-    return a100_80gb_price_per_second_public * total_time
+    return a100_80gb_price_per_second_public * total_time / 1000


 def _select_tokenizer(model: str):
@@ -4269,8 +4269,11 @@ def completion_cost(
            model = get_model_params_and_category(model)
        # replicate llms are calculate based on time for request running
        # see https://replicate.com/pricing
-        # elif model in litellm.replicate_models or "replicate" in model:
-        #     return get_replicate_completion_pricing(completion_response, total_time)
+        elif (
+            model in litellm.replicate_models or "replicate" in model
+        ) and model not in litellm.model_cost:
+            # for unmapped Replicate models, fall back to Replicate's time-based cost tracking
+            return get_replicate_completion_pricing(completion_response, total_time)

        (
            prompt_tokens_cost_usd_dollar,