diff --git a/litellm/tests/test_completion_cost.py b/litellm/tests/test_completion_cost.py
index f17d5a464..fecd53e19 100644
--- a/litellm/tests/test_completion_cost.py
+++ b/litellm/tests/test_completion_cost.py
@@ -328,3 +328,56 @@ def test_dalle_3_azure_cost_tracking():
         completion_response=response, call_type="image_generation"
     )
     assert cost > 0
+
+
+def test_replicate_llama3_cost_tracking():
+    litellm.set_verbose = True
+    model = "replicate/meta/meta-llama-3-8b-instruct"
+    litellm.register_model(
+        {
+            "replicate/meta/meta-llama-3-8b-instruct": {
+                "input_cost_per_token": 0.00000005,
+                "output_cost_per_token": 0.00000025,
+                "litellm_provider": "replicate",
+            }
+        }
+    )
+    response = litellm.ModelResponse(
+        id="chatcmpl-cad7282f-7f68-41e7-a5ab-9eb33ae301dc",
+        choices=[
+            litellm.utils.Choices(
+                finish_reason="stop",
+                index=0,
+                message=litellm.utils.Message(
+                    content="I'm doing well, thanks for asking! I'm here to help you with any questions or tasks you may have. How can I assist you today?",
+                    role="assistant",
+                ),
+            )
+        ],
+        created=1714401369,
+        model="replicate/meta/meta-llama-3-8b-instruct",
+        object="chat.completion",
+        system_fingerprint=None,
+        usage=litellm.utils.Usage(
+            prompt_tokens=48, completion_tokens=31, total_tokens=79
+        ),
+    )
+    cost = litellm.completion_cost(
+        completion_response=response,
+        messages=[{"role": "user", "content": "Hey, how's it going?"}],
+    )
+
+    print(f"cost: {cost}")
+    cost = round(cost, 5)
+    expected_cost = round(
+        litellm.model_cost["replicate/meta/meta-llama-3-8b-instruct"][
+            "input_cost_per_token"
+        ]
+        * 48
+        + litellm.model_cost["replicate/meta/meta-llama-3-8b-instruct"][
+            "output_cost_per_token"
+        ]
+        * 31,
+        5,
+    )
+    assert cost == expected_cost
diff --git a/litellm/utils.py b/litellm/utils.py
index 6e62b64c9..e1d80c2d2 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -4269,8 +4269,8 @@ def completion_cost(
             model = get_model_params_and_category(model)
         # replicate llms are calculate based on time for request running
         # see https://replicate.com/pricing
-        elif model in litellm.replicate_models or "replicate" in model:
-            return get_replicate_completion_pricing(completion_response, total_time)
+        # elif model in litellm.replicate_models or "replicate" in model:
+        #     return get_replicate_completion_pricing(completion_response, total_time)
 
         (
             prompt_tokens_cost_usd_dollar,
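
Note: as a quick sanity check on the value the test asserts (not part of the diff itself), the per-token prices registered in the test give 48 * $0.00000005 + 31 * $0.00000025 = $0.00001015, which rounds to 0.00001 at five decimal places. A minimal standalone sketch of that arithmetic, copying the token counts and prices from the test above without calling litellm:

# Sanity-check of the expected cost asserted by test_replicate_llama3_cost_tracking.
# Values are copied from the test; this does not exercise any litellm code.
input_cost_per_token = 0.00000005   # $ per prompt token, registered in the test
output_cost_per_token = 0.00000025  # $ per completion token, registered in the test
prompt_tokens = 48
completion_tokens = 31

expected_cost = (
    prompt_tokens * input_cost_per_token
    + completion_tokens * output_cost_per_token
)
print(expected_cost)            # ~1.015e-05
print(round(expected_cost, 5))  # 1e-05, matching round(cost, 5) in the test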