diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py
index eafd924bc6..1416e2d060 100644
--- a/litellm/cost_calculator.py
+++ b/litellm/cost_calculator.py
@@ -319,7 +319,7 @@ def cost_per_token(  # noqa: PLR0915
         if model_info["input_cost_per_token"] > 0:
             ## COST PER TOKEN ##
             prompt_tokens_cost_usd_dollar = (
-                model_info["input_cost_per_token"] * prompt_tokens
+                model_info["input_cost_per_token"] * (prompt_tokens - cache_read_input_tokens)
             )
         elif (
             model_info.get("input_cost_per_second", None) is not None
@@ -335,6 +335,10 @@ def cost_per_token(  # noqa: PLR0915
             prompt_tokens_cost_usd_dollar = (
                 model_info["input_cost_per_second"] * response_time_ms / 1000  # type: ignore
             )
+        ## Prompt Caching cost calculation
+        if model_info["cache_read_input_token_cost"] is not None and model_info["cache_read_input_token_cost"] > 0:
+            ## COST PER TOKEN ##
+            prompt_tokens_cost_usd_dollar += model_info["cache_read_input_token_cost"] * cache_read_input_tokens
         if model_info["output_cost_per_token"] > 0:
             completion_tokens_cost_usd_dollar = (
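
For context, a minimal standalone sketch of the billing split this diff introduces: cache-read tokens are subtracted from the regular input charge and billed at the cache-read rate instead. The `model_info` dict, rates, and token counts below are hypothetical values for illustration only, not entries from LiteLLM's model map.

```python
# Hypothetical per-token rates (assumed for illustration, not real pricing).
model_info = {
    "input_cost_per_token": 3e-06,         # $3 per 1M non-cached input tokens (assumed)
    "cache_read_input_token_cost": 3e-07,  # $0.30 per 1M cache-read tokens (assumed)
    "output_cost_per_token": 1.5e-05,      # $15 per 1M output tokens (assumed)
}

prompt_tokens = 10_000           # total prompt tokens reported by the provider
cache_read_input_tokens = 8_000  # portion of the prompt served from the cache
completion_tokens = 500

# New behavior in this diff: charge only the non-cached portion at the full
# input rate, then add the (cheaper) cache-read charge on top.
prompt_cost = model_info["input_cost_per_token"] * (prompt_tokens - cache_read_input_tokens)
if model_info["cache_read_input_token_cost"] is not None and model_info["cache_read_input_token_cost"] > 0:
    prompt_cost += model_info["cache_read_input_token_cost"] * cache_read_input_tokens

completion_cost = model_info["output_cost_per_token"] * completion_tokens

print(f"prompt_tokens_cost_usd_dollar={prompt_cost:.6f}")          # 0.008400
print(f"completion_tokens_cost_usd_dollar={completion_cost:.6f}")  # 0.007500
```

Under these assumed rates, the pre-patch code would have billed all 10,000 prompt tokens at the full input rate ($0.030000); the subtraction plus the cache-read surcharge is what produces the discounted $0.008400.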