diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py index 757cf9e238..4cf38fe1c5 100644 --- a/litellm/cost_calculator.py +++ b/litellm/cost_calculator.py @@ -291,7 +291,7 @@ def cost_per_token( # noqa: PLR0915 if model_info["input_cost_per_token"] > 0: ## COST PER TOKEN ## prompt_tokens_cost_usd_dollar = ( - model_info["input_cost_per_token"] * prompt_tokens + model_info["input_cost_per_token"] * (prompt_tokens - cache_read_input_tokens) ) elif ( model_info.get("input_cost_per_second", None) is not None @@ -307,6 +307,10 @@ def cost_per_token( # noqa: PLR0915 prompt_tokens_cost_usd_dollar = ( model_info["input_cost_per_second"] * response_time_ms / 1000 # type: ignore ) + ## Prompt Caching cost calculation + if model_info["cache_read_input_token_cost"] is not None and model_info["cache_read_input_token_cost"] > 0: + ## COST PER TOKEN ## + prompt_tokens_cost_usd_dollar += model_info["cache_read_input_token_cost"] * cache_read_input_tokens if model_info["output_cost_per_token"] > 0: completion_tokens_cost_usd_dollar = (