From 24101fc7af80225dc0852d905ddfc9839fb9f9c5 Mon Sep 17 00:00:00 2001 From: yeahyung Date: Mon, 10 Mar 2025 16:02:32 +0900 Subject: [PATCH] Fix: price cached_tokens when using custom handler --- litellm/cost_calculator.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py index 757cf9e238..4cf38fe1c5 100644 --- a/litellm/cost_calculator.py +++ b/litellm/cost_calculator.py @@ -291,7 +291,7 @@ def cost_per_token( # noqa: PLR0915 if model_info["input_cost_per_token"] > 0: ## COST PER TOKEN ## prompt_tokens_cost_usd_dollar = ( - model_info["input_cost_per_token"] * prompt_tokens + model_info["input_cost_per_token"] * (prompt_tokens - cache_read_input_tokens) ) elif ( model_info.get("input_cost_per_second", None) is not None @@ -307,6 +307,10 @@ def cost_per_token( # noqa: PLR0915 prompt_tokens_cost_usd_dollar = ( model_info["input_cost_per_second"] * response_time_ms / 1000 # type: ignore ) + ## Prompt Caching cost calculation + if model_info["cache_read_input_token_cost"] is not None and model_info["cache_read_input_token_cost"] > 0: + ## COST PER TOKEN ## + prompt_tokens_cost_usd_dollar += model_info["cache_read_input_token_cost"] * cache_read_input_tokens if model_info["output_cost_per_token"] > 0: completion_tokens_cost_usd_dollar = (