Fix: price cached_tokens when using custom handler

2025-04-24 10:14:26 +00:00 · 2025-03-10 16:02:32 +09:00 · 2025-03-10 16:02:32 +09:00 · 24101fc7af
commit 24101fc7af
parent 995fc001e0
1 changed file with 5 additions and 1 deletion
--- a/litellm/cost_calculator.py
+++ b/litellm/cost_calculator.py
@@ -291,7 +291,7 @@ def cost_per_token(  # noqa: PLR0915
        if model_info["input_cost_per_token"] > 0:
            ## COST PER TOKEN ##
            prompt_tokens_cost_usd_dollar = (
-                model_info["input_cost_per_token"] * prompt_tokens
+                model_info["input_cost_per_token"] * (prompt_tokens - cache_read_input_tokens)
            )
        elif (
            model_info.get("input_cost_per_second", None) is not None
@@ -307,6 +307,10 @@ def cost_per_token(  # noqa: PLR0915
            prompt_tokens_cost_usd_dollar = (
                model_info["input_cost_per_second"] * response_time_ms / 1000  # type: ignore
            )
+        ## Prompt Caching cost calculation
+        if model_info["cache_read_input_token_cost"] is not None and model_info["cache_read_input_token_cost"] > 0:
+            ## COST PER TOKEN ##
+            prompt_tokens_cost_usd_dollar += model_info["cache_read_input_token_cost"] * cache_read_input_tokens

        if model_info["output_cost_per_token"] > 0:
            completion_tokens_cost_usd_dollar = (