diff --git a/litellm/llms/AzureOpenAI/cost_calculation.py b/litellm/llms/AzureOpenAI/cost_calculation.py
index 5d619e994..f46a2f3a3 100644
--- a/litellm/llms/AzureOpenAI/cost_calculation.py
+++ b/litellm/llms/AzureOpenAI/cost_calculation.py
@@ -27,7 +27,8 @@ def cost_per_token(
     model_info = get_model_info(model=model, custom_llm_provider="azure")
 
     ## CALCULATE INPUT COST
-    prompt_cost: float = usage["prompt_tokens"] * model_info["input_cost_per_token"]
+    total_prompt_tokens: float = usage["prompt_tokens"] - usage._cache_read_input_tokens
+    prompt_cost: float = total_prompt_tokens * model_info["input_cost_per_token"]
 
     ## CALCULATE OUTPUT COST
     completion_cost: float = (
diff --git a/litellm/llms/OpenAI/cost_calculation.py b/litellm/llms/OpenAI/cost_calculation.py
index 6b8e5f41e..7fb4c64a5 100644
--- a/litellm/llms/OpenAI/cost_calculation.py
+++ b/litellm/llms/OpenAI/cost_calculation.py
@@ -27,7 +27,8 @@ def cost_per_token(
     model_info = get_model_info(model=model, custom_llm_provider="openai")
 
     ## CALCULATE INPUT COST
-    prompt_cost: float = usage["prompt_tokens"] * model_info["input_cost_per_token"]
+    total_prompt_tokens: float = usage["prompt_tokens"] - usage._cache_read_input_tokens
+    prompt_cost: float = total_prompt_tokens * model_info["input_cost_per_token"]
 
     ## CALCULATE OUTPUT COST
     completion_cost: float = (
diff --git a/tests/local_testing/test_completion_cost.py b/tests/local_testing/test_completion_cost.py
index 584f8c841..c1db0fbd5 100644
--- a/tests/local_testing/test_completion_cost.py
+++ b/tests/local_testing/test_completion_cost.py
@@ -1393,10 +1393,13 @@ def test_cost_azure_openai_prompt_caching():
     usage = response_2.usage
 
     _expected_cost2 = (
-        usage.prompt_tokens * model_info["input_cost_per_token"]
-        + usage.completion_tokens * model_info["output_cost_per_token"]
-        + usage.prompt_tokens_details.cached_tokens
-        * model_info["cache_read_input_token_cost"]
+        (usage.prompt_tokens - usage.prompt_tokens_details.cached_tokens)
+        * model_info["input_cost_per_token"]
+        + (usage.completion_tokens * model_info["output_cost_per_token"])
+        + (
+            usage.prompt_tokens_details.cached_tokens
+            * model_info["cache_read_input_token_cost"]
+        )
     )
 
     print("_expected_cost2", _expected_cost2)
@@ -1515,7 +1518,8 @@ def test_cost_openai_prompt_caching():
     usage = response_2.usage
 
     _expected_cost2 = (
-        usage.prompt_tokens * model_info["input_cost_per_token"]
+        (usage.prompt_tokens - usage.prompt_tokens_details.cached_tokens)
+        * model_info["input_cost_per_token"]
         + usage.completion_tokens * model_info["output_cost_per_token"]
         + usage.prompt_tokens_details.cached_tokens
         * model_info["cache_read_input_token_cost"]
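
The test changes encode the same formula the cost functions now implement: prompt tokens served from the provider's prompt cache are billed at the cache-read rate, and only the remaining prompt tokens at the full input rate. A minimal standalone sketch of that pricing formula follows (hypothetical helper name and illustrative rates, not litellm's actual API):

# Sketch only: prices a response whose prompt was partially served from the
# provider's prompt cache, mirroring the corrected _expected_cost2 formula.
def expected_cost(
    prompt_tokens: int,
    cached_tokens: int,
    completion_tokens: int,
    input_cost_per_token: float,
    cache_read_input_token_cost: float,
    output_cost_per_token: float,
) -> float:
    # Cached prompt tokens are billed at the cache-read rate, so subtract them
    # from the tokens billed at the full input rate.
    uncached_prompt_tokens = prompt_tokens - cached_tokens
    return (
        uncached_prompt_tokens * input_cost_per_token
        + cached_tokens * cache_read_input_token_cost
        + completion_tokens * output_cost_per_token
    )

# Example: 1000-token prompt with 800 cached tokens and a 200-token completion,
# using illustrative rates of $2.50/M input, $1.25/M cache read, $10/M output.
print(expected_cost(1000, 800, 200, 2.5e-6, 1.25e-6, 10e-6))  # 0.0035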