(fix) prompt caching cost calculation OpenAI, Azure OpenAI (#6231)

* fix prompt caching cost calculation

* fix testing for prompt cache cost calc
Ishaan Jaff 2024-10-15 18:55:31 +05:30 committed by GitHub
parent 846bb4cb91
commit 1994100028
3 changed files with 13 additions and 7 deletions
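
Why this matters: before the change, the full prompt_tokens count was billed at the regular input rate while the cached portion was also billed at the cache-read rate, double-counting cached tokens. A worked sketch of the before/after arithmetic, using hypothetical token counts and per-token prices (real values come from get_model_info):

# Hypothetical numbers, for illustration only.
prompt_tokens = 1000            # total prompt tokens reported in usage
cached_tokens = 800             # tokens served from the prompt cache
input_cost_per_token = 2.5e-06
cache_read_input_token_cost = 1.25e-06

# Old behavior: cached tokens billed twice (full rate plus cache-read rate).
old_cost = (
    prompt_tokens * input_cost_per_token
    + cached_tokens * cache_read_input_token_cost
)  # 0.0025 + 0.001 = 0.0035

# Fixed behavior: only the uncached remainder pays the full input rate.
new_cost = (
    (prompt_tokens - cached_tokens) * input_cost_per_token
    + cached_tokens * cache_read_input_token_cost
)  # 0.0005 + 0.001 = 0.0015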


@@ -27,7 +27,8 @@ def cost_per_token(
     model_info = get_model_info(model=model, custom_llm_provider="azure")
     ## CALCULATE INPUT COST
-    prompt_cost: float = usage["prompt_tokens"] * model_info["input_cost_per_token"]
+    total_prompt_tokens: float = usage["prompt_tokens"] - usage._cache_read_input_tokens
+    prompt_cost: float = total_prompt_tokens * model_info["input_cost_per_token"]
     ## CALCULATE OUTPUT COST
     completion_cost: float = (
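
Note the mixed access styles in the new line: usage["prompt_tokens"] goes through dict-style indexing, while the cache counter is read as the attribute usage._cache_read_input_tokens. A minimal stand-in for the Usage object, reduced to the fields this hunk touches (the default of 0 for the cache counter is an assumption):

from dataclasses import dataclass

@dataclass
class Usage:
    # Stand-in for litellm's Usage object, not the real class.
    prompt_tokens: int
    completion_tokens: int
    _cache_read_input_tokens: int = 0  # assumed to be 0 when nothing was cached

    def __getitem__(self, key: str) -> int:
        # Supports the dict-style usage["prompt_tokens"] access seen above.
        return getattr(self, key)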


@@ -27,7 +27,8 @@ def cost_per_token(
     model_info = get_model_info(model=model, custom_llm_provider="openai")
     ## CALCULATE INPUT COST
-    prompt_cost: float = usage["prompt_tokens"] * model_info["input_cost_per_token"]
+    total_prompt_tokens: float = usage["prompt_tokens"] - usage._cache_read_input_tokens
+    prompt_cost: float = total_prompt_tokens * model_info["input_cost_per_token"]
     ## CALCULATE OUTPUT COST
     completion_cost: float = (
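
Taken together, the two provider hunks apply the same correction. A self-contained sketch of the patched input-cost path, reusing the Usage stand-in above (the function shape and model_info keys follow the hunks; the real functions bill the cache-read surcharge elsewhere, so its placement here is an assumption):

def cost_per_token_sketch(usage, model_info):
    # Uncached prompt tokens pay the regular input rate.
    total_prompt_tokens = usage["prompt_tokens"] - usage._cache_read_input_tokens
    prompt_cost = total_prompt_tokens * model_info["input_cost_per_token"]
    # Cached prompt tokens pay the discounted cache-read rate.
    prompt_cost += (
        usage._cache_read_input_tokens * model_info["cache_read_input_token_cost"]
    )
    completion_cost = usage["completion_tokens"] * model_info["output_cost_per_token"]
    return prompt_cost, completion_cost

# Example with the Usage stand-in above (hypothetical rates):
usage = Usage(prompt_tokens=1000, completion_tokens=50, _cache_read_input_tokens=800)
model_info = {
    "input_cost_per_token": 2.5e-06,
    "output_cost_per_token": 1.0e-05,
    "cache_read_input_token_cost": 1.25e-06,
}
print(cost_per_token_sketch(usage, model_info))  # (0.0015, 0.0005)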


@@ -1393,10 +1393,13 @@ def test_cost_azure_openai_prompt_caching():
     usage = response_2.usage
     _expected_cost2 = (
-        usage.prompt_tokens * model_info["input_cost_per_token"]
-        + usage.completion_tokens * model_info["output_cost_per_token"]
-        + usage.prompt_tokens_details.cached_tokens
-        * model_info["cache_read_input_token_cost"]
+        (usage.prompt_tokens - usage.prompt_tokens_details.cached_tokens)
+        * model_info["input_cost_per_token"]
+        + (usage.completion_tokens * model_info["output_cost_per_token"])
+        + (
+            usage.prompt_tokens_details.cached_tokens
+            * model_info["cache_read_input_token_cost"]
+        )
     )
     print("_expected_cost2", _expected_cost2)
@@ -1515,7 +1518,8 @@ def test_cost_openai_prompt_caching():
     usage = response_2.usage
     _expected_cost2 = (
-        usage.prompt_tokens * model_info["input_cost_per_token"]
+        (usage.prompt_tokens - usage.prompt_tokens_details.cached_tokens)
+        * model_info["input_cost_per_token"]
         + usage.completion_tokens * model_info["output_cost_per_token"]
         + usage.prompt_tokens_details.cached_tokens
         * model_info["cache_read_input_token_cost"]
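
A side note on the two test hunks: the Azure version parenthesizes each term while the OpenAI version keeps the original layout, but since + cached * rate already binds as + (cached * rate), both spellings compute the same expectation. A quick check with stand-in numbers:

prompt, cached, completion = 1000, 800, 50
in_rate, out_rate, cache_rate = 2.5e-06, 1.0e-05, 1.25e-06

azure_style = (
    (prompt - cached) * in_rate
    + (completion * out_rate)
    + (cached * cache_rate)
)
openai_style = (
    (prompt - cached) * in_rate
    + completion * out_rate
    + cached * cache_rate
)
assert azure_style == openai_style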