(fix) prompt caching cost calculation OpenAI, Azure OpenAI (#6231)
* fix prompt caching cost calculation
* fix testing for prompt cache cost calc
This commit is contained in:
parent 846bb4cb91
commit 1994100028

3 changed files with 13 additions and 7 deletions
@@ -27,7 +27,8 @@ def cost_per_token(
     model_info = get_model_info(model=model, custom_llm_provider="azure")

     ## CALCULATE INPUT COST
-    prompt_cost: float = usage["prompt_tokens"] * model_info["input_cost_per_token"]
+    total_prompt_tokens: float = usage["prompt_tokens"] - usage._cache_read_input_tokens
+    prompt_cost: float = total_prompt_tokens * model_info["input_cost_per_token"]

     ## CALCULATE OUTPUT COST
     completion_cost: float = (
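
In effect, tokens served from the prompt cache are no longer billed at the full Azure input rate here; only the non-cached portion is, while the cache-read charge is applied at its own, cheaper rate (visible in the test expectations further down). A minimal standalone sketch of that arithmetic, using hypothetical names and rates rather than litellm's actual API:

    def prompt_cost_with_caching(
        prompt_tokens: int,
        cached_tokens: int,
        input_cost_per_token: float,
        cache_read_input_token_cost: float,
    ) -> float:
        # Illustrative sketch only: charge non-cached tokens at the full
        # input rate and cached tokens at the cache-read rate.
        non_cached_tokens = prompt_tokens - cached_tokens
        return (
            non_cached_tokens * input_cost_per_token
            + cached_tokens * cache_read_input_token_cost
        )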
@@ -27,7 +27,8 @@ def cost_per_token(
     model_info = get_model_info(model=model, custom_llm_provider="openai")

     ## CALCULATE INPUT COST
-    prompt_cost: float = usage["prompt_tokens"] * model_info["input_cost_per_token"]
+    total_prompt_tokens: float = usage["prompt_tokens"] - usage._cache_read_input_tokens
+    prompt_cost: float = total_prompt_tokens * model_info["input_cost_per_token"]

     ## CALCULATE OUTPUT COST
     completion_cost: float = (
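
The OpenAI hunk is identical apart from the provider string. Judging from the old test expectation below, cached tokens were previously billed at both the full input rate and the cache-read rate; with made-up numbers (hypothetical rates, not any model's real pricing) the difference looks like this:

    prompt_tokens = 1_000
    cached_tokens = 600
    input_cost_per_token = 2.50 / 1_000_000         # hypothetical $/token
    cache_read_input_token_cost = 1.25 / 1_000_000  # hypothetical $/token

    old_input_cost = (
        prompt_tokens * input_cost_per_token
        + cached_tokens * cache_read_input_token_cost
    )  # roughly 0.00325 -- cached tokens counted twice
    new_input_cost = (
        (prompt_tokens - cached_tokens) * input_cost_per_token
        + cached_tokens * cache_read_input_token_cost
    )  # roughly 0.00175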
@@ -1393,10 +1393,13 @@ def test_cost_azure_openai_prompt_caching():
     usage = response_2.usage

     _expected_cost2 = (
-        usage.prompt_tokens * model_info["input_cost_per_token"]
-        + usage.completion_tokens * model_info["output_cost_per_token"]
-        + usage.prompt_tokens_details.cached_tokens
-        * model_info["cache_read_input_token_cost"]
+        (usage.prompt_tokens - usage.prompt_tokens_details.cached_tokens)
+        * model_info["input_cost_per_token"]
+        + (usage.completion_tokens * model_info["output_cost_per_token"])
+        + (
+            usage.prompt_tokens_details.cached_tokens
+            * model_info["cache_read_input_token_cost"]
+        )
     )

     print("_expected_cost2", _expected_cost2)
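
The hunk shows only the expected-value expression; the surrounding test presumably compares it against the cost litellm computes for the cache-hitting response. A hedged sketch of that comparison pattern (response_2 and the tolerance are assumptions, not part of this hunk):

    import litellm

    # response_2 is the second, cache-hitting completion created earlier in the test.
    cost_2 = litellm.completion_cost(completion_response=response_2)
    assert abs(cost_2 - _expected_cost2) < 1e-9, f"{cost_2} != {_expected_cost2}"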
@@ -1515,7 +1518,8 @@ def test_cost_openai_prompt_caching():
     usage = response_2.usage

     _expected_cost2 = (
-        usage.prompt_tokens * model_info["input_cost_per_token"]
+        (usage.prompt_tokens - usage.prompt_tokens_details.cached_tokens)
+        * model_info["input_cost_per_token"]
         + usage.completion_tokens * model_info["output_cost_per_token"]
         + usage.prompt_tokens_details.cached_tokens
         * model_info["cache_read_input_token_cost"]
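
For reference, the corrected expected-cost expression evaluated standalone with made-up usage numbers and pricing (all values hypothetical):

    from types import SimpleNamespace

    usage = SimpleNamespace(
        prompt_tokens=1_000,
        completion_tokens=200,
        prompt_tokens_details=SimpleNamespace(cached_tokens=600),
    )
    model_info = {
        "input_cost_per_token": 2.50 / 1_000_000,          # hypothetical rates
        "output_cost_per_token": 10.00 / 1_000_000,
        "cache_read_input_token_cost": 1.25 / 1_000_000,
    }

    expected_cost = (
        (usage.prompt_tokens - usage.prompt_tokens_details.cached_tokens)
        * model_info["input_cost_per_token"]
        + usage.completion_tokens * model_info["output_cost_per_token"]
        + usage.prompt_tokens_details.cached_tokens
        * model_info["cache_read_input_token_cost"]
    )
    print(expected_cost)  # roughly 0.00375 with these made-up numbers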