"""
|
|
Helper util for handling azure openai-specific cost calculation
|
|
- e.g.: prompt caching
|
|
"""
|
|
|
|
from typing import Optional, Tuple
|
|
|
|
from litellm._logging import verbose_logger
|
|
from litellm.types.utils import Usage
|
|
from litellm.utils import get_model_info
|
|
|
|
|
|
def cost_per_token(
    model: str, usage: Usage, response_time_ms: Optional[float] = 0.0
) -> Tuple[float, float]:
    """
    Calculates the prompt and completion cost (in USD) for a given model and usage block.

    Input:
        - model: str, the model name without provider prefix
        - usage: LiteLLM Usage block, containing prompt caching information
        - response_time_ms: Optional[float], response time in milliseconds
          (used for models priced per second of output, e.g. speech / audio)

    Returns:
        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
    """
    ## GET MODEL INFO
    model_info = get_model_info(model=model, custom_llm_provider="azure")
    cached_tokens: Optional[int] = None
    ## CALCULATE INPUT COST
    non_cached_text_tokens = usage.prompt_tokens
    if usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens:
        cached_tokens = usage.prompt_tokens_details.cached_tokens
        non_cached_text_tokens = non_cached_text_tokens - cached_tokens
    prompt_cost: float = non_cached_text_tokens * model_info["input_cost_per_token"]
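    # e.g. 1,000 prompt tokens of which 600 are cached (illustrative numbers):
    # only the 400 non-cached tokens are billed at input_cost_per_token here;
    # the 600 cached tokens are priced via cache_read_input_token_cost below.
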
    ## CALCULATE OUTPUT COST
    completion_cost: float = (
        usage["completion_tokens"] * model_info["output_cost_per_token"]
    )
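    # Completion tokens are billed flat at output_cost_per_token; the prompt
    # caching discount never applies to output tokens.
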
    ## Prompt Caching cost calculation
    if model_info.get("cache_read_input_token_cost") is not None and cached_tokens:
        # Note: We read ._cache_read_input_tokens from the Usage - since cost_calculator.py standardizes the cache read tokens on usage._cache_read_input_tokens
        prompt_cost += cached_tokens * (
            model_info.get("cache_read_input_token_cost", 0) or 0
        )
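    # If the model map has no cache_read_input_token_cost, the block above is
    # skipped, so cached tokens (already excluded from the input cost) are
    # effectively billed at zero.
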
    ## Speech / Audio cost calculation
    if (
        "output_cost_per_second" in model_info
        and model_info["output_cost_per_second"] is not None
        and response_time_ms is not None
    ):
        verbose_logger.debug(
            f"For model={model} - output_cost_per_second: {model_info.get('output_cost_per_second')}; response time: {response_time_ms}"
        )
        ## COST PER SECOND ##
        prompt_cost = 0
        completion_cost = model_info["output_cost_per_second"] * response_time_ms / 1000
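        # Per-second pricing replaces the token-based costs computed above:
        # the whole call is billed on response_time_ms rather than token counts.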

    return prompt_cost, completion_cost
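

## ---------------------------------------------------------------------------
## Illustrative usage sketch (a minimal example, not part of this module's API).
## It assumes the Azure model cost map has an entry for the chosen model and that
## Usage / PromptTokensDetailsWrapper accept the keyword arguments shown; check
## litellm.types.utils for the exact constructors in your litellm version.
## ---------------------------------------------------------------------------
if __name__ == "__main__":
    from litellm.types.utils import PromptTokensDetailsWrapper

    example_usage = Usage(
        prompt_tokens=1000,
        completion_tokens=200,
        total_tokens=1200,
        prompt_tokens_details=PromptTokensDetailsWrapper(cached_tokens=600),
    )
    # Expected breakdown:
    #   400 non-cached prompt tokens * input_cost_per_token
    # + 600 cached tokens * cache_read_input_token_cost
    # + 200 completion tokens * output_cost_per_token
    prompt_usd, completion_usd = cost_per_token(model="gpt-4o", usage=example_usage)
    print(f"prompt=${prompt_usd:.6f} completion=${completion_usd:.6f}")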