Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-27 03:34:10 +00:00
* fix: fix type-checking errors
* fix: fix additional type-checking errors
* fix: additional type-checking error fixes
* fix: fix additional type-checking errors
* fix: additional type-check fixes
* fix: fix all type-checking errors + add pyright to ci/cd
* fix: fix incorrect import
* ci(config.yml): use mypy on ci/cd
* fix: fix type-checking errors in utils.py
* fix: fix all type-checking errors on main.py
* fix: fix mypy linting errors
* fix(anthropic/cost_calculator.py): fix linting errors
* fix: fix mypy linting errors
* fix: fix linting errors
59 lines · 2 KiB · Python
"""
|
|
Helper util for handling anthropic-specific cost calculation
|
|
- e.g.: prompt caching
|
|
"""
|
|
|
|
from typing import Tuple
|
|
|
|
from litellm.types.utils import Usage
|
|
from litellm.utils import get_model_info
|
|
|
|
|
|
def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
|
|
"""
|
|
Calculates the cost per token for a given model, prompt tokens, and completion tokens.
|
|
|
|
Input:
|
|
- model: str, the model name without provider prefix
|
|
- usage: LiteLLM Usage block, containing anthropic caching information
|
|
|
|
Returns:
|
|
Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
|
|
"""
|
|
## GET MODEL INFO
|
|
model_info = get_model_info(model=model, custom_llm_provider="anthropic")
|
|
|
|
## CALCULATE INPUT COST
|
|
### Cost of processing (non-cache hit + cache hit) + Cost of cache-writing (cache writing)
|
|
prompt_cost = 0.0
|
|
### PROCESSING COST
|
|
non_cache_hit_tokens = usage.prompt_tokens
|
|
cache_hit_tokens = 0
|
|
if usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens:
|
|
cache_hit_tokens = usage.prompt_tokens_details.cached_tokens
|
|
non_cache_hit_tokens = non_cache_hit_tokens - cache_hit_tokens
|
|
|
|
prompt_cost = float(non_cache_hit_tokens) * model_info["input_cost_per_token"]
|
|
|
|
_cache_read_input_token_cost = model_info.get("cache_read_input_token_cost")
|
|
if (
|
|
_cache_read_input_token_cost is not None
|
|
and usage.prompt_tokens_details
|
|
and usage.prompt_tokens_details.cached_tokens
|
|
):
|
|
prompt_cost += (
|
|
float(usage.prompt_tokens_details.cached_tokens)
|
|
* _cache_read_input_token_cost
|
|
)
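
    # NOTE: Anthropic reports tokens written to the prompt cache separately, as
    # `cache_creation_input_tokens` on the API response; litellm's Usage object
    # carries that value as `_cache_creation_input_tokens`, billed below at the
    # cache-creation rate on top of the processing cost above.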
    ### CACHE WRITING COST
    _cache_creation_input_token_cost = model_info.get("cache_creation_input_token_cost")
    if _cache_creation_input_token_cost is not None:
        prompt_cost += (
            float(usage._cache_creation_input_tokens) * _cache_creation_input_token_cost
        )

    ## CALCULATE OUTPUT COST
    completion_cost = usage.completion_tokens * model_info["output_cost_per_token"]

    return prompt_cost, completion_cost
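
The input side of this calculation has three parts: non-cached prompt tokens billed at the base input rate, cache hits billed at the cache-read rate, and cache writes billed at the cache-creation rate. The sketch below reproduces that arithmetic with hypothetical per-token prices standing in for the values get_model_info() reads from litellm's model cost map; it illustrates the formula only and is not part of litellm's API.

# Hypothetical rates for illustration; real values come from get_model_info().
INPUT_COST = 3e-6           # USD per non-cached prompt token (assumed)
CACHE_READ_COST = 3e-7      # USD per cache-hit prompt token (assumed)
CACHE_WRITE_COST = 3.75e-6  # USD per cache-creation token (assumed)
OUTPUT_COST = 15e-6         # USD per completion token (assumed)

prompt_tokens = 1_200      # total prompt tokens reported for the request
cached_tokens = 1_000      # of which 1,000 were served from the prompt cache
cache_creation_tokens = 0  # no new cache entry written on this call
completion_tokens = 300

# Same three-part split as cost_per_token(): full price for the non-cached
# remainder, discounted price for cache hits, premium price for cache writes.
prompt_cost = (prompt_tokens - cached_tokens) * INPUT_COST
prompt_cost += cached_tokens * CACHE_READ_COST
prompt_cost += cache_creation_tokens * CACHE_WRITE_COST
completion_cost = completion_tokens * OUTPUT_COST

print(f"prompt: ${prompt_cost:.6f}, completion: ${completion_cost:.6f}")
# prompt: $0.000900, completion: $0.004500

Splitting the token classes this way matters because, under Anthropic's prompt-caching pricing, cache reads are billed at a discount to the base input rate while cache writes are billed at a premium, so folding everything into a single prompt_tokens count would misprice cached requests.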