Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)
* fix(factory.py): skip empty text blocks for bedrock user messages
  Fixes https://github.com/BerriAI/litellm/issues/7169
* Add support for Gemini 2.0 GoogleSearch tool (#7257)
  * Add support for google_search tool in gemini 2.0
  * Add/modify tests
  * Fix grounding check
  * Remove 2.0 grounding test; exclude experimental model in VERTEX_MODELS_TO_NOT_TEST
  * Swap order of tools
  * Fix formatting
* fix(get_api_base.py): return api base in streaming response
  Fixes https://github.com/BerriAI/litellm/issues/7249
  Closes https://github.com/BerriAI/litellm/pull/7250
* fix(cost_calculator.py): only set base model to model if not none
  Fixes https://github.com/BerriAI/litellm/issues/7223
* fix(cost_calculator.py): enforce stricter order when picking model for cost calculation
* fix(cost_calculator.py): fix '_select_model_name_for_cost_calc' to return model name with region name prefix if provided
* fix(utils.py): fix 'get_model_info()' to handle edge case where model name starts with custom llm provider AND custom llm provider is given
* fix(cost_calculator.py): handle `custom_llm_provider-` scenario
* fix(cost_calculator.py): e2e working tts cost tracking; ensures initial message is passed in to cost calculator
* fix(factory.py): suppress linting errors
* fix(cost_calculator.py): strip llm provider from model name after selecting cost calc model
* fix(litellm_logging.py): store initial request in 'input' field + accept base_model to be passed in litellm_params directly
* test: handle none env var value in flaky test
* fix(litellm_logging.py): fix linting errors

Co-authored-by: Sam B <samlingx@gmail.com>
120 lines
4.5 KiB
Python
"""
|
|
Helper util for handling openai-specific cost calculation
|
|
- e.g.: prompt caching
|
|
"""
|
|
|
|
from typing import Literal, Optional, Tuple
|
|
|
|
from litellm._logging import verbose_logger
|
|
from litellm.types.utils import CallTypes, Usage
|
|
from litellm.utils import get_model_info
|
|
|
|
|
|
def cost_router(call_type: CallTypes) -> Literal["cost_per_token", "cost_per_second"]:
    if call_type == CallTypes.atranscription or call_type == CallTypes.transcription:
        return "cost_per_second"
    else:
        return "cost_per_token"

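# Illustrative routing sketch (an editorial note, not part of the original module):
# transcription calls are billed by audio duration, everything else by tokens.
# A caller might combine the helpers roughly like this, where `call_type`, `model`,
# `duration`, and `usage` are supplied by the caller:
#
#     if cost_router(call_type) == "cost_per_second":
#         costs = cost_per_second(model=model, custom_llm_provider="openai", duration=duration)
#     else:
#         costs = cost_per_token(model=model, usage=usage)
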
def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
    """
    Calculates the cost per token for a given model, prompt tokens, and completion tokens.

    Input:
        - model: str, the model name without provider prefix
        - usage: LiteLLM Usage block, containing prompt caching information

    Returns:
        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
    """
    ## GET MODEL INFO
    model_info = get_model_info(model=model, custom_llm_provider="openai")

    ## CALCULATE INPUT COST
    ### Non-cached text tokens
    non_cached_text_tokens = usage.prompt_tokens
    cached_tokens: Optional[int] = None
    if usage.prompt_tokens_details and usage.prompt_tokens_details.cached_tokens:
        cached_tokens = usage.prompt_tokens_details.cached_tokens
        non_cached_text_tokens = non_cached_text_tokens - cached_tokens
    prompt_cost: float = non_cached_text_tokens * model_info["input_cost_per_token"]

    ## Prompt Caching cost calculation
    if model_info.get("cache_read_input_token_cost") is not None and cached_tokens:
        # Note: cached tokens are read from usage.prompt_tokens_details.cached_tokens,
        # since cost_calculator.py standardizes cache-read tokens onto the Usage block
        prompt_cost += cached_tokens * (
            model_info.get("cache_read_input_token_cost", 0) or 0
        )

    _audio_tokens: Optional[int] = (
        usage.prompt_tokens_details.audio_tokens
        if usage.prompt_tokens_details is not None
        else None
    )
    _audio_cost_per_token: Optional[float] = model_info.get(
        "input_cost_per_audio_token"
    )
    if _audio_tokens is not None and _audio_cost_per_token is not None:
        audio_cost: float = _audio_tokens * _audio_cost_per_token
        prompt_cost += audio_cost

    ## CALCULATE OUTPUT COST
    completion_cost: float = (
        usage["completion_tokens"] * model_info["output_cost_per_token"]
    )
    _output_cost_per_audio_token: Optional[float] = model_info.get(
        "output_cost_per_audio_token"
    )
    _output_audio_tokens: Optional[int] = (
        usage.completion_tokens_details.audio_tokens
        if usage.completion_tokens_details is not None
        else None
    )
    if _output_cost_per_audio_token is not None and _output_audio_tokens is not None:
        audio_cost = _output_audio_tokens * _output_cost_per_audio_token
        completion_cost += audio_cost

    return prompt_cost, completion_cost

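# Worked example (illustrative numbers, not from the module): for a prompt of 1,000
# tokens of which 600 were read from the prompt cache, and 200 completion tokens:
#
#     prompt_cost     = 400 * input_cost_per_token + 600 * cache_read_input_token_cost
#     completion_cost = 200 * output_cost_per_token
#
# Audio tokens reported on usage.prompt_tokens_details / usage.completion_tokens_details
# are added on top at the model's per-audio-token rates, when those rates are defined.
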
def cost_per_second(
    model: str, custom_llm_provider: Optional[str], duration: float = 0.0
) -> Tuple[float, float]:
    """
    Calculates the cost per second for a given model and response duration.

    Input:
        - model: str, the model name without provider prefix
        - custom_llm_provider: str, the custom llm provider
        - duration: float, the duration of the response in seconds

    Returns:
        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
    """
    ## GET MODEL INFO
    model_info = get_model_info(
        model=model, custom_llm_provider=custom_llm_provider or "openai"
    )
    prompt_cost = 0.0
    completion_cost = 0.0
    ## Speech / Audio cost calculation
    if (
        "output_cost_per_second" in model_info
        and model_info["output_cost_per_second"] is not None
    ):
        verbose_logger.debug(
            f"For model={model} - output_cost_per_second: {model_info.get('output_cost_per_second')}; duration: {duration}"
        )
        ## COST PER SECOND ##
        completion_cost = model_info["output_cost_per_second"] * duration
    elif (
        "input_cost_per_second" in model_info
        and model_info["input_cost_per_second"] is not None
    ):
        verbose_logger.debug(
            f"For model={model} - input_cost_per_second: {model_info.get('input_cost_per_second')}; duration: {duration}"
        )
        ## COST PER SECOND ##
        prompt_cost = model_info["input_cost_per_second"] * duration
        completion_cost = 0.0

    return prompt_cost, completion_cost

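if __name__ == "__main__":
    # Minimal runnable sketch (an editorial addition, not part of the original module).
    # Token-based pricing with a hypothetical usage block, then duration-based pricing
    # for a 30-second audio call. Model names are examples and must exist in litellm's
    # model cost map for get_model_info() to resolve them.
    example_usage = Usage(prompt_tokens=1000, completion_tokens=200, total_tokens=1200)
    print(cost_per_token(model="gpt-4o-mini", usage=example_usage))
    print(cost_per_second(model="whisper-1", custom_llm_provider="openai", duration=30.0))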