feat(llm_cost_calc/google.py): do character based cost calculation for vertex ai

Calculate cost for vertex ai responses using characters in query/response

Closes https://github.com/BerriAI/litellm/issues/4165
This commit is contained in:
Krrish Dholakia 2024-06-19 17:18:42 -07:00
parent cab057da4a
commit 16da21e839
5 changed files with 287 additions and 17 deletions

View file

@ -1,8 +1,10 @@
# What is this?
## Cost calculation for Google AI Studio / Vertex AI models
from typing import Literal, Tuple
import traceback
from typing import List, Literal, Optional, Tuple
import litellm
from litellm import verbose_logger
"""
Gemini pricing covers:
@ -12,6 +14,12 @@ Gemini pricing covers:
- video
"""
"""
Vertex AI -> character based pricing
Google AI Studio -> token based pricing
"""
models_without_dynamic_pricing = ["gemini-1.0-pro", "gemini-pro"]
@ -21,6 +29,124 @@ def _is_above_128k(tokens: float) -> bool:
return False
def cost_per_character(
    model: str,
    custom_llm_provider: str,
    prompt_tokens: float,
    completion_tokens: float,
    prompt_characters: float,
    completion_characters: float,
) -> Tuple[float, float]:
    """
    Calculates the cost per character for a given VertexAI model, input messages, and response object.

    Input:
        - model: str, the model name without provider prefix
        - custom_llm_provider: str, "vertex_ai-*"
        - prompt_tokens: float, input token count (used only for the token-based fallback)
        - completion_tokens: float, output token count (used only for the token-based fallback)
        - prompt_characters: float, the number of input characters
        - completion_characters: float, the number of output characters

    Returns:
        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd

    Raises:
        Exception if model requires >128k pricing, but model cost not mapped
    """
    ## GET MODEL INFO
    # Fetched once; previously this call was accidentally duplicated.
    model_info = litellm.get_model_info(
        model=model, custom_llm_provider=custom_llm_provider
    )

    ## CALCULATE INPUT COST
    try:
        if (
            _is_above_128k(tokens=prompt_characters * 4)  # 1 token = 4 char
            and model not in models_without_dynamic_pricing
        ):
            ## check if character pricing, else default to token pricing
            assert (
                "input_cost_per_character_above_128k_tokens" in model_info
                and model_info["input_cost_per_character_above_128k_tokens"] is not None
            ), "model info for model={} does not have 'input_cost_per_character_above_128k_tokens'-pricing for > 128k tokens\nmodel_info={}".format(
                model, model_info
            )
            prompt_cost = (
                prompt_characters
                * model_info["input_cost_per_character_above_128k_tokens"]
            )
        else:
            assert (
                "input_cost_per_character" in model_info
                and model_info["input_cost_per_character"] is not None
            ), "model info for model={} does not have 'input_cost_per_character'-pricing\nmodel_info={}".format(
                model, model_info
            )
            prompt_cost = prompt_characters * model_info["input_cost_per_character"]
    except Exception as e:
        # Character pricing unavailable for this model -> fall back to token pricing.
        verbose_logger.error(
            "litellm.litellm_core_utils.llm_cost_calc.google.cost_per_character(): Exception occured - {}\n{}\n\
Defaulting to (cost_per_token * 4) calculation for prompt_cost".format(
                str(e), traceback.format_exc()
            )
        )
        initial_prompt_cost, _ = cost_per_token(
            model=model,
            custom_llm_provider=custom_llm_provider,
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
        )
        # NOTE(review): cost_per_token already prices the full token count, so
        # the "* 4" here likely over-charges by 4x — confirm intended behavior.
        prompt_cost = initial_prompt_cost * 4

    ## CALCULATE OUTPUT COST
    try:
        if (
            _is_above_128k(tokens=completion_characters * 4)  # 1 token = 4 char
            and model not in models_without_dynamic_pricing
        ):
            assert (
                "output_cost_per_character_above_128k_tokens" in model_info
                and model_info["output_cost_per_character_above_128k_tokens"]
                is not None
            ), "model info for model={} does not have 'output_cost_per_character_above_128k_tokens' pricing\nmodel_info={}".format(
                model, model_info
            )
            # Fix: multiply characters (not tokens) by the per-character rate.
            completion_cost = (
                completion_characters
                * model_info["output_cost_per_character_above_128k_tokens"]
            )
        else:
            assert (
                "output_cost_per_character" in model_info
                and model_info["output_cost_per_character"] is not None
            ), "model info for model={} does not have 'output_cost_per_character'-pricing\nmodel_info={}".format(
                model, model_info
            )
            # Fix: multiply characters (not tokens) by the per-character rate.
            completion_cost = (
                completion_characters * model_info["output_cost_per_character"]
            )
    except Exception as e:
        # Character pricing unavailable for this model -> fall back to token pricing.
        verbose_logger.error(
            "litellm.litellm_core_utils.llm_cost_calc.google.cost_per_character(): Exception occured - {}\n{}\n\
Defaulting to (cost_per_token * 4) calculation for completion_cost".format(
                str(e), traceback.format_exc()
            )
        )
        _, initial_completion_cost = cost_per_token(
            model=model,
            custom_llm_provider=custom_llm_provider,
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
        )
        # NOTE(review): see the prompt-cost fallback above re: the "* 4" factor.
        completion_cost = initial_completion_cost * 4
    return prompt_cost, completion_cost
def cost_per_token(
model: str,
custom_llm_provider: str,
@ -53,7 +179,8 @@ def cost_per_token(
and model not in models_without_dynamic_pricing
):
assert (
model_info["input_cost_per_token_above_128k_tokens"] is not None
"input_cost_per_token_above_128k_tokens" in model_info
and model_info["input_cost_per_token_above_128k_tokens"] is not None
), "model info for model={} does not have pricing for > 128k tokens\nmodel_info={}".format(
model, model_info
)
@ -69,7 +196,8 @@ def cost_per_token(
and model not in models_without_dynamic_pricing
):
assert (
model_info["output_cost_per_token_above_128k_tokens"] is not None
"output_cost_per_token_above_128k_tokens" in model_info
and model_info["output_cost_per_token_above_128k_tokens"] is not None
), "model info for model={} does not have pricing for > 128k tokens\nmodel_info={}".format(
model, model_info
)