fix(cost_calculator.py): support openai+azure tts calls

Krrish Dholakia 2024-07-05 20:58:08 -07:00
parent d528b66db0
commit 407639cc7d
6 changed files with 191 additions and 5 deletions
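
In short: completion_cost() and cost_per_token() gain a call_type parameter, "speech"/"aspeech" calls are priced per input character via a new _generic_cost_per_character() helper, and per-character prices for tts-1 / tts-1-hd are registered for both OpenAI and Azure. A minimal usage sketch of the resulting behavior, mirroring the test added below (the exact billed character count depends on litellm's internal _count_characters helper):

    import os

    import litellm
    from litellm import completion_cost

    # Load the local pricing map so the tts-1 entries added in this commit are used.
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    cost = completion_cost(
        model="openai/tts-1",
        prompt="the quick brown fox jumped over the lazy dogs",
        call_type="speech",
    )
    # Expected: billed characters * $0.000015 (tts-1's input_cost_per_character)
    print(f"${cost:.6f}")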

View file

@@ -13,6 +13,7 @@ from litellm.litellm_core_utils.llm_cost_calc.google import (
 from litellm.litellm_core_utils.llm_cost_calc.google import (
     cost_per_token as google_cost_per_token,
 )
+from litellm.litellm_core_utils.llm_cost_calc.utils import _generic_cost_per_character
 from litellm.utils import (
     CallTypes,
     CostPerToken,
@@ -62,6 +63,23 @@ def cost_per_token(
     ### CUSTOM PRICING ###
     custom_cost_per_token: Optional[CostPerToken] = None,
     custom_cost_per_second: Optional[float] = None,
+    ### CALL TYPE ###
+    call_type: Literal[
+        "embedding",
+        "aembedding",
+        "completion",
+        "acompletion",
+        "atext_completion",
+        "text_completion",
+        "image_generation",
+        "aimage_generation",
+        "moderation",
+        "amoderation",
+        "atranscription",
+        "transcription",
+        "aspeech",
+        "speech",
+    ] = "completion",
 ) -> Tuple[float, float]:
     """
     Calculates the cost per token for a given model, prompt tokens, and completion tokens.
@@ -76,6 +94,7 @@ def cost_per_token(
         custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list)
         custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call.
         custom_cost_per_second: Optional[float]: the cost per second for the llm api call.
+        call_type: Optional[str]: the call type

     Returns:
         tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively.
@@ -159,6 +178,27 @@ def cost_per_token(
             prompt_tokens=prompt_tokens,
             completion_tokens=completion_tokens,
         )
+    elif call_type == "speech" or call_type == "aspeech":
+        prompt_cost, completion_cost = _generic_cost_per_character(
+            model=model_without_prefix,
+            custom_llm_provider=custom_llm_provider,
+            prompt_characters=prompt_characters,
+            completion_characters=completion_characters,
+            custom_prompt_cost=None,
+            custom_completion_cost=0,
+        )
+        if prompt_cost is None or completion_cost is None:
+            raise ValueError(
+                "cost for tts call is None. prompt_cost={}, completion_cost={}, model={}, custom_llm_provider={}, prompt_characters={}, completion_characters={}".format(
+                    prompt_cost,
+                    completion_cost,
+                    model_without_prefix,
+                    custom_llm_provider,
+                    prompt_characters,
+                    completion_characters,
+                )
+            )
+        return prompt_cost, completion_cost
     elif model in model_cost_ref:
         print_verbose(f"Success: model={model} in model_cost_map")
         print_verbose(
@@ -289,7 +329,7 @@ def cost_per_token(
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     else:
         # if model is not in model_prices_and_context_window.json. Raise an exception-let users know
-        error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n"
+        error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}, custom_llm_provider={custom_llm_provider}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n"
         raise litellm.exceptions.NotFoundError(  # type: ignore
             message=error_str,
             model=model,
@@ -535,6 +575,11 @@ def completion_cost(
            raise Exception(
                f"Model={image_gen_model_name} not found in completion cost model map"
            )
+        elif (
+            call_type == CallTypes.speech.value or call_type == CallTypes.aspeech.value
+        ):
+            prompt_characters = litellm.utils._count_characters(text=prompt)
+
         # Calculate cost based on prompt_tokens, completion_tokens
         if (
             "togethercomputer" in model
@@ -591,6 +636,7 @@ def completion_cost(
             custom_cost_per_token=custom_cost_per_token,
             prompt_characters=prompt_characters,
             completion_characters=completion_characters,
+            call_type=call_type,
         )
         _final_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
         print_verbose(
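
Downstream effect of the two hunks above: completion_cost() counts prompt characters for speech calls and forwards call_type to cost_per_token(), which routes to the new per-character helper. A hedged sketch of calling cost_per_token() directly with a pre-computed character count (the 45-character count is illustrative):

    import litellm

    # Hypothetical direct call; completion_cost() normally derives prompt_characters itself.
    prompt_cost, completion_cost_usd = litellm.cost_per_token(
        model="tts-1",
        custom_llm_provider="openai",
        prompt_characters=45,
        completion_characters=0,
        call_type="speech",
    )
    # prompt_cost = 45 * 0.000015 = 0.000675; completion_cost_usd = 0.0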

View file

@@ -0,0 +1,85 @@
+# What is this?
+## Helper utilities for cost_per_token()
+
+import traceback
+from typing import List, Literal, Optional, Tuple
+
+import litellm
+from litellm import verbose_logger
+
+
+def _generic_cost_per_character(
+    model: str,
+    custom_llm_provider: str,
+    prompt_characters: float,
+    completion_characters: float,
+    custom_prompt_cost: Optional[float],
+    custom_completion_cost: Optional[float],
+) -> Tuple[Optional[float], Optional[float]]:
+    """
+    Generic helper: calculates the cost per character for a given model, input messages, and response object.
+
+    Input:
+        - model: str, the model name without provider prefix
+        - custom_llm_provider: str, "vertex_ai-*"
+        - prompt_characters: float, the number of input characters
+        - completion_characters: float, the number of output characters
+
+    Returns:
+        Tuple[Optional[float], Optional[float]] - prompt_cost_in_usd, completion_cost_in_usd.
+            - returns None if not able to calculate cost.
+
+    Raises:
+        Exception if 'input_cost_per_character' or 'output_cost_per_character' is missing from model_info
+    """
+    args = locals()
+    ## GET MODEL INFO
+    model_info = litellm.get_model_info(
+        model=model, custom_llm_provider=custom_llm_provider
+    )
+
+    ## CALCULATE INPUT COST
+    try:
+        if custom_prompt_cost is None:
+            assert (
+                "input_cost_per_character" in model_info
+                and model_info["input_cost_per_character"] is not None
+            ), "model info for model={} does not have 'input_cost_per_character'-pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            custom_prompt_cost = model_info["input_cost_per_character"]
+        prompt_cost = prompt_characters * custom_prompt_cost
+    except Exception as e:
+        verbose_logger.error(
+            "litellm.litellm_core_utils.llm_cost_calc.utils.py::cost_per_character(): Exception occurred - {}\n{}\nDefaulting to None".format(
+                str(e), traceback.format_exc()
+            )
+        )
+        prompt_cost = None
+
+    ## CALCULATE OUTPUT COST
+    try:
+        if custom_completion_cost is None:
+            assert (
+                "output_cost_per_character" in model_info
+                and model_info["output_cost_per_character"] is not None
+            ), "model info for model={} does not have 'output_cost_per_character'-pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            custom_completion_cost = model_info["output_cost_per_character"]
+        completion_cost = completion_characters * custom_completion_cost
+    except Exception as e:
+        verbose_logger.error(
+            "litellm.litellm_core_utils.llm_cost_calc.utils.py::cost_per_character(): Exception occurred - {}\n{}\nDefaulting to None".format(
+                str(e), traceback.format_exc()
+            )
+        )
+        completion_cost = None
+
+    return prompt_cost, completion_cost
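
A hedged sketch of exercising this helper in isolation, mirroring how the speech branch in cost_per_token() invokes it (the 45-character count is illustrative, and the tts-1 pricing added in this commit is assumed to be loaded):

    from litellm.litellm_core_utils.llm_cost_calc.utils import _generic_cost_per_character

    prompt_cost, completion_cost = _generic_cost_per_character(
        model="tts-1",
        custom_llm_provider="openai",
        prompt_characters=45.0,
        completion_characters=0.0,
        custom_prompt_cost=None,   # None -> read input_cost_per_character from model_info
        custom_completion_cost=0,  # 0 -> audio output is not billed per character
    )
    # prompt_cost = 45.0 * 0.000015 = 0.000675; completion_cost = 0.0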

View file

@@ -398,6 +398,26 @@
         "output_cost_per_second": 0.0001,
         "litellm_provider": "openai"
     },
+    "tts-1": {
+        "mode": "audio_speech",
+        "input_cost_per_character": 0.000015,
+        "litellm_provider": "openai"
+    },
+    "tts-1-hd": {
+        "mode": "audio_speech",
+        "input_cost_per_character": 0.000030,
+        "litellm_provider": "openai"
+    },
+    "azure/tts-1": {
+        "mode": "audio_speech",
+        "input_cost_per_character": 0.000015,
+        "litellm_provider": "azure"
+    },
+    "azure/tts-1-hd": {
+        "mode": "audio_speech",
+        "input_cost_per_character": 0.000030,
+        "litellm_provider": "azure"
+    },
     "azure/whisper-1": {
         "mode": "audio_transcription",
         "input_cost_per_second": 0,

View file

@@ -712,7 +712,6 @@ def test_vertex_ai_claude_completion_cost():
     assert cost == predicted_cost


-
 @pytest.mark.parametrize("sync_mode", [True, False])
 @pytest.mark.asyncio
 async def test_completion_cost_hidden_params(sync_mode):
@@ -732,6 +731,7 @@ async def test_completion_cost_hidden_params(sync_mode):
     assert "response_cost" in response._hidden_params
     assert isinstance(response._hidden_params["response_cost"], float)

+
 def test_vertex_ai_gemini_predict_cost():
     model = "gemini-1.5-flash"
     messages = [{"role": "user", "content": "Hey, hows it going???"}]
@@ -739,3 +739,16 @@ def test_vertex_ai_gemini_predict_cost():
     assert predictive_cost > 0
+
+
+@pytest.mark.parametrize("model", ["openai/tts-1", "azure/tts-1"])
+def test_completion_cost_tts(model):
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    cost = completion_cost(
+        model=model,
+        prompt="the quick brown fox jumped over the lazy dogs",
+        call_type="speech",
+    )
+    assert cost > 0
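
With the prices above, the 45-character test prompt should cost roughly 45 * $0.000015 ≈ $0.000675 on tts-1 (the exact figure depends on how litellm's _count_characters tallies whitespace), so the test asserts cost > 0 as a sanity check rather than pinning an exact value.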

View file

@@ -4705,7 +4705,9 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo:
         )
     except Exception:
         raise Exception(
-            "This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
+            "This model isn't mapped yet. model={}, custom_llm_provider={}. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json".format(
+                model, custom_llm_provider
+            )
         )

View file

@@ -398,6 +398,26 @@
         "output_cost_per_second": 0.0001,
         "litellm_provider": "openai"
     },
+    "tts-1": {
+        "mode": "audio_speech",
+        "input_cost_per_character": 0.000015,
+        "litellm_provider": "openai"
+    },
+    "tts-1-hd": {
+        "mode": "audio_speech",
+        "input_cost_per_character": 0.000030,
+        "litellm_provider": "openai"
+    },
+    "azure/tts-1": {
+        "mode": "audio_speech",
+        "input_cost_per_character": 0.000015,
+        "litellm_provider": "azure"
+    },
+    "azure/tts-1-hd": {
+        "mode": "audio_speech",
+        "input_cost_per_character": 0.000030,
+        "litellm_provider": "azure"
+    },
     "azure/whisper-1": {
         "mode": "audio_transcription",
         "input_cost_per_second": 0,