mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 18:54:30 +00:00
fix(cost_calculator.py): support openai+azure tts calls
This commit is contained in:
parent
d528b66db0
commit
407639cc7d
6 changed files with 191 additions and 5 deletions
|
@ -13,6 +13,7 @@ from litellm.litellm_core_utils.llm_cost_calc.google import (
|
|||
from litellm.litellm_core_utils.llm_cost_calc.google import (
|
||||
cost_per_token as google_cost_per_token,
|
||||
)
|
||||
from litellm.litellm_core_utils.llm_cost_calc.utils import _generic_cost_per_character
|
||||
from litellm.utils import (
|
||||
CallTypes,
|
||||
CostPerToken,
|
||||
|
@ -62,6 +63,23 @@ def cost_per_token(
|
|||
### CUSTOM PRICING ###
|
||||
custom_cost_per_token: Optional[CostPerToken] = None,
|
||||
custom_cost_per_second: Optional[float] = None,
|
||||
### CALL TYPE ###
|
||||
call_type: Literal[
|
||||
"embedding",
|
||||
"aembedding",
|
||||
"completion",
|
||||
"acompletion",
|
||||
"atext_completion",
|
||||
"text_completion",
|
||||
"image_generation",
|
||||
"aimage_generation",
|
||||
"moderation",
|
||||
"amoderation",
|
||||
"atranscription",
|
||||
"transcription",
|
||||
"aspeech",
|
||||
"speech",
|
||||
] = "completion",
|
||||
) -> Tuple[float, float]:
|
||||
"""
|
||||
Calculates the cost per token for a given model, prompt tokens, and completion tokens.
|
||||
|
@ -76,6 +94,7 @@ def cost_per_token(
|
|||
custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list)
|
||||
custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call.
|
||||
custom_cost_per_second: Optional[float]: the cost per second for the llm api call.
|
||||
call_type: Optional[str]: the call type
|
||||
|
||||
Returns:
|
||||
tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively.
|
||||
|
@ -159,6 +178,27 @@ def cost_per_token(
|
|||
prompt_tokens=prompt_tokens,
|
||||
completion_tokens=completion_tokens,
|
||||
)
|
||||
elif call_type == "speech" or call_type == "aspeech":
|
||||
prompt_cost, completion_cost = _generic_cost_per_character(
|
||||
model=model_without_prefix,
|
||||
custom_llm_provider=custom_llm_provider,
|
||||
prompt_characters=prompt_characters,
|
||||
completion_characters=completion_characters,
|
||||
custom_prompt_cost=None,
|
||||
custom_completion_cost=0,
|
||||
)
|
||||
if prompt_cost is None or completion_cost is None:
|
||||
raise ValueError(
|
||||
"cost for tts call is None. prompt_cost={}, completion_cost={}, model={}, custom_llm_provider={}, prompt_characters={}, completion_characters={}".format(
|
||||
prompt_cost,
|
||||
completion_cost,
|
||||
model_without_prefix,
|
||||
custom_llm_provider,
|
||||
prompt_characters,
|
||||
completion_characters,
|
||||
)
|
||||
)
|
||||
return prompt_cost, completion_cost
|
||||
elif model in model_cost_ref:
|
||||
print_verbose(f"Success: model={model} in model_cost_map")
|
||||
print_verbose(
|
||||
|
@ -289,7 +329,7 @@ def cost_per_token(
|
|||
return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
|
||||
else:
|
||||
# if model is not in model_prices_and_context_window.json. Raise an exception-let users know
|
||||
error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n"
|
||||
error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}, custom_llm_provider={custom_llm_provider}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n"
|
||||
raise litellm.exceptions.NotFoundError( # type: ignore
|
||||
message=error_str,
|
||||
model=model,
|
||||
|
@ -535,6 +575,11 @@ def completion_cost(
|
|||
raise Exception(
|
||||
f"Model={image_gen_model_name} not found in completion cost model map"
|
||||
)
|
||||
elif (
|
||||
call_type == CallTypes.speech.value or call_type == CallTypes.aspeech.value
|
||||
):
|
||||
prompt_characters = litellm.utils._count_characters(text=prompt)
|
||||
|
||||
# Calculate cost based on prompt_tokens, completion_tokens
|
||||
if (
|
||||
"togethercomputer" in model
|
||||
|
@ -591,6 +636,7 @@ def completion_cost(
|
|||
custom_cost_per_token=custom_cost_per_token,
|
||||
prompt_characters=prompt_characters,
|
||||
completion_characters=completion_characters,
|
||||
call_type=call_type,
|
||||
)
|
||||
_final_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
|
||||
print_verbose(
|
||||
|
|
85
litellm/litellm_core_utils/llm_cost_calc/utils.py
Normal file
85
litellm/litellm_core_utils/llm_cost_calc/utils.py
Normal file
|
@ -0,0 +1,85 @@
|
|||
# What is this?
|
||||
## Helper utilities for cost_per_token()
|
||||
|
||||
import traceback
|
||||
from typing import List, Literal, Optional, Tuple
|
||||
|
||||
import litellm
|
||||
from litellm import verbose_logger
|
||||
|
||||
|
||||
def _generic_cost_per_character(
    model: str,
    custom_llm_provider: str,
    prompt_characters: float,
    completion_characters: float,
    custom_prompt_cost: Optional[float],
    custom_completion_cost: Optional[float],
) -> Tuple[Optional[float], Optional[float]]:
    """
    Generic function to help calculate cost per character.

    Looks up the model's pricing via `litellm.get_model_info` and multiplies
    the character counts by the per-character prices.

    Input:
        - model: str, the model name without provider prefix
        - custom_llm_provider: str, the provider forwarded to `litellm.get_model_info`
        - prompt_characters: float, the number of input characters
        - completion_characters: float, the number of output characters
        - custom_prompt_cost: Optional[float], per-character input price. If None,
            'input_cost_per_character' from the model info is used.
        - custom_completion_cost: Optional[float], per-character output price. If None,
            'output_cost_per_character' from the model info is used.

    Returns:
        Tuple[Optional[float], Optional[float]] - prompt_cost_in_usd, completion_cost_in_usd.
            - each element is None if that side's cost could not be calculated
              (e.g. the pricing key is missing); the failure is logged, not raised.

    Raises:
        Whatever `litellm.get_model_info` raises (the lookup is not guarded here).
    """
    ## GET MODEL INFO
    model_info = litellm.get_model_info(
        model=model, custom_llm_provider=custom_llm_provider
    )

    ## CALCULATE INPUT COST
    prompt_cost = _cost_for_characters(
        model=model,
        model_info=model_info,
        pricing_key="input_cost_per_character",
        characters=prompt_characters,
        custom_cost=custom_prompt_cost,
    )

    ## CALCULATE OUTPUT COST
    completion_cost = _cost_for_characters(
        model=model,
        model_info=model_info,
        pricing_key="output_cost_per_character",
        characters=completion_characters,
        custom_cost=custom_completion_cost,
    )

    return prompt_cost, completion_cost


def _cost_for_characters(
    model: str,
    model_info: dict,
    pricing_key: str,
    characters: float,
    custom_cost: Optional[float],
) -> Optional[float]:
    """
    Return `characters * price`, or None (after logging) if it cannot be computed.

    `custom_cost` takes precedence; when it is None the price is read from
    `model_info[pricing_key]`.
    """
    try:
        if custom_cost is None:
            custom_cost = model_info.get(pricing_key)
            if custom_cost is None:
                # Explicit raise instead of `assert`: asserts are stripped under
                # `python -O`, which would lose this diagnostic message.
                raise ValueError(
                    "model info for model={} does not have '{}'-pricing\nmodel_info={}".format(
                        model, pricing_key, model_info
                    )
                )
        return characters * custom_cost
    except Exception as e:
        # Deliberate best-effort: a pricing failure on one side must not abort
        # the whole cost calculation, so log it and report None for this side.
        verbose_logger.error(
            "litellm.litellm_core_utils.llm_cost_calc.utils.py::cost_per_character(): Exception occured - {}\n{}\nDefaulting to None".format(
                str(e), traceback.format_exc()
            )
        )
        return None
|
|
@ -397,7 +397,27 @@
|
|||
"input_cost_per_second": 0,
|
||||
"output_cost_per_second": 0.0001,
|
||||
"litellm_provider": "openai"
|
||||
},
|
||||
},
|
||||
"tts-1": {
|
||||
"mode": "audio_speech",
|
||||
"input_cost_per_character": 0.000015,
|
||||
"litellm_provider": "openai"
|
||||
},
|
||||
"tts-1-hd": {
|
||||
"mode": "audio_speech",
|
||||
"input_cost_per_character": 0.000030,
|
||||
"litellm_provider": "openai"
|
||||
},
|
||||
"azure/tts-1": {
|
||||
"mode": "audio_speech",
|
||||
"input_cost_per_character": 0.000015,
|
||||
"litellm_provider": "azure"
|
||||
},
|
||||
"azure/tts-1-hd": {
|
||||
"mode": "audio_speech",
|
||||
"input_cost_per_character": 0.000030,
|
||||
"litellm_provider": "azure"
|
||||
},
|
||||
"azure/whisper-1": {
|
||||
"mode": "audio_transcription",
|
||||
"input_cost_per_second": 0,
|
||||
|
|
|
@ -712,7 +712,6 @@ def test_vertex_ai_claude_completion_cost():
|
|||
assert cost == predicted_cost
|
||||
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sync_mode", [True, False])
|
||||
@pytest.mark.asyncio
|
||||
async def test_completion_cost_hidden_params(sync_mode):
|
||||
|
@ -732,6 +731,7 @@ async def test_completion_cost_hidden_params(sync_mode):
|
|||
assert "response_cost" in response._hidden_params
|
||||
assert isinstance(response._hidden_params["response_cost"], float)
|
||||
|
||||
|
||||
def test_vertex_ai_gemini_predict_cost():
|
||||
model = "gemini-1.5-flash"
|
||||
messages = [{"role": "user", "content": "Hey, hows it going???"}]
|
||||
|
@ -739,3 +739,16 @@ def test_vertex_ai_gemini_predict_cost():
|
|||
|
||||
assert predictive_cost > 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model", ["openai/tts-1", "azure/tts-1"])
def test_completion_cost_tts(model):
    """completion_cost() must return a positive cost for a `speech` call."""
    # NOTE(review): presumably makes litellm load its local cost map instead of
    # a remote one - confirm against get_model_cost_map().
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    speech_call = {
        "model": model,
        "prompt": "the quick brown fox jumped over the lazy dogs",
        "call_type": "speech",
    }
    cost = completion_cost(**speech_call)

    assert cost > 0
|
||||
|
|
|
@ -4705,7 +4705,9 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod
|
|||
)
|
||||
except Exception:
|
||||
raise Exception(
|
||||
"This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
|
||||
"This model isn't mapped yet. model={}, custom_llm_provider={}. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json".format(
|
||||
model, custom_llm_provider
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -397,7 +397,27 @@
|
|||
"input_cost_per_second": 0,
|
||||
"output_cost_per_second": 0.0001,
|
||||
"litellm_provider": "openai"
|
||||
},
|
||||
},
|
||||
"tts-1": {
|
||||
"mode": "audio_speech",
|
||||
"input_cost_per_character": 0.000015,
|
||||
"litellm_provider": "openai"
|
||||
},
|
||||
"tts-1-hd": {
|
||||
"mode": "audio_speech",
|
||||
"input_cost_per_character": 0.000030,
|
||||
"litellm_provider": "openai"
|
||||
},
|
||||
"azure/tts-1": {
|
||||
"mode": "audio_speech",
|
||||
"input_cost_per_character": 0.000015,
|
||||
"litellm_provider": "azure"
|
||||
},
|
||||
"azure/tts-1-hd": {
|
||||
"mode": "audio_speech",
|
||||
"input_cost_per_character": 0.000030,
|
||||
"litellm_provider": "azure"
|
||||
},
|
||||
"azure/whisper-1": {
|
||||
"mode": "audio_transcription",
|
||||
"input_cost_per_second": 0,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue