Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 03:04:13 +00:00)

commit 407639cc7d (parent d528b66db0)
fix(cost_calculator.py): support openai+azure tts calls

6 changed files with 191 additions and 5 deletions
litellm/cost_calculator.py

@@ -13,6 +13,7 @@ from litellm.litellm_core_utils.llm_cost_calc.google import (
 from litellm.litellm_core_utils.llm_cost_calc.google import (
     cost_per_token as google_cost_per_token,
 )
+from litellm.litellm_core_utils.llm_cost_calc.utils import _generic_cost_per_character
 from litellm.utils import (
     CallTypes,
     CostPerToken,
@@ -62,6 +63,23 @@ def cost_per_token(
     ### CUSTOM PRICING ###
     custom_cost_per_token: Optional[CostPerToken] = None,
     custom_cost_per_second: Optional[float] = None,
+    ### CALL TYPE ###
+    call_type: Literal[
+        "embedding",
+        "aembedding",
+        "completion",
+        "acompletion",
+        "atext_completion",
+        "text_completion",
+        "image_generation",
+        "aimage_generation",
+        "moderation",
+        "amoderation",
+        "atranscription",
+        "transcription",
+        "aspeech",
+        "speech",
+    ] = "completion",
 ) -> Tuple[float, float]:
     """
     Calculates the cost per token for a given model, prompt tokens, and completion tokens.
@@ -76,6 +94,7 @@ def cost_per_token(
         custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list)
         custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call.
         custom_cost_per_second: Optional[float]: the cost per second for the llm api call.
+        call_type: Optional[str]: the call type

     Returns:
         tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively.
@@ -159,6 +178,27 @@ def cost_per_token(
             prompt_tokens=prompt_tokens,
             completion_tokens=completion_tokens,
         )
+    elif call_type == "speech" or call_type == "aspeech":
+        prompt_cost, completion_cost = _generic_cost_per_character(
+            model=model_without_prefix,
+            custom_llm_provider=custom_llm_provider,
+            prompt_characters=prompt_characters,
+            completion_characters=completion_characters,
+            custom_prompt_cost=None,
+            custom_completion_cost=0,
+        )
+        if prompt_cost is None or completion_cost is None:
+            raise ValueError(
+                "cost for tts call is None. prompt_cost={}, completion_cost={}, model={}, custom_llm_provider={}, prompt_characters={}, completion_characters={}".format(
+                    prompt_cost,
+                    completion_cost,
+                    model_without_prefix,
+                    custom_llm_provider,
+                    prompt_characters,
+                    completion_characters,
+                )
+            )
+        return prompt_cost, completion_cost
     elif model in model_cost_ref:
         print_verbose(f"Success: model={model} in model_cost_map")
         print_verbose(
@@ -289,7 +329,7 @@ def cost_per_token(
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     else:
         # if model is not in model_prices_and_context_window.json. Raise an exception-let users know
-        error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n"
+        error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}, custom_llm_provider={custom_llm_provider}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n"
         raise litellm.exceptions.NotFoundError(  # type: ignore
             message=error_str,
             model=model,
|
@ -535,6 +575,11 @@ def completion_cost(
|
||||||
raise Exception(
|
raise Exception(
|
||||||
f"Model={image_gen_model_name} not found in completion cost model map"
|
f"Model={image_gen_model_name} not found in completion cost model map"
|
||||||
)
|
)
|
||||||
|
elif (
|
||||||
|
call_type == CallTypes.speech.value or call_type == CallTypes.aspeech.value
|
||||||
|
):
|
||||||
|
prompt_characters = litellm.utils._count_characters(text=prompt)
|
||||||
|
|
||||||
# Calculate cost based on prompt_tokens, completion_tokens
|
# Calculate cost based on prompt_tokens, completion_tokens
|
||||||
if (
|
if (
|
||||||
"togethercomputer" in model
|
"togethercomputer" in model
|
||||||
|
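The character count for the speech branch comes from litellm.utils._count_characters, the same helper the hunk above calls on the prompt. A tiny illustrative check (the exact whitespace handling is internal to litellm, so treat the count as indicative):

from litellm.utils import _count_characters

chars = _count_characters(text="the quick brown fox jumped over the lazy dogs")
assert chars > 0  # fed into cost_per_token as prompt_characters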
@@ -591,6 +636,7 @@ def completion_cost(
             custom_cost_per_token=custom_cost_per_token,
             prompt_characters=prompt_characters,
             completion_characters=completion_characters,
+            call_type=call_type,
         )
         _final_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
         print_verbose(
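Taken together, these changes let a text-to-speech request be priced end to end. A minimal sketch of the new path, mirroring the test added further down (completion_cost is litellm's public costing helper; call_type="speech" is what routes into the new per-character branch):

import os

import litellm
from litellm import completion_cost

# Load the bundled local cost map so the new tts-1 pricing entries are present.
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")

cost = completion_cost(
    model="openai/tts-1",
    prompt="the quick brown fox jumped over the lazy dogs",
    call_type="speech",  # "aspeech" for the async call type
)
print(f"estimated cost: ${cost:.6f}")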
litellm/litellm_core_utils/llm_cost_calc/utils.py (new file, 85 lines)
@@ -0,0 +1,85 @@
+# What is this?
+## Helper utilities for cost_per_token()
+
+import traceback
+from typing import List, Literal, Optional, Tuple
+
+import litellm
+from litellm import verbose_logger
+
+
+def _generic_cost_per_character(
+    model: str,
+    custom_llm_provider: str,
+    prompt_characters: float,
+    completion_characters: float,
+    custom_prompt_cost: Optional[float],
+    custom_completion_cost: Optional[float],
+) -> Tuple[Optional[float], Optional[float]]:
+    """
+    Generic function to help calculate cost per character.
+    """
+    """
+    Calculates the cost per character for a given model, input messages, and response object.
+
+    Input:
+        - model: str, the model name without provider prefix
+        - custom_llm_provider: str, "vertex_ai-*"
+        - prompt_characters: float, the number of input characters
+        - completion_characters: float, the number of output characters
+
+    Returns:
+        Tuple[Optional[float], Optional[float]] - prompt_cost_in_usd, completion_cost_in_usd.
+        - returns None if not able to calculate cost.
+
+    Raises:
+        Exception if 'input_cost_per_character' or 'output_cost_per_character' is missing from model_info
+    """
+    args = locals()
+    ## GET MODEL INFO
+    model_info = litellm.get_model_info(
+        model=model, custom_llm_provider=custom_llm_provider
+    )
+
+    ## CALCULATE INPUT COST
+    try:
+        if custom_prompt_cost is None:
+            assert (
+                "input_cost_per_character" in model_info
+                and model_info["input_cost_per_character"] is not None
+            ), "model info for model={} does not have 'input_cost_per_character'-pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            custom_prompt_cost = model_info["input_cost_per_character"]
+
+        prompt_cost = prompt_characters * custom_prompt_cost
+    except Exception as e:
+        verbose_logger.error(
+            "litellm.litellm_core_utils.llm_cost_calc.utils.py::cost_per_character(): Exception occured - {}\n{}\nDefaulting to None".format(
+                str(e), traceback.format_exc()
+            )
+        )
+
+        prompt_cost = None
+
+    ## CALCULATE OUTPUT COST
+    try:
+        if custom_completion_cost is None:
+            assert (
+                "output_cost_per_character" in model_info
+                and model_info["output_cost_per_character"] is not None
+            ), "model info for model={} does not have 'output_cost_per_character'-pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            custom_completion_cost = model_info["output_cost_per_character"]
+
+        completion_cost = completion_characters * custom_completion_cost
+    except Exception as e:
+        verbose_logger.error(
+            "litellm.litellm_core_utils.llm_cost_calc.utils.py::cost_per_character(): Exception occured - {}\n{}\nDefaulting to None".format(
+                str(e), traceback.format_exc()
+            )
+        )
+
+        completion_cost = None
+
+    return prompt_cost, completion_cost
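The helper's contract in miniature (the character count and resulting figures are illustrative, using the tts-1 rate added to the pricing map below):

from litellm.litellm_core_utils.llm_cost_calc.utils import _generic_cost_per_character

# 45 input characters at tts-1's input_cost_per_character (0.000015 USD).
# Passing custom_completion_cost=0 short-circuits the output-pricing lookup,
# since TTS output is audio rather than billable text.
prompt_cost, completion_cost = _generic_cost_per_character(
    model="tts-1",
    custom_llm_provider="openai",
    prompt_characters=45,
    completion_characters=0,
    custom_prompt_cost=None,  # None -> read input_cost_per_character from model_info
    custom_completion_cost=0,
)
# prompt_cost == 45 * 0.000015 == 0.000675; completion_cost == 0

The rates it reads come from litellm's model pricing map. The repo ships two copies of that JSON, which is why the identical hunk below appears twice in this diff (once here, once at the end):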
@@ -398,6 +398,26 @@
     "output_cost_per_second": 0.0001,
     "litellm_provider": "openai"
 },
+"tts-1": {
+    "mode": "audio_speech",
+    "input_cost_per_character": 0.000015,
+    "litellm_provider": "openai"
+},
+"tts-1-hd": {
+    "mode": "audio_speech",
+    "input_cost_per_character": 0.000030,
+    "litellm_provider": "openai"
+},
+"azure/tts-1": {
+    "mode": "audio_speech",
+    "input_cost_per_character": 0.000015,
+    "litellm_provider": "azure"
+},
+"azure/tts-1-hd": {
+    "mode": "audio_speech",
+    "input_cost_per_character": 0.000030,
+    "litellm_provider": "azure"
+},
 "azure/whisper-1": {
     "mode": "audio_transcription",
     "input_cost_per_second": 0,
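These per-character rates match OpenAI's published TTS pricing at the time ($15 per 1M characters for tts-1, $30 per 1M for tts-1-hd). A quick sanity check on the arithmetic:

# Per-character TTS rates as added above (USD)
TTS_1 = 0.000015     # == $15 / 1_000_000 characters
TTS_1_HD = 0.000030  # == $30 / 1_000_000 characters

text = "the quick brown fox jumped over the lazy dogs"  # 45 characters
print(len(text) * TTS_1)     # 0.000675
print(len(text) * TTS_1_HD)  # 0.00135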
Test suite (completion cost tests):

@@ -712,7 +712,6 @@ def test_vertex_ai_claude_completion_cost():
     assert cost == predicted_cost
 
-
 
 @pytest.mark.parametrize("sync_mode", [True, False])
 @pytest.mark.asyncio
 async def test_completion_cost_hidden_params(sync_mode):

@@ -732,6 +731,7 @@ async def test_completion_cost_hidden_params(sync_mode):
     assert "response_cost" in response._hidden_params
     assert isinstance(response._hidden_params["response_cost"], float)
 
+
 def test_vertex_ai_gemini_predict_cost():
     model = "gemini-1.5-flash"
     messages = [{"role": "user", "content": "Hey, hows it going???"}]
@@ -739,3 +739,16 @@ def test_vertex_ai_gemini_predict_cost():
 
 
     assert predictive_cost > 0
+
+
+@pytest.mark.parametrize("model", ["openai/tts-1", "azure/tts-1"])
+def test_completion_cost_tts(model):
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    cost = completion_cost(
+        model=model,
+        prompt="the quick brown fox jumped over the lazy dogs",
+        call_type="speech",
+    )
+
+    assert cost > 0
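To run just the new test locally, standard pytest selection works, pointing pytest at the completion-cost test module in your checkout:

pytest -k test_completion_cost_tts -v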
@@ -4705,7 +4705,9 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> ModelInfo:
             )
         except Exception:
             raise Exception(
-                "This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
+                "This model isn't mapped yet. model={}, custom_llm_provider={}. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json".format(
+                    model, custom_llm_provider
+                )
             )
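The enriched error string makes failed lookups self-describing. On the success path, get_model_info is also what the new per-character helper calls. An illustrative lookup (the returned values depend on the cost map loaded):

import litellm

info = litellm.get_model_info(model="tts-1", custom_llm_provider="openai")
print(info["input_cost_per_character"])  # 1.5e-05 with the entries added in this commit

Finally, the identical pricing hunk below updates the second copy of the model pricing map that ships with the repo: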
@@ -398,6 +398,26 @@
     "output_cost_per_second": 0.0001,
     "litellm_provider": "openai"
 },
+"tts-1": {
+    "mode": "audio_speech",
+    "input_cost_per_character": 0.000015,
+    "litellm_provider": "openai"
+},
+"tts-1-hd": {
+    "mode": "audio_speech",
+    "input_cost_per_character": 0.000030,
+    "litellm_provider": "openai"
+},
+"azure/tts-1": {
+    "mode": "audio_speech",
+    "input_cost_per_character": 0.000015,
+    "litellm_provider": "azure"
+},
+"azure/tts-1-hd": {
+    "mode": "audio_speech",
+    "input_cost_per_character": 0.000030,
+    "litellm_provider": "azure"
+},
 "azure/whisper-1": {
     "mode": "audio_transcription",
     "input_cost_per_second": 0,