diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py
index 062e98be9..e4963a6f1 100644
--- a/litellm/cost_calculator.py
+++ b/litellm/cost_calculator.py
@@ -13,6 +13,7 @@ from litellm.litellm_core_utils.llm_cost_calc.google import (
 from litellm.litellm_core_utils.llm_cost_calc.google import (
     cost_per_token as google_cost_per_token,
 )
+from litellm.litellm_core_utils.llm_cost_calc.utils import _generic_cost_per_character
 from litellm.utils import (
     CallTypes,
     CostPerToken,
@@ -62,6 +63,23 @@ def cost_per_token(
     ### CUSTOM PRICING ###
     custom_cost_per_token: Optional[CostPerToken] = None,
     custom_cost_per_second: Optional[float] = None,
+    ### CALL TYPE ###
+    call_type: Literal[
+        "embedding",
+        "aembedding",
+        "completion",
+        "acompletion",
+        "atext_completion",
+        "text_completion",
+        "image_generation",
+        "aimage_generation",
+        "moderation",
+        "amoderation",
+        "atranscription",
+        "transcription",
+        "aspeech",
+        "speech",
+    ] = "completion",
 ) -> Tuple[float, float]:
     """
     Calculates the cost per token for a given model, prompt tokens, and completion tokens.
@@ -76,6 +94,7 @@ def cost_per_token(
         custom_llm_provider (str): The llm provider to whom the call was made (see init.py for full list)
         custom_cost_per_token: Optional[CostPerToken]: the cost per input + output token for the llm api call.
         custom_cost_per_second: Optional[float]: the cost per second for the llm api call.
+        call_type: str: the type of request (e.g. "completion", "speech"); defaults to "completion".

     Returns:
         tuple: A tuple containing the cost in USD dollars for prompt tokens and completion tokens, respectively.
@@ -159,6 +178,27 @@ def cost_per_token(
             prompt_tokens=prompt_tokens,
             completion_tokens=completion_tokens,
         )
+    elif call_type == "speech" or call_type == "aspeech":
+        prompt_cost, completion_cost = _generic_cost_per_character(
+            model=model_without_prefix,
+            custom_llm_provider=custom_llm_provider,
+            prompt_characters=prompt_characters,
+            completion_characters=completion_characters,
+            custom_prompt_cost=None,
+            custom_completion_cost=0,
+        )
+        if prompt_cost is None or completion_cost is None:
+            raise ValueError(
+                "cost for tts call is None. prompt_cost={}, completion_cost={}, model={}, custom_llm_provider={}, prompt_characters={}, completion_characters={}".format(
+                    prompt_cost,
+                    completion_cost,
+                    model_without_prefix,
+                    custom_llm_provider,
+                    prompt_characters,
+                    completion_characters,
+                )
+            )
+        return prompt_cost, completion_cost
     elif model in model_cost_ref:
         print_verbose(f"Success: model={model} in model_cost_map")
         print_verbose(
@@ -289,7 +329,7 @@ def cost_per_token(
         return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
     else:
         # if model is not in model_prices_and_context_window.json. Raise an exception-let users know
-        error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n"
+        error_str = f"Model not in model_prices_and_context_window.json. You passed model={model}, custom_llm_provider={custom_llm_provider}. Register pricing for model - https://docs.litellm.ai/docs/proxy/custom_pricing\n"
         raise litellm.exceptions.NotFoundError(  # type: ignore
             message=error_str,
             model=model,
@@ -535,6 +575,11 @@ def completion_cost(
                 raise Exception(
                     f"Model={image_gen_model_name} not found in completion cost model map"
                 )
+        elif (
+            call_type == CallTypes.speech.value or call_type == CallTypes.aspeech.value
+        ):
+            prompt_characters = litellm.utils._count_characters(text=prompt)
+
         # Calculate cost based on prompt_tokens, completion_tokens
         if (
             "togethercomputer" in model
@@ -591,6 +636,7 @@ def completion_cost(
             custom_cost_per_token=custom_cost_per_token,
             prompt_characters=prompt_characters,
             completion_characters=completion_characters,
+            call_type=call_type,
         )
         _final_cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
         print_verbose(
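For context (not part of the patch): the `speech`/`aspeech` branch above bills by input characters only, which is why `custom_completion_cost=0` is passed. A quick sanity check on the per-character rates registered further down in this diff, assuming OpenAI's published $15.00 per 1M input characters price for `tts-1`:

```python
# Sanity check for the per-character rates added in this diff.
# Assumes OpenAI's published tts-1 price of $15.00 per 1M input characters
# (tts-1-hd is $30.00 per 1M, i.e. 0.000030 per character).
rate_per_character = 15.00 / 1_000_000
assert rate_per_character == 0.000015  # matches "input_cost_per_character" below

# A 45-character prompt would therefore cost:
print(45 * rate_per_character)  # 0.000675 (USD)
```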
diff --git a/litellm/litellm_core_utils/llm_cost_calc/utils.py b/litellm/litellm_core_utils/llm_cost_calc/utils.py
new file mode 100644
index 000000000..e986a22a6
--- /dev/null
+++ b/litellm/litellm_core_utils/llm_cost_calc/utils.py
@@ -0,0 +1,85 @@
+# What is this?
+## Helper utilities for cost_per_token()
+
+import traceback
+from typing import List, Literal, Optional, Tuple
+
+import litellm
+from litellm import verbose_logger
+
+
+def _generic_cost_per_character(
+    model: str,
+    custom_llm_provider: str,
+    prompt_characters: float,
+    completion_characters: float,
+    custom_prompt_cost: Optional[float],
+    custom_completion_cost: Optional[float],
+) -> Tuple[Optional[float], Optional[float]]:
+    """
+    Generic helper to calculate the cost per character for a given model.
+
+    Input:
+        - model: str, the model name without provider prefix
+        - custom_llm_provider: str, e.g. "vertex_ai-*"
+        - prompt_characters: float, the number of input characters
+        - completion_characters: float, the number of output characters
+        - custom_prompt_cost: Optional[float], overrides 'input_cost_per_character' from model_info
+        - custom_completion_cost: Optional[float], overrides 'output_cost_per_character' from model_info
+
+    Returns:
+        Tuple[Optional[float], Optional[float]] - (prompt_cost_in_usd, completion_cost_in_usd).
+        A cost defaults to None if it could not be calculated, e.g. when
+        'input_cost_per_character' / 'output_cost_per_character' is missing from model_info.
+    """
+    args = locals()
+    ## GET MODEL INFO
+    model_info = litellm.get_model_info(
+        model=model, custom_llm_provider=custom_llm_provider
+    )
+
+    ## CALCULATE INPUT COST
+    try:
+        if custom_prompt_cost is None:
+            assert (
+                "input_cost_per_character" in model_info
+                and model_info["input_cost_per_character"] is not None
+            ), "model info for model={} does not have 'input_cost_per_character'-pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            custom_prompt_cost = model_info["input_cost_per_character"]
+
+        prompt_cost = prompt_characters * custom_prompt_cost
+    except Exception as e:
+        verbose_logger.error(
+            "litellm.litellm_core_utils.llm_cost_calc.utils.py::_generic_cost_per_character(): Exception occurred - {}\n{}\nDefaulting to None".format(
+                str(e), traceback.format_exc()
+            )
+        )
+
+        prompt_cost = None
+
+    ## CALCULATE OUTPUT COST
+    try:
+        if custom_completion_cost is None:
+            assert (
+                "output_cost_per_character" in model_info
+                and model_info["output_cost_per_character"] is not None
+            ), "model info for model={} does not have 'output_cost_per_character'-pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            custom_completion_cost = model_info["output_cost_per_character"]
+        completion_cost = completion_characters * custom_completion_cost
+    except Exception as e:
+        verbose_logger.error(
+            "litellm.litellm_core_utils.llm_cost_calc.utils.py::_generic_cost_per_character(): Exception occurred - {}\n{}\nDefaulting to None".format(
+                str(e), traceback.format_exc()
+            )
+        )
+
+        completion_cost = None
+
+    return prompt_cost, completion_cost
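For illustration only (not part of the patch), the helper can be exercised directly once a model with character pricing is in the cost map; the values below mirror the `tts-1` entry added further down in this diff:

```python
# Illustrative sketch: calling the new helper directly.
# Assumes "tts-1" is in litellm.model_cost with input_cost_per_character = 0.000015.
from litellm.litellm_core_utils.llm_cost_calc.utils import _generic_cost_per_character

prompt_cost, completion_cost = _generic_cost_per_character(
    model="tts-1",
    custom_llm_provider="openai",
    prompt_characters=45.0,
    completion_characters=0.0,
    custom_prompt_cost=None,   # None -> fall back to input_cost_per_character from model_info
    custom_completion_cost=0,  # 0 (not None) -> skip the model_info lookup; TTS bills no output characters
)
# prompt_cost -> 45 * 0.000015 = 0.000675, completion_cost -> 0.0
```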
diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 34b161344..be2fab51d 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -397,7 +397,27 @@
         "input_cost_per_second": 0,
         "output_cost_per_second": 0.0001,
         "litellm_provider": "openai"
-    },
+    },
+    "tts-1": {
+        "mode": "audio_speech",
+        "input_cost_per_character": 0.000015,
+        "litellm_provider": "openai"
+    },
+    "tts-1-hd": {
+        "mode": "audio_speech",
+        "input_cost_per_character": 0.000030,
+        "litellm_provider": "openai"
+    },
+    "azure/tts-1": {
+        "mode": "audio_speech",
+        "input_cost_per_character": 0.000015,
+        "litellm_provider": "azure"
+    },
+    "azure/tts-1-hd": {
+        "mode": "audio_speech",
+        "input_cost_per_character": 0.000030,
+        "litellm_provider": "azure"
+    },
     "azure/whisper-1": {
         "mode": "audio_transcription",
         "input_cost_per_second": 0,
diff --git a/litellm/tests/test_completion_cost.py b/litellm/tests/test_completion_cost.py
index bffb68e0e..1b4df0ecc 100644
--- a/litellm/tests/test_completion_cost.py
+++ b/litellm/tests/test_completion_cost.py
@@ -712,7 +712,6 @@ def test_vertex_ai_claude_completion_cost():

     assert cost == predicted_cost

-
 @pytest.mark.parametrize("sync_mode", [True, False])
 @pytest.mark.asyncio
 async def test_completion_cost_hidden_params(sync_mode):
@@ -732,6 +731,7 @@ async def test_completion_cost_hidden_params(sync_mode):
     assert "response_cost" in response._hidden_params
     assert isinstance(response._hidden_params["response_cost"], float)

+
 def test_vertex_ai_gemini_predict_cost():
     model = "gemini-1.5-flash"
     messages = [{"role": "user", "content": "Hey, hows it going???"}]
@@ -739,3 +739,16 @@ def test_vertex_ai_gemini_predict_cost():

     assert predictive_cost > 0

+
+@pytest.mark.parametrize("model", ["openai/tts-1", "azure/tts-1"])
+def test_completion_cost_tts(model):
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    cost = completion_cost(
+        model=model,
+        prompt="the quick brown fox jumped over the lazy dogs",
+        call_type="speech",
+    )
+
+    assert cost > 0
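A note on the new test: setting `LITELLM_LOCAL_MODEL_COST_MAP=True` before calling `litellm.get_model_cost_map(url="")` should make litellm load its bundled backup pricing map instead of fetching the remote JSON, so the test exercises exactly the `tts-1` entries added in this diff. It can be run in isolation with `pytest litellm/tests/test_completion_cost.py -k test_completion_cost_tts`.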
diff --git a/litellm/utils.py b/litellm/utils.py
index 490b809a1..50e31053d 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -4705,7 +4705,9 @@ def get_model_info(model: str, custom_llm_provider: Optional[str] = None) -> Mod
         )
     except Exception:
         raise Exception(
-            "This model isn't mapped yet. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json"
+            "This model isn't mapped yet. model={}, custom_llm_provider={}. Add it here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json".format(
+                model, custom_llm_provider
+            )
         )
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index 34b161344..be2fab51d 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -397,7 +397,27 @@
         "input_cost_per_second": 0,
         "output_cost_per_second": 0.0001,
         "litellm_provider": "openai"
-    },
+    },
+    "tts-1": {
+        "mode": "audio_speech",
+        "input_cost_per_character": 0.000015,
+        "litellm_provider": "openai"
+    },
+    "tts-1-hd": {
+        "mode": "audio_speech",
+        "input_cost_per_character": 0.000030,
+        "litellm_provider": "openai"
+    },
+    "azure/tts-1": {
+        "mode": "audio_speech",
+        "input_cost_per_character": 0.000015,
+        "litellm_provider": "azure"
+    },
+    "azure/tts-1-hd": {
+        "mode": "audio_speech",
+        "input_cost_per_character": 0.000030,
+        "litellm_provider": "azure"
+    },
     "azure/whisper-1": {
         "mode": "audio_transcription",
         "input_cost_per_second": 0,
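Taken together, the changes expose character-based TTS pricing through the public `completion_cost` helper. A minimal end-to-end sketch, mirroring the new test (the exact figure depends on how litellm counts characters in the prompt):

```python
# Minimal usage sketch, assuming this patch is applied.
import os

import litellm
from litellm import completion_cost

# Use the bundled local pricing map (same setup as the new test).
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")

cost = completion_cost(
    model="openai/tts-1",
    prompt="the quick brown fox jumped over the lazy dogs",
    call_type="speech",
)
# tts-1 is billed at 0.000015 USD per counted input character; output cost is 0.
print(f"estimated TTS cost: ${cost:.6f}")
```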