From 3f965df68b03e431be99df82b00a9811d544ac8f Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Thu, 11 Jul 2024 11:52:18 -0700
Subject: [PATCH] fix(llm_cost_calc/google.py): fix google embedding cost calculation

Fixes https://github.com/BerriAI/litellm/issues/4630
---
 litellm/cost_calculator.py                    | 30 +++++---
 litellm/integrations/slack_alerting.py        |  6 +-
 litellm/litellm_core_utils/litellm_logging.py |  2 +-
 .../llm_cost_calc/google.py                   | 28 ++++++-
 litellm/proxy/_new_secret_config.yaml         | 12 ++-
 litellm/tests/test_completion_cost.py         | 73 +++++++++++++++++++
 6 files changed, 133 insertions(+), 18 deletions(-)

diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py
index 0bc65a7f1d..13a9e4bdc6 100644
--- a/litellm/cost_calculator.py
+++ b/litellm/cost_calculator.py
@@ -15,10 +15,12 @@ from litellm.litellm_core_utils.llm_cost_calc.google import (
 from litellm.litellm_core_utils.llm_cost_calc.google import (
     cost_per_token as google_cost_per_token,
 )
+from litellm.litellm_core_utils.llm_cost_calc.google import (
+    cost_router as google_cost_router,
+)
 from litellm.litellm_core_utils.llm_cost_calc.utils import _generic_cost_per_character
 from litellm.types.llms.openai import HttpxBinaryResponseContent
 from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
-
 from litellm.utils import (
     CallTypes,
     CostPerToken,
@@ -160,22 +162,32 @@ def cost_per_token(
     # see this https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
     print_verbose(f"Looking up model={model} in model_cost_map")
 
-    if custom_llm_provider == "vertex_ai" and "claude" in model:
-        return google_cost_per_token(
-            model=model_without_prefix,
-            custom_llm_provider=custom_llm_provider,
-            prompt_tokens=prompt_tokens,
-            completion_tokens=completion_tokens,
-        )
     if custom_llm_provider == "vertex_ai":
-        return google_cost_per_character(
+        cost_router = google_cost_router(
             model=model_without_prefix,
             custom_llm_provider=custom_llm_provider,
             prompt_characters=prompt_characters,
             completion_characters=completion_characters,
             prompt_tokens=prompt_tokens,
             completion_tokens=completion_tokens,
+            call_type=call_type,
         )
+        if cost_router == "cost_per_character":
+            return google_cost_per_character(
+                model=model_without_prefix,
+                custom_llm_provider=custom_llm_provider,
+                prompt_characters=prompt_characters,
+                completion_characters=completion_characters,
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+            )
+        elif cost_router == "cost_per_token":
+            return google_cost_per_token(
+                model=model_without_prefix,
+                custom_llm_provider=custom_llm_provider,
+                prompt_tokens=prompt_tokens,
+                completion_tokens=completion_tokens,
+            )
     elif custom_llm_provider == "gemini":
         return google_cost_per_token(
             model=model_without_prefix,
diff --git a/litellm/integrations/slack_alerting.py b/litellm/integrations/slack_alerting.py
index 437e8ce135..b7b62b61f6 100644
--- a/litellm/integrations/slack_alerting.py
+++ b/litellm/integrations/slack_alerting.py
@@ -1530,9 +1530,9 @@ Model Info:
         """Log deployment latency"""
         try:
             if "daily_reports" in self.alert_types:
-                model_id = (
-                    kwargs.get("litellm_params", {}).get("model_info", {}).get("id", "")
-                )
+                litellm_params = kwargs.get("litellm_params", {}) or {}
+                model_info = litellm_params.get("model_info", {}) or {}
+                model_id = model_info.get("id", "") or ""
                 response_s: timedelta = end_time - start_time
 
                 final_value = response_s
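
For context: with the routing change above, Vertex AI requests now pass through google_cost_router before any cost math runs. A minimal sketch of exercising the new path through litellm's public cost_per_token helper, mirroring the unit test added later in this patch (assumes the patch is applied; the token count is a hypothetical value for a short input):

import os

import litellm
from litellm import cost_per_token

# Use the cost map bundled with the package instead of fetching it remotely.
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")

# Embedding call types now route to the per-token calculation rather than the
# per-character path that issue #4630 reports mis-handling embedding models.
prompt_cost, completion_cost = cost_per_token(
    model="textembedding-gecko",
    custom_llm_provider="vertex_ai",
    prompt_tokens=9,  # hypothetical token count
    call_type="aembedding",
)
print(prompt_cost, completion_cost)
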
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index 0271c57147..0edc90325d 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -1275,7 +1275,7 @@ class Logging:
                     f"Model={self.model}; cost={self.model_call_details['response_cost']}"
                 )
             except litellm.NotFoundError as e:
-                verbose_logger.error(
+                verbose_logger.warning(
                     f"Model={self.model} not found in completion cost map. Setting 'response_cost' to None"
                 )
                 self.model_call_details["response_cost"] = None
diff --git a/litellm/litellm_core_utils/llm_cost_calc/google.py b/litellm/litellm_core_utils/llm_cost_calc/google.py
index 2c958cf88a..76da0da51e 100644
--- a/litellm/litellm_core_utils/llm_cost_calc/google.py
+++ b/litellm/litellm_core_utils/llm_cost_calc/google.py
@@ -1,7 +1,7 @@
 # What is this?
 ## Cost calculation for Google AI Studio / Vertex AI models
 import traceback
-from typing import List, Literal, Optional, Tuple
+from typing import List, Literal, Optional, Tuple, Union
 
 import litellm
 from litellm import verbose_logger
@@ -29,6 +29,32 @@ def _is_above_128k(tokens: float) -> bool:
     return False
 
 
+def cost_router(
+    model: str,
+    custom_llm_provider: str,
+    prompt_tokens: float,
+    completion_tokens: float,
+    prompt_characters: float,
+    completion_characters: float,
+    call_type: Union[Literal["embedding", "aembedding"], str],
+) -> Literal["cost_per_character", "cost_per_token"]:
+    """
+    Route the cost calc to the right place, based on model/call_type/etc.
+
+    Returns
+        - str, the specific google cost calc function it should route to.
+    """
+    if custom_llm_provider == "vertex_ai" and "claude" in model:
+        return "cost_per_token"
+    elif custom_llm_provider == "gemini":
+        return "cost_per_token"
+    elif custom_llm_provider == "vertex_ai" and (
+        call_type == "embedding" or call_type == "aembedding"
+    ):
+        return "cost_per_token"
+    return "cost_per_character"
+
+
 def cost_per_character(
     model: str,
     custom_llm_provider: str,
diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml
index 0f1f981d7a..a8c9e88233 100644
--- a/litellm/proxy/_new_secret_config.yaml
+++ b/litellm/proxy/_new_secret_config.yaml
@@ -1,10 +1,14 @@
 model_list:
-  - model_name: "*"
+  - model_name: azure-ai-mistral
     litellm_params:
-      model: "openai/*"
-  - model_name: claude-3-5-sonnet-20240620
+      api_base: os.environ/AZURE_AI_MISTRAL_API_BASE
+      api_key: os.environ/AZURE_AI_MISTRAL_API_KEY
+      model: azure_ai/Mistral-large-nmefg
+  - model_name: azure-ai-phi
     litellm_params:
-      model: gpt-3.5-turbo
+      api_base: os.environ/AZURE_AI_PHI_API_BASE
+      api_key: os.environ/AZURE_AI_PHI_API_KEY
+      model: azure_ai/Phi-3-medium-128k-instruct-fpmvj
 
 
 general_settings:
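
Since cost_router is a pure function, the routing table it implements can be checked in isolation. A small illustration of the branches above (model names and counts here are examples only):

from litellm.litellm_core_utils.llm_cost_calc.google import cost_router

# Vertex AI embedding (sync or async) call types are priced per token.
assert cost_router(
    model="textembedding-gecko",
    custom_llm_provider="vertex_ai",
    prompt_tokens=9,
    completion_tokens=0,
    prompt_characters=44,
    completion_characters=0,
    call_type="aembedding",
) == "cost_per_token"

# Ordinary Vertex AI completions still fall through to per-character pricing.
assert cost_router(
    model="gemini-1.5-pro",
    custom_llm_provider="vertex_ai",
    prompt_tokens=9,
    completion_tokens=5,
    prompt_characters=44,
    completion_characters=25,
    call_type="completion",
) == "cost_per_character"
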
diff --git a/litellm/tests/test_completion_cost.py b/litellm/tests/test_completion_cost.py
index 1b4df0ecc0..5d30ccb036 100644
--- a/litellm/tests/test_completion_cost.py
+++ b/litellm/tests/test_completion_cost.py
@@ -712,6 +712,79 @@ def test_vertex_ai_claude_completion_cost():
     assert cost == predicted_cost
 
 
+def test_vertex_ai_embedding_completion_cost(caplog):
+    """
+    Relevant issue - https://github.com/BerriAI/litellm/issues/4630
+    """
+    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+    litellm.model_cost = litellm.get_model_cost_map(url="")
+
+    text = "The quick brown fox jumps over the lazy dog."
+    input_tokens = litellm.token_counter(
+        model="vertex_ai/textembedding-gecko", text=text
+    )
+
+    model_info = litellm.get_model_info(model="vertex_ai/textembedding-gecko")
+
+    print("\nExpected model info:\n{}\n\n".format(model_info))
+
+    expected_input_cost = input_tokens * model_info["input_cost_per_token"]
+
+    ## CALCULATED COST
+    calculated_input_cost, calculated_output_cost = cost_per_token(
+        model="textembedding-gecko",
+        custom_llm_provider="vertex_ai",
+        prompt_tokens=input_tokens,
+        call_type="aembedding",
+    )
+
+    assert round(expected_input_cost, 6) == round(calculated_input_cost, 6)
+    print("expected_input_cost: {}".format(expected_input_cost))
+    print("calculated_input_cost: {}".format(calculated_input_cost))
+
+    captured_logs = [rec.message for rec in caplog.records]
+    for item in captured_logs:
+        print("\nitem:{}\n".format(item))
+        if (
+            "litellm.litellm_core_utils.llm_cost_calc.google.cost_per_character(): Exception occured "
+            in item
+        ):
+            raise Exception("Error log raised for calculating embedding cost")
+
+
+# def test_vertex_ai_embedding_completion_cost_e2e():
+#     """
+#     Relevant issue - https://github.com/BerriAI/litellm/issues/4630
+#     """
+#     from litellm.tests.test_amazing_vertex_completion import load_vertex_ai_credentials
+
+#     load_vertex_ai_credentials()
+#     os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
+#     litellm.model_cost = litellm.get_model_cost_map(url="")
+
+#     text = "The quick brown fox jumps over the lazy dog."
+#     input_tokens = litellm.token_counter(
+#         model="vertex_ai/textembedding-gecko", text=text
+#     )
+
+#     model_info = litellm.get_model_info(model="vertex_ai/textembedding-gecko")
+
+#     print("\nExpected model info:\n{}\n\n".format(model_info))
+
+#     expected_input_cost = input_tokens * model_info["input_cost_per_token"]
+
+#     ## CALCULATED COST
+#     resp = litellm.embedding(model="textembedding-gecko", input=[text])
+
+#     calculated_input_cost = resp._hidden_params["response_cost"]
+
+#     assert round(expected_input_cost, 6) == round(calculated_input_cost, 6)
+#     print("expected_input_cost: {}".format(expected_input_cost))
+#     print("calculated_input_cost: {}".format(calculated_input_cost))
+
+#     assert False
+
+
 @pytest.mark.parametrize("sync_mode", [True, False])
 @pytest.mark.asyncio
 async def test_completion_cost_hidden_params(sync_mode):
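
The commented-out e2e variant above shows where this cost ultimately surfaces at runtime. A condensed sketch, assuming working Vertex AI credentials in the environment (the provider-prefixed model name is used here so litellm routes to Vertex AI):

import litellm

# The embedding response carries the computed cost in its hidden params.
resp = litellm.embedding(
    model="vertex_ai/textembedding-gecko",
    input=["The quick brown fox jumps over the lazy dog."],
)
print(resp._hidden_params["response_cost"])
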