feat(llm_cost_calc/google.py): do character-based cost calculation for Vertex AI
Calculate cost for Vertex AI responses using the number of characters in the query/response.
Closes https://github.com/BerriAI/litellm/issues/4165
parent cab057da4a
commit 16da21e839
5 changed files with 287 additions and 17 deletions
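At its core, the change switches the Vertex AI path from multiplying token counts by per-token rates to multiplying character counts by per-character rates. A minimal sketch of the intended arithmetic, using made-up rates rather than values from litellm's model cost map:

    # Hypothetical per-character rates, for illustration only
    input_cost_per_character = 1.25e-07   # USD per prompt character (assumed)
    output_cost_per_character = 3.75e-07  # USD per completion character (assumed)

    prompt_characters = 1_000
    completion_characters = 200

    prompt_cost = prompt_characters * input_cost_per_character           # 0.000125 USD
    completion_cost = completion_characters * output_cost_per_character  # 0.000075 USD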
litellm/litellm_core_utils/llm_cost_calc/google.py
@@ -1,8 +1,10 @@
 # What is this?
 ## Cost calculation for Google AI Studio / Vertex AI models
-from typing import Literal, Tuple
+import traceback
+from typing import List, Literal, Optional, Tuple

 import litellm
+from litellm import verbose_logger

 """
 Gemini pricing covers:
@@ -12,6 +14,12 @@ Gemini pricing covers:
 - video
 """

+"""
+Vertex AI -> character based pricing
+
+Google AI Studio -> token based pricing
+"""
+
 models_without_dynamic_pricing = ["gemini-1.0-pro", "gemini-pro"]

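The new docstring records the distinction this commit relies on: Vertex AI publishes character-based prices while Google AI Studio publishes token-based prices, so the same Gemini model is costed differently depending on the route. A rough comparison under the 1 token ≈ 4 characters heuristic used later in the file (rates below are hypothetical, not taken from the real cost map):

    # Same prompt, priced two ways (assumed rates, for illustration only)
    prompt_characters = 1_000
    prompt_tokens = 250                                 # ~4 characters per token
    vertex_prompt_cost = prompt_characters * 1.25e-07   # character-based: 0.000125 USD
    studio_prompt_cost = prompt_tokens * 5.0e-07        # token-based:     0.000125 USD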
@@ -21,6 +29,124 @@ def _is_above_128k(tokens: float) -> bool:
     return False


+def cost_per_character(
+    model: str,
+    custom_llm_provider: str,
+    prompt_tokens: float,
+    completion_tokens: float,
+    prompt_characters: float,
+    completion_characters: float,
+) -> Tuple[float, float]:
+    """
+    Calculates the cost per character for a given VertexAI model, input messages, and response object.
+
+    Input:
+        - model: str, the model name without provider prefix
+        - custom_llm_provider: str, "vertex_ai-*"
+        - prompt_characters: float, the number of input characters
+        - completion_characters: float, the number of output characters
+
+    Returns:
+        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
+
+    Raises:
+        Exception if the model requires >128k pricing but that cost is not mapped
+    """
+    ## GET MODEL INFO
+    model_info = litellm.get_model_info(
+        model=model, custom_llm_provider=custom_llm_provider
+    )
+
+    ## CALCULATE INPUT COST
+    try:
+        if (
+            _is_above_128k(tokens=prompt_characters * 4)  # 1 token = 4 char
+            and model not in models_without_dynamic_pricing
+        ):
+            ## check if character pricing, else default to token pricing
+            assert (
+                "input_cost_per_character_above_128k_tokens" in model_info
+                and model_info["input_cost_per_character_above_128k_tokens"] is not None
+            ), "model info for model={} does not have 'input_cost_per_character_above_128k_tokens'-pricing for > 128k tokens\nmodel_info={}".format(
+                model, model_info
+            )
+            prompt_cost = (
+                prompt_characters
+                * model_info["input_cost_per_character_above_128k_tokens"]
+            )
+        else:
+            assert (
+                "input_cost_per_character" in model_info
+                and model_info["input_cost_per_character"] is not None
+            ), "model info for model={} does not have 'input_cost_per_character'-pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            prompt_cost = prompt_characters * model_info["input_cost_per_character"]
+    except Exception as e:
+        verbose_logger.error(
+            "litellm.litellm_core_utils.llm_cost_calc.google.cost_per_character(): Exception occurred - {}\n{}\n\
+            Defaulting to (cost_per_token * 4) calculation for prompt_cost".format(
+                str(e), traceback.format_exc()
+            )
+        )
+        initial_prompt_cost, _ = cost_per_token(
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+        )
+        prompt_cost = initial_prompt_cost * 4
+
+    ## CALCULATE OUTPUT COST
+    try:
+        if (
+            _is_above_128k(tokens=completion_characters * 4)  # 1 token = 4 char
+            and model not in models_without_dynamic_pricing
+        ):
+            assert (
+                "output_cost_per_character_above_128k_tokens" in model_info
+                and model_info["output_cost_per_character_above_128k_tokens"] is not None
+            ), "model info for model={} does not have 'output_cost_per_character_above_128k_tokens' pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            completion_cost = (
+                completion_characters
+                * model_info["output_cost_per_character_above_128k_tokens"]
+            )
+        else:
+            assert (
+                "output_cost_per_character" in model_info
+                and model_info["output_cost_per_character"] is not None
+            ), "model info for model={} does not have 'output_cost_per_character'-pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            completion_cost = (
+                completion_characters * model_info["output_cost_per_character"]
+            )
+    except Exception as e:
+        verbose_logger.error(
+            "litellm.litellm_core_utils.llm_cost_calc.google.cost_per_character(): Exception occurred - {}\n{}\n\
+            Defaulting to (cost_per_token * 4) calculation for completion_cost".format(
+                str(e), traceback.format_exc()
+            )
+        )
+        _, initial_completion_cost = cost_per_token(
+            model=model,
+            custom_llm_provider=custom_llm_provider,
+            prompt_tokens=prompt_tokens,
+            completion_tokens=completion_tokens,
+        )
+        completion_cost = initial_completion_cost * 4
+
+    return prompt_cost, completion_cost
+
+
 def cost_per_token(
     model: str,
     custom_llm_provider: str,
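As a usage reference, here is a hedged sketch of calling the new helper directly; the module path matches the one in the log message above, but the model name, provider string, and counts are illustrative assumptions, and the actual call site lives elsewhere in this commit:

    from litellm.litellm_core_utils.llm_cost_calc.google import cost_per_character

    # Hypothetical counts for a single Vertex AI Gemini call (assumed values)
    prompt_cost, completion_cost = cost_per_character(
        model="gemini-1.5-pro",                            # assumed model key in the cost map
        custom_llm_provider="vertex_ai-language-models",   # "vertex_ai-*" per the docstring
        prompt_tokens=250,        # only used by the token-based fallback path
        completion_tokens=100,
        prompt_characters=1_000,  # billed against input_cost_per_character
        completion_characters=400,
    )
    print(prompt_cost + completion_cost)

If the character-based keys are missing from the model's cost map, the helper logs the failure and falls back to cost_per_token(...) * 4, per the except branches above.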
@@ -53,7 +179,8 @@ def cost_per_token(
         and model not in models_without_dynamic_pricing
     ):
         assert (
-            model_info["input_cost_per_token_above_128k_tokens"] is not None
+            "input_cost_per_token_above_128k_tokens" in model_info
+            and model_info["input_cost_per_token_above_128k_tokens"] is not None
         ), "model info for model={} does not have pricing for > 128k tokens\nmodel_info={}".format(
             model, model_info
         )
@@ -69,7 +196,8 @@ def cost_per_token(
         and model not in models_without_dynamic_pricing
     ):
         assert (
-            model_info["output_cost_per_token_above_128k_tokens"] is not None
+            "output_cost_per_token_above_128k_tokens" in model_info
+            and model_info["output_cost_per_token_above_128k_tokens"] is not None
         ), "model info for model={} does not have pricing for > 128k tokens\nmodel_info={}".format(
             model, model_info
         )
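The last two hunks harden the existing cost_per_token asserts: the code now checks that the >128k pricing key is present in model_info before reading it, so a model whose cost map omits the field trips the descriptive assert message instead of raising a bare KeyError. The guarded pattern in isolation:

    # Membership check first, then the None check; indexing a missing key
    # directly would raise KeyError before the assert message could fire.
    assert (
        "output_cost_per_token_above_128k_tokens" in model_info
        and model_info["output_cost_per_token_above_128k_tokens"] is not None
    ), "model info for model={} does not have pricing for > 128k tokens".format(model)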