Merge branch 'main' into litellm_gemini_stream_tool_calling

commit c643be0c0c
Krish Dholakia, 2024-07-06 19:07:31 -07:00, committed by GitHub
70 changed files with 1844 additions and 984 deletions


@@ -24,6 +24,8 @@ from litellm.integrations.custom_logger import CustomLogger
 from litellm.litellm_core_utils.redact_messages import (
     redact_message_input_output_from_logging,
 )
+from litellm.types.llms.openai import HttpxBinaryResponseContent
+from litellm.types.router import SPECIAL_MODEL_INFO_PARAMS
 from litellm.types.utils import (
     CallTypes,
     EmbeddingResponse,
@@ -517,33 +519,36 @@ class Logging:
         self.model_call_details["cache_hit"] = cache_hit
         ## if model in model cost map - log the response cost
         ## else set cost to None
+        verbose_logger.debug(f"Model={self.model};")
         if (
-            result is not None
-            and (
+            result is not None and self.stream is not True
+        ):  # handle streaming separately
+            if (
                 isinstance(result, ModelResponse)
                 or isinstance(result, EmbeddingResponse)
                 or isinstance(result, ImageResponse)
                 or isinstance(result, TranscriptionResponse)
                 or isinstance(result, TextCompletionResponse)
-            )
-            and self.stream != True
-        ):  # handle streaming separately
-            self.model_call_details["response_cost"] = (
-                litellm.response_cost_calculator(
-                    response_object=result,
-                    model=self.model,
-                    cache_hit=self.model_call_details.get("cache_hit", False),
-                    custom_llm_provider=self.model_call_details.get(
-                        "custom_llm_provider", None
-                    ),
-                    base_model=_get_base_model_from_metadata(
-                        model_call_details=self.model_call_details
-                    ),
-                    call_type=self.call_type,
-                    optional_params=self.optional_params,
-                )
-            )
+                or isinstance(result, HttpxBinaryResponseContent)  # tts
+            ):
+                custom_pricing = use_custom_pricing_for_model(
+                    litellm_params=self.litellm_params
+                )
+                self.model_call_details["response_cost"] = (
+                    litellm.response_cost_calculator(
+                        response_object=result,
+                        model=self.model,
+                        cache_hit=self.model_call_details.get("cache_hit", False),
+                        custom_llm_provider=self.model_call_details.get(
+                            "custom_llm_provider", None
+                        ),
+                        base_model=_get_base_model_from_metadata(
+                            model_call_details=self.model_call_details
+                        ),
+                        call_type=self.call_type,
+                        optional_params=self.optional_params,
+                        custom_pricing=custom_pricing,
+                    )
+                )
         else:  # streaming chunks + image gen.
             self.model_call_details["response_cost"] = None
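
Read as a whole, the rewritten hunk splits one large condition in two: first gate out streaming results (those are costed separately once the full stream is assembled), then check that the response is a costable type, with HttpxBinaryResponseContent (tts audio) newly added to that list, and only then compute the cost, now threading a custom_pricing flag into the calculator. A distilled, illustrative sketch of that gate, reusing the types imported in the first hunk (this is not the Logging method itself, just its decision logic):

    from litellm.types.llms.openai import HttpxBinaryResponseContent
    from litellm.types.utils import (
        EmbeddingResponse,
        ImageResponse,
        ModelResponse,
        TextCompletionResponse,
        TranscriptionResponse,
    )

    # Response types whose cost can be computed directly from the final object.
    COSTABLE_TYPES = (
        ModelResponse,
        EmbeddingResponse,
        ImageResponse,
        TranscriptionResponse,
        TextCompletionResponse,
        HttpxBinaryResponseContent,  # tts
    )

    def should_cost_now(result, stream: bool) -> bool:
        # Streaming chunks are skipped here; their cost stays None for now.
        return result is not None and not stream and isinstance(result, COSTABLE_TYPES)
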
@@ -1798,7 +1803,6 @@ def set_callbacks(callback_list, function_id=None):
     try:
         for callback in callback_list:
             print_verbose(f"init callback list: {callback}")
             if callback == "sentry":
                 try:
                     import sentry_sdk
@@ -2016,3 +2020,17 @@ def get_custom_logger_compatible_class(
         if isinstance(callback, _PROXY_DynamicRateLimitHandler):
             return callback  # type: ignore
     return None
+
+
+def use_custom_pricing_for_model(litellm_params: Optional[dict]) -> bool:
+    if litellm_params is None:
+        return False
+    metadata: Optional[dict] = litellm_params.get("metadata", {})
+    if metadata is None:
+        return False
+    model_info: Optional[dict] = metadata.get("model_info", {})
+    if model_info is not None:
+        for k, v in model_info.items():
+            if k in SPECIAL_MODEL_INFO_PARAMS:
+                return True
+    return False
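
In short, use_custom_pricing_for_model() returns True only when the request's metadata carries a model_info entry whose keys include one of the SPECIAL_MODEL_INFO_PARAMS cost overrides. A minimal behavioral sketch (using "input_cost_per_token" as an assumed member of SPECIAL_MODEL_INFO_PARAMS, whose exact contents live in litellm.types.router; the deployment id is made up):

    litellm_params = {
        "metadata": {
            "model_info": {"id": "my-gpt-4-deployment", "input_cost_per_token": 3e-05}
        }
    }
    assert use_custom_pricing_for_model(litellm_params=litellm_params) is True

    # No params, or model_info without a cost-override key, means default pricing.
    assert use_custom_pricing_for_model(litellm_params=None) is False
    assert use_custom_pricing_for_model(litellm_params={"metadata": {}}) is False
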

litellm/litellm_core_utils/llm_cost_calc/utils.py (new file)

@@ -0,0 +1,85 @@
+# What is this?
+## Helper utilities for cost_per_token()
+
+import traceback
+from typing import List, Literal, Optional, Tuple
+
+import litellm
+from litellm import verbose_logger
+
+
+def _generic_cost_per_character(
+    model: str,
+    custom_llm_provider: str,
+    prompt_characters: float,
+    completion_characters: float,
+    custom_prompt_cost: Optional[float],
+    custom_completion_cost: Optional[float],
+) -> Tuple[Optional[float], Optional[float]]:
+    """
+    Generic helper that calculates the cost per character for a given model,
+    input messages, and response object.
+
+    Input:
+        - model: str, the model name without provider prefix
+        - custom_llm_provider: str, "vertex_ai-*"
+        - prompt_characters: float, the number of input characters
+        - completion_characters: float, the number of output characters
+        - custom_prompt_cost: Optional[float], overrides the mapped input cost per character
+        - custom_completion_cost: Optional[float], overrides the mapped output cost per character
+
+    Returns:
+        Tuple[Optional[float], Optional[float]] - prompt_cost_in_usd, completion_cost_in_usd.
+        - returns None if not able to calculate cost.
+
+    Raises:
+        Exception if 'input_cost_per_character' or 'output_cost_per_character'
+        is missing from model_info
+    """
+    args = locals()
+    ## GET MODEL INFO
+    model_info = litellm.get_model_info(
+        model=model, custom_llm_provider=custom_llm_provider
+    )
+
+    ## CALCULATE INPUT COST
+    try:
+        if custom_prompt_cost is None:
+            assert (
+                "input_cost_per_character" in model_info
+                and model_info["input_cost_per_character"] is not None
+            ), "model info for model={} does not have 'input_cost_per_character'-pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            custom_prompt_cost = model_info["input_cost_per_character"]
+        prompt_cost = prompt_characters * custom_prompt_cost
+    except Exception as e:
+        verbose_logger.error(
+            "litellm.litellm_core_utils.llm_cost_calc.utils.py::_generic_cost_per_character(): Exception occurred - {}\n{}\nDefaulting to None".format(
+                str(e), traceback.format_exc()
+            )
+        )
+        prompt_cost = None
+
+    ## CALCULATE OUTPUT COST
+    try:
+        if custom_completion_cost is None:
+            assert (
+                "output_cost_per_character" in model_info
+                and model_info["output_cost_per_character"] is not None
+            ), "model info for model={} does not have 'output_cost_per_character'-pricing\nmodel_info={}".format(
+                model, model_info
+            )
+            custom_completion_cost = model_info["output_cost_per_character"]
+        completion_cost = completion_characters * custom_completion_cost
+    except Exception as e:
+        verbose_logger.error(
+            "litellm.litellm_core_utils.llm_cost_calc.utils.py::_generic_cost_per_character(): Exception occurred - {}\n{}\nDefaulting to None".format(
+                str(e), traceback.format_exc()
+            )
+        )
+        completion_cost = None
+
+    return prompt_cost, completion_cost
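
For intuition, a hedged usage sketch of the new helper. The import path follows the module above; the character counts and override rates are made-up numbers. When explicit overrides are supplied, the model-info asserts are skipped and cost reduces to a simple multiplication. Note litellm.get_model_info() is still called up front, so the model must exist in litellm's model map; a Vertex AI Gemini model is used here since the docstring targets "vertex_ai-*" providers:

    from litellm.litellm_core_utils.llm_cost_calc.utils import (
        _generic_cost_per_character,
    )

    prompt_cost, completion_cost = _generic_cost_per_character(
        model="gemini-1.5-pro",
        custom_llm_provider="vertex_ai",
        prompt_characters=1000.0,
        completion_characters=500.0,
        custom_prompt_cost=1.25e-06,      # hypothetical $1.25 per 1M input chars
        custom_completion_cost=3.75e-06,  # hypothetical $3.75 per 1M output chars
    )
    # prompt_cost     = 1000 * 1.25e-06 = 0.00125 USD
    # completion_cost =  500 * 3.75e-06 = 0.001875 USD
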