From 6518bc70a0ae2c76e105dac14bf01955b68f40cf Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sun, 12 Jan 2025 15:27:54 -0800
Subject: [PATCH] (litellm SDK perf improvement) - use `verbose_logger.debug`
 and `_cached_get_model_info_helper` in `_response_cost_calculator` (#7720)

* define _cached_get_model_info_helper

* use _cached_get_model_info_helper
---
 litellm/cost_calculator.py | 26 +++++++++++++++++---------
 litellm/utils.py           | 12 ++++++++++++
 2 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/litellm/cost_calculator.py b/litellm/cost_calculator.py
index 93961b1372..0a428df90e 100644
--- a/litellm/cost_calculator.py
+++ b/litellm/cost_calculator.py
@@ -60,8 +60,7 @@ from litellm.utils import (
     ModelResponse,
     TextCompletionResponse,
     TranscriptionResponse,
-    _get_model_info_helper,
-    print_verbose,
+    _cached_get_model_info_helper,
     token_counter,
 )
 
@@ -279,7 +278,7 @@ def cost_per_token(  # noqa: PLR0915
     elif custom_llm_provider == "deepseek":
         return deepseek_cost_per_token(model=model, usage=usage_block)
     else:
-        model_info = _get_model_info_helper(
+        model_info = _cached_get_model_info_helper(
             model=model, custom_llm_provider=custom_llm_provider
         )
 
@@ -292,8 +291,11 @@ def cost_per_token(  # noqa: PLR0915
             model_info.get("input_cost_per_second", None) is not None
             and response_time_ms is not None
         ):
-            print_verbose(
-                f"For model={model} - input_cost_per_second: {model_info.get('input_cost_per_second')}; response time: {response_time_ms}"
+            verbose_logger.debug(
+                "For model=%s - input_cost_per_second: %s; response time: %s",
+                model,
+                model_info.get("input_cost_per_second", None),
+                response_time_ms,
             )
             ## COST PER SECOND ##
             prompt_tokens_cost_usd_dollar = (
@@ -308,16 +310,22 @@ def cost_per_token(  # noqa: PLR0915
             model_info.get("output_cost_per_second", None) is not None
             and response_time_ms is not None
         ):
-            print_verbose(
-                f"For model={model} - output_cost_per_second: {model_info.get('output_cost_per_second')}; response time: {response_time_ms}"
+            verbose_logger.debug(
+                "For model=%s - output_cost_per_second: %s; response time: %s",
+                model,
+                model_info.get("output_cost_per_second", None),
+                response_time_ms,
             )
             ## COST PER SECOND ##
             completion_tokens_cost_usd_dollar = (
                 model_info["output_cost_per_second"] * response_time_ms / 1000  # type: ignore
             )
 
-    print_verbose(
-        f"Returned custom cost for model={model} - prompt_tokens_cost_usd_dollar: {prompt_tokens_cost_usd_dollar}, completion_tokens_cost_usd_dollar: {completion_tokens_cost_usd_dollar}"
+    verbose_logger.debug(
+        "Returned custom cost for model=%s - prompt_tokens_cost_usd_dollar: %s, completion_tokens_cost_usd_dollar: %s",
+        model,
+        prompt_tokens_cost_usd_dollar,
+        completion_tokens_cost_usd_dollar,
     )
 
     return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
diff --git a/litellm/utils.py b/litellm/utils.py
index c08253a630..97dc8537ea 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -4013,6 +4013,18 @@ def _get_max_position_embeddings(model_name: str) -> Optional[int]:
     return None
 
 
+@lru_cache(maxsize=16)
+def _cached_get_model_info_helper(
+    model: str, custom_llm_provider: Optional[str]
+) -> ModelInfoBase:
+    """
+    _get_model_info_helper wrapped with lru_cache
+
+    Speed Optimization to hit high RPS
+    """
+    return _get_model_info_helper(model=model, custom_llm_provider=custom_llm_provider)
+
+
 def _get_model_info_helper(  # noqa: PLR0915
     model: str, custom_llm_provider: Optional[str] = None
 ) -> ModelInfoBase:
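
The caching half of the change is the standard functools.lru_cache wrapper pattern: keep the existing uncached helper and route hot-path callers through a thin cached front end. The sketch below is a minimal, self-contained illustration of that pattern only; the ModelInfo type, model table, and cost numbers are made up, and litellm's real _get_model_info_helper resolves the model against its cost map rather than a literal dict.

from functools import lru_cache
from typing import Optional, TypedDict


class ModelInfo(TypedDict):
    # Stand-in for litellm's ModelInfoBase; fields and values are illustrative.
    input_cost_per_token: float
    output_cost_per_token: float


# Hypothetical lookup table standing in for the real model cost map.
_MODEL_DB = {
    ("my-model", "my-provider"): ModelInfo(
        input_cost_per_token=0.0000025, output_cost_per_token=0.00001
    ),
}


def _get_model_info_helper(model: str, custom_llm_provider: Optional[str]) -> ModelInfo:
    # Uncached lookup; in litellm this walk over the model cost map is the
    # work the patch wants to avoid repeating on every cost calculation.
    return _MODEL_DB[(model, custom_llm_provider)]


@lru_cache(maxsize=16)
def _cached_get_model_info_helper(model: str, custom_llm_provider: Optional[str]) -> ModelInfo:
    # Thin cached front end, mirroring the wrapper added in litellm/utils.py.
    return _get_model_info_helper(model=model, custom_llm_provider=custom_llm_provider)


if __name__ == "__main__":
    for _ in range(3):
        _cached_get_model_info_helper("my-model", "my-provider")
    print(_cached_get_model_info_helper.cache_info())  # 1 miss, 2 hits

One caveat of the pattern: lru_cache hands every caller the same cached object and keys the cache on the exact argument pattern, so the returned info should be treated as read-only and call sites should stay consistent about positional vs. keyword arguments.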
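The logging half swaps eager f-string construction for the logging module's deferred %-style formatting: verbose_logger.debug(msg, *args) only interpolates the arguments if a DEBUG record will actually be emitted. A minimal comparison of the two call styles, using a throwaway logger rather than litellm's verbose_logger:

import logging

logger = logging.getLogger("example")
logging.basicConfig(level=logging.INFO)  # DEBUG records are dropped

model = "my-model"
input_cost_per_second = 0.002
response_time_ms = 420.0

# Eager: the f-string is built on every call, even though nothing is logged.
logger.debug(
    f"For model={model} - input_cost_per_second: {input_cost_per_second}; "
    f"response time: {response_time_ms}"
)

# Lazy: formatting is deferred to the logging framework and skipped entirely
# while DEBUG is disabled for this logger.
logger.debug(
    "For model=%s - input_cost_per_second: %s; response time: %s",
    model,
    input_cost_per_second,
    response_time_ms,
)

The saving per call is small, but it is paid on every cost calculation, which is the high-RPS hot path this PR targets.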