(litellm SDK perf improvement) - use verbose_logger.debug and _cached_get_model_info_helper in _response_cost_calculator (#7720)

* define _cached_get_model_info_helper

* use _cached_get_model_info_helper
This commit is contained in:
Ishaan Jaff 2025-01-12 15:27:54 -08:00 committed by GitHub
parent 15b52039d2
commit 6518bc70a0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 29 additions and 9 deletions

View file

@ -4013,6 +4013,18 @@ def _get_max_position_embeddings(model_name: str) -> Optional[int]:
return None
@lru_cache(maxsize=16)
def _cached_get_model_info_helper(
    model: str, custom_llm_provider: Optional[str]
) -> ModelInfoBase:
    """
    _get_model_info_helper wrapped with lru_cache

    Speed Optimization to hit high RPS
    """
    # Delegate to the uncached helper; lru_cache memoizes by
    # (model, custom_llm_provider) so repeated lookups skip the work.
    info = _get_model_info_helper(
        model=model,
        custom_llm_provider=custom_llm_provider,
    )
    return info
def _get_model_info_helper( # noqa: PLR0915
model: str, custom_llm_provider: Optional[str] = None
) -> ModelInfoBase: