diff --git a/litellm/caching/_internal_lru_cache.py b/litellm/caching/_internal_lru_cache.py
new file mode 100644
index 0000000000..54b0fe9690
--- /dev/null
+++ b/litellm/caching/_internal_lru_cache.py
@@ -0,0 +1,30 @@
+from functools import lru_cache
+from typing import Callable, Optional, TypeVar
+
+T = TypeVar("T")
+
+
+def lru_cache_wrapper(
+    maxsize: Optional[int] = None,
+) -> Callable[[Callable[..., T]], Callable[..., T]]:
+    """
+    Wrapper for lru_cache that caches success and exceptions
+    """
+
+    def decorator(f: Callable[..., T]) -> Callable[..., T]:
+        @lru_cache(maxsize=maxsize)
+        def wrapper(*args, **kwargs):
+            try:
+                return ("success", f(*args, **kwargs))
+            except Exception as e:
+                return ("error", e)
+
+        def wrapped(*args, **kwargs):
+            result = wrapper(*args, **kwargs)
+            if result[0] == "error":
+                raise result[1]
+            return result[1]
+
+        return wrapped
+
+    return decorator
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index 04d500c1e1..4debb62886 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -12,6 +12,7 @@ import time
 import traceback
 import uuid
 from datetime import datetime as dt_object
+from functools import lru_cache
 from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union, cast
 
 from pydantic import BaseModel
@@ -835,7 +836,7 @@ class Logging(LiteLLMLoggingBaseClass):
         except Exception as e:  # error calculating cost
             debug_info = StandardLoggingModelCostFailureDebugInformation(
                 error_str=str(e),
-                traceback_str=traceback.format_exc(),
+                traceback_str=_get_traceback_str_for_error(str(e)),
                 model=response_cost_calculator_kwargs["model"],
                 cache_hit=response_cost_calculator_kwargs["cache_hit"],
                 custom_llm_provider=response_cost_calculator_kwargs[
@@ -3320,3 +3321,11 @@ def modify_integration(integration_name, integration_params):
     if integration_name == "supabase":
         if "table_name" in integration_params:
             Supabase.supabase_table_name = integration_params["table_name"]
+
+
+@lru_cache(maxsize=16)
+def _get_traceback_str_for_error(error_str: str) -> str:
+    """
+    function wrapped with lru_cache to limit the number of times `traceback.format_exc()` is called
+    """
+    return traceback.format_exc()
diff --git a/litellm/utils.py b/litellm/utils.py
index 84542789e6..a6032edf83 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -57,6 +57,7 @@ import litellm._service_logger  # for storing API inputs, outputs, and metadata
 import litellm.litellm_core_utils
 import litellm.litellm_core_utils.audio_utils.utils
 import litellm.litellm_core_utils.json_validation_rule
+from litellm.caching._internal_lru_cache import lru_cache_wrapper
 from litellm.caching.caching import DualCache
 from litellm.caching.caching_handler import CachingHandlerResponse, LLMCachingHandler
 from litellm.integrations.custom_logger import CustomLogger
@@ -4013,7 +4014,7 @@ def _get_max_position_embeddings(model_name: str) -> Optional[int]:
     return None
 
 
-@lru_cache(maxsize=16)
+@lru_cache_wrapper(maxsize=16)
 def _cached_get_model_info_helper(
     model: str, custom_llm_provider: Optional[str]
 ) -> ModelInfoBase:
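
Not part of the diff: a minimal usage sketch of `lru_cache_wrapper`, assuming this branch is applied so `litellm.caching._internal_lru_cache` is importable. The `get_model_info_stub` function and the `calls` counter below are hypothetical, used only for illustration. The point of the wrapper is that it memoizes both return values and raised exceptions, which is why the diff swaps it in for plain `functools.lru_cache` on `_cached_get_model_info_helper`: repeated lookups for an unmapped model re-raise the cached exception instead of recomputing.

```python
from litellm.caching._internal_lru_cache import lru_cache_wrapper

calls = {"count": 0}


@lru_cache_wrapper(maxsize=16)
def get_model_info_stub(model: str) -> dict:
    # Hypothetical stand-in for _cached_get_model_info_helper; counts real invocations.
    calls["count"] += 1
    if model == "unknown-model":
        raise ValueError(f"model not mapped: {model}")
    return {"model": model, "max_tokens": 4096}


# Successful results are cached, as with plain functools.lru_cache.
get_model_info_stub("gpt-4")
get_model_info_stub("gpt-4")
assert calls["count"] == 1

# Exceptions are cached too: the second failing call re-raises the stored
# ValueError without running the function body again.
for _ in range(2):
    try:
        get_model_info_stub("unknown-model")
    except ValueError:
        pass
assert calls["count"] == 2
```

One consequence of this design is that a failure stays in the same LRU cache as successes, so it is replayed for identical arguments until the entry is evicted.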