diff --git a/litellm/caching/_internal_lru_cache.py b/litellm/caching/_internal_lru_cache.py
new file mode 100644
index 0000000000..54b0fe9690
--- /dev/null
+++ b/litellm/caching/_internal_lru_cache.py
@@ -0,0 +1,30 @@
+from functools import lru_cache
+from typing import Callable, Optional, TypeVar
+
+T = TypeVar("T")
+
+
+def lru_cache_wrapper(
+    maxsize: Optional[int] = None,
+) -> Callable[[Callable[..., T]], Callable[..., T]]:
+    """
+    Wrapper for lru_cache that caches success and exceptions
+    """
+
+    def decorator(f: Callable[..., T]) -> Callable[..., T]:
+        @lru_cache(maxsize=maxsize)
+        def wrapper(*args, **kwargs):
+            try:
+                return ("success", f(*args, **kwargs))
+            except Exception as e:
+                return ("error", e)
+
+        def wrapped(*args, **kwargs):
+            result = wrapper(*args, **kwargs)
+            if result[0] == "error":
+                raise result[1]
+            return result[1]
+
+        return wrapped
+
+    return decorator
diff --git a/litellm/litellm_core_utils/litellm_logging.py b/litellm/litellm_core_utils/litellm_logging.py
index 04d500c1e1..4debb62886 100644
--- a/litellm/litellm_core_utils/litellm_logging.py
+++ b/litellm/litellm_core_utils/litellm_logging.py
@@ -12,6 +12,7 @@ import time
 import traceback
 import uuid
 from datetime import datetime as dt_object
+from functools import lru_cache
 from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union, cast
 
 from pydantic import BaseModel
@@ -835,7 +836,7 @@ class Logging(LiteLLMLoggingBaseClass):
         except Exception as e:  # error calculating cost
             debug_info = StandardLoggingModelCostFailureDebugInformation(
                 error_str=str(e),
-                traceback_str=traceback.format_exc(),
+                traceback_str=_get_traceback_str_for_error(str(e)),
                 model=response_cost_calculator_kwargs["model"],
                 cache_hit=response_cost_calculator_kwargs["cache_hit"],
                 custom_llm_provider=response_cost_calculator_kwargs[
@@ -3320,3 +3321,11 @@ def modify_integration(integration_name, integration_params):
     if integration_name == "supabase":
         if "table_name" in integration_params:
             Supabase.supabase_table_name = integration_params["table_name"]
+
+
+@lru_cache(maxsize=16)
+def _get_traceback_str_for_error(error_str: str) -> str:
+    """
+    function wrapped with lru_cache to limit the number of times `traceback.format_exc()` is called
+    """
+    return traceback.format_exc()
diff --git a/litellm/utils.py b/litellm/utils.py
index 84542789e6..a6032edf83 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -57,6 +57,7 @@ import litellm._service_logger  # for storing API inputs, outputs, and metadata
 import litellm.litellm_core_utils
 import litellm.litellm_core_utils.audio_utils.utils
 import litellm.litellm_core_utils.json_validation_rule
+from litellm.caching._internal_lru_cache import lru_cache_wrapper
 from litellm.caching.caching import DualCache
 from litellm.caching.caching_handler import CachingHandlerResponse, LLMCachingHandler
 from litellm.integrations.custom_logger import CustomLogger
@@ -4013,7 +4014,7 @@ def _get_max_position_embeddings(model_name: str) -> Optional[int]:
     return None
 
 
-@lru_cache(maxsize=16)
+@lru_cache_wrapper(maxsize=16)
 def _cached_get_model_info_helper(
     model: str, custom_llm_provider: Optional[str]
 ) -> ModelInfoBase:
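
Not part of the diff: a minimal usage sketch of `lru_cache_wrapper`, assuming this branch is applied so `litellm.caching._internal_lru_cache` is importable. The `get_model_info_stub` function and the `calls` counter below are hypothetical, used only for illustration. The point of the wrapper is that it memoizes both return values and raised exceptions, which is why the diff swaps it in for plain `functools.lru_cache` on `_cached_get_model_info_helper`: repeated lookups for an unmapped model re-raise the cached exception instead of recomputing.

```python
from litellm.caching._internal_lru_cache import lru_cache_wrapper

calls = {"count": 0}


@lru_cache_wrapper(maxsize=16)
def get_model_info_stub(model: str) -> dict:
    # Hypothetical stand-in for _cached_get_model_info_helper; counts real invocations.
    calls["count"] += 1
    if model == "unknown-model":
        raise ValueError(f"model not mapped: {model}")
    return {"model": model, "max_tokens": 4096}


# Successful results are cached, as with plain functools.lru_cache.
get_model_info_stub("gpt-4")
get_model_info_stub("gpt-4")
assert calls["count"] == 1

# Exceptions are cached too: the second failing call re-raises the stored
# ValueError without running the function body again.
for _ in range(2):
    try:
        get_model_info_stub("unknown-model")
    except ValueError:
        pass
assert calls["count"] == 2
```

One consequence of this design is that a failure stays in the same LRU cache as successes, so it is replayed for identical arguments until the entry is evicted.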