diff --git a/litellm/caching.py b/litellm/caching.py
index 9b6011bf49..880018c4c2 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -2584,18 +2584,22 @@ class Cache:
             verbose_logger.exception(f"LiteLLM Cache: Excepton add_cache: {str(e)}")
 
     def should_use_cache(self, *args, **kwargs):
+        """
+        Returns true if we should use the cache for LLM API calls
+
+        If cache is default_on then this is True
+        If cache is default_off then this is only true when user has opted in to use cache
+        """
         if self.mode == CacheMode.default_on:
             return True
-        else:
-            # when mode == default_off -> Cache is opt in only
-            _cache = kwargs.get("cache", None)
-            verbose_logger.debug(
-                f"should_use_cache: kwargs: {kwargs}; _cache: {_cache}"
-            )
-            if _cache and isinstance(_cache, dict):
-                if _cache.get("use-cache", True) is False:
-                    return True
-            return True
+
+        # when mode == default_off -> Cache is opt in only
+        _cache = kwargs.get("cache", None)
+        verbose_logger.debug("should_use_cache: kwargs: %s; _cache: %s", kwargs, _cache)
+        if _cache and isinstance(_cache, dict):
+            if _cache.get("use-cache", False) is True:
+                return True
+        return False
 
     async def batch_cache_write(self, result, *args, **kwargs):
         cache_key, cached_data, kwargs = self._add_cache_logic(
diff --git a/litellm/tests/test_caching.py b/litellm/tests/test_caching.py
index 402f3d51f1..34c161ba4b 100644
--- a/litellm/tests/test_caching.py
+++ b/litellm/tests/test_caching.py
@@ -1941,6 +1941,8 @@ async def test_cache_default_off_acompletion():
     )
     print(f"Response3: {response3}")
 
+    await asyncio.sleep(2)
+
     response4 = await litellm.acompletion(
         model="gpt-3.5-turbo",
         messages=[
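
A minimal sketch of the caller-side behavior this change enables: with the cache in default_off mode, should_use_cache() now returns False unless the request opts in via the cache kwarg it inspects ({"use-cache": True}). The cache kwarg shape is taken from the diff; the Cache constructor arguments (type="local", mode=CacheMode.default_off) and the import path are assumptions, not part of this patch.

```python
# Sketch only: constructor args and import path are assumed, not from this diff.
import asyncio

import litellm
from litellm.caching import Cache, CacheMode

# With default_off, should_use_cache() returns False unless the caller opts in.
litellm.cache = Cache(type="local", mode=CacheMode.default_off)


async def main():
    # Not cached: no opt-in, so should_use_cache() returns False.
    await litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello"}],
    )

    # Cached: the caller opts in via the `cache` dict that
    # should_use_cache() checks for {"use-cache": True}.
    await litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello"}],
        cache={"use-cache": True},
    )


asyncio.run(main())
```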