diff --git a/litellm/__init__.py b/litellm/__init__.py
index 363cb6205..ef2a5b3cf 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -30,9 +30,9 @@ togetherai_api_key: Optional[str] = None
 baseten_key: Optional[str] = None
 use_client = False
 logging = True
-caching = False
-cache: Optional[Cache] = None # set to litellm.caching Cache() object
-caching_with_models = False # if you want the caching key to be model + prompt
+caching = False # deprecated soon
+caching_with_models = False # if you want the caching key to be model + prompt # deprecated soon
+cache: Optional[Cache] = None # cache object
 model_alias_map: Dict[str, str] = {}
 model_cost = {
     "babbage-002": {
diff --git a/litellm/caching.py b/litellm/caching.py
index 3cab7d460..d6a7ddacd 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -75,7 +75,6 @@ class Cache():
             if cache_key is not None:
                 self.cache.set_cache(cache_key, result)
         except:
-            pass
@@ -83,6 +82,3 @@ class Cache():
-
-
-
diff --git a/litellm/tests/test_caching.py b/litellm/tests/test_caching.py
index 1845f3b47..7a6ca0882 100644
--- a/litellm/tests/test_caching.py
+++ b/litellm/tests/test_caching.py
@@ -29,13 +29,11 @@ def test_caching():
         if response2 != response1:
             print(f"response1: {response1}")
             print(f"response2: {response2}")
-            pytest.fail(f"Error occurred: {e}")
+            pytest.fail(f"Error occurred: responses are not equal")
     except Exception as e:
         litellm.caching = False
-        print(f"error occurred: {traceback.format_exc()}")
         pytest.fail(f"Error occurred: {e}")
 
-
 def test_caching_with_models():
     litellm.caching_with_models = True
     response1 = completion(model="gpt-3.5-turbo", messages=messages)
diff --git a/litellm/utils.py b/litellm/utils.py
index 8e9b1dd0e..bc9789125 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -26,6 +26,7 @@ from .exceptions import (
     OpenAIError,
 )
 from typing import List, Dict, Union, Optional
+from .caching import Cache
 
 ####### ENVIRONMENT VARIABLES ####################
@@ -402,11 +403,16 @@ def client(original_function):
             kwargs["litellm_call_id"] = litellm_call_id
             start_time = datetime.datetime.now()
             # [OPTIONAL] CHECK CACHE
-            if (litellm.caching or litellm.caching_with_models or litellm.cache != None) and (
-                cached_result := litellm.cache.get_cache(*args, **kwargs)
-            ) is not None:
-                result = cached_result
-                return result
+            # remove this after deprecating litellm.caching
+            if (litellm.caching or litellm.caching_with_models) and litellm.cache is None:
+                litellm.cache = Cache()
+
+            # checking cache
+            if (litellm.cache != None or litellm.caching or litellm.caching_with_models):
+                cached_result = litellm.cache.get_cache(*args, **kwargs)
+                if cached_result != None:
+                    return cached_result
+
             # MODEL CALL
             result = original_function(*args, **kwargs)
             if "stream" in kwargs and kwargs["stream"] == True:
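
A minimal usage sketch of the new-style cache, pieced together from the test file and the cache check added in the utils.py hunk above; the prompt text, and the assumption that the client wrapper writes the first result back into the cache (the set_cache path shown in caching.py), are illustrative rather than part of this patch:

import litellm
from litellm import completion
from litellm.caching import Cache

# New style: attach a Cache object directly. litellm.caching and
# litellm.caching_with_models still work but are flagged "deprecated soon" above.
litellm.cache = Cache()

messages = [{"role": "user", "content": "Hey, how's it going?"}]  # placeholder prompt

# First call goes to the model; the wrapper is expected to store the result
# in the cache (see self.cache.set_cache in the caching.py hunk).
response1 = completion(model="gpt-3.5-turbo", messages=messages)

# Second identical call should be served by litellm.cache.get_cache(*args, **kwargs)
# inside the client wrapper, so the two responses should compare equal,
# mirroring the assertion in test_caching.
response2 = completion(model="gpt-3.5-turbo", messages=messages)
assert response1 == response2

litellm.cache = None  # turn caching back off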