From d67e47d7fd27a78c124d727d372ab9376c1b21ad Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Tue, 23 Apr 2024 16:13:03 -0700
Subject: [PATCH] fix(test_caching.py): add longer delay for async test

---
 litellm/tests/test_caching.py | 69 +++++++++++++++++++++++++----------
 litellm/utils.py              | 21 +++--------
 2 files changed, 55 insertions(+), 35 deletions(-)

diff --git a/litellm/tests/test_caching.py b/litellm/tests/test_caching.py
index 13847574d..903ce69c7 100644
--- a/litellm/tests/test_caching.py
+++ b/litellm/tests/test_caching.py
@@ -178,32 +178,61 @@ def test_caching_with_default_ttl():
         pytest.fail(f"Error occurred: {e}")
 
 
-def test_caching_with_cache_controls():
+@pytest.mark.parametrize(
+    "sync_flag",
+    [True, False],
+)
+@pytest.mark.asyncio
+async def test_caching_with_cache_controls(sync_flag):
     try:
         litellm.set_verbose = True
         litellm.cache = Cache()
         message = [{"role": "user", "content": f"Hey, how's it going? {uuid.uuid4()}"}]
-        ## TTL = 0
-        response1 = completion(
-            model="gpt-3.5-turbo", messages=messages, cache={"ttl": 0}
-        )
-        response2 = completion(
-            model="gpt-3.5-turbo", messages=messages, cache={"s-maxage": 10}
-        )
-        print(f"response1: {response1}")
-        print(f"response2: {response2}")
-        assert response2["id"] != response1["id"]
+        if sync_flag:
+            ## TTL = 0
+            response1 = completion(
+                model="gpt-3.5-turbo", messages=messages, cache={"ttl": 0}
+            )
+            response2 = completion(
+                model="gpt-3.5-turbo", messages=messages, cache={"s-maxage": 10}
+            )
+
+            assert response2["id"] != response1["id"]
+        else:
+            ## TTL = 0
+            response1 = await litellm.acompletion(
+                model="gpt-3.5-turbo", messages=messages, cache={"ttl": 0}
+            )
+            await asyncio.sleep(10)
+            response2 = await litellm.acompletion(
+                model="gpt-3.5-turbo", messages=messages, cache={"s-maxage": 10}
+            )
+
+            assert response2["id"] != response1["id"]
+
         message = [{"role": "user", "content": f"Hey, how's it going? {uuid.uuid4()}"}]
         ## TTL = 5
-        response1 = completion(
-            model="gpt-3.5-turbo", messages=messages, cache={"ttl": 5}
-        )
-        response2 = completion(
-            model="gpt-3.5-turbo", messages=messages, cache={"s-maxage": 5}
-        )
-        print(f"response1: {response1}")
-        print(f"response2: {response2}")
-        assert response2["id"] == response1["id"]
+        if sync_flag:
+            response1 = completion(
+                model="gpt-3.5-turbo", messages=messages, cache={"ttl": 5}
+            )
+            response2 = completion(
+                model="gpt-3.5-turbo", messages=messages, cache={"s-maxage": 5}
+            )
+            print(f"response1: {response1}")
+            print(f"response2: {response2}")
+            assert response2["id"] == response1["id"]
+        else:
+            response1 = await litellm.acompletion(
+                model="gpt-3.5-turbo", messages=messages, cache={"ttl": 25}
+            )
+            await asyncio.sleep(10)
+            response2 = await litellm.acompletion(
+                model="gpt-3.5-turbo", messages=messages, cache={"s-maxage": 25}
+            )
+            print(f"response1: {response1}")
+            print(f"response2: {response2}")
+            assert response2["id"] == response1["id"]
     except Exception as e:
         print(f"error occurred: {traceback.format_exc()}")
         pytest.fail(f"Error occurred: {e}")
diff --git a/litellm/utils.py b/litellm/utils.py
index 2547b7d34..4ae229231 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2716,7 +2716,7 @@ def client(original_function):
 
             # [OPTIONAL] CHECK CACHE
             print_verbose(
-                f"kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}"
+                f"SYNC kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}; kwargs.get('cache')['no-cache']: {kwargs.get('cache', {}).get('no-cache', False)}"
             )
             # if caching is false or cache["no-cache"]==True, don't run this
             if (
@@ -2724,17 +2724,14 @@ def client(original_function):
                 (
                     (
                         kwargs.get("caching", None) is None
-                        and kwargs.get("cache", None) is None
                         and litellm.cache is not None
                     )
                     or kwargs.get("caching", False) == True
                 )
-                and (
-                    kwargs.get("cache", None) is None
-                    or kwargs["cache"].get("no-cache", False) != True
-                )
+                and kwargs.get("cache", {}).get("no-cache", False) != True
             ) and kwargs.get("aembedding", False) != True
+            and kwargs.get("atext_completion", False) != True
             and kwargs.get("acompletion", False) != True
             and kwargs.get("aimg_generation", False) != True
             and kwargs.get("atranscription", False) != True
@@ -3014,21 +3011,16 @@ def client(original_function):
 
             # [OPTIONAL] CHECK CACHE
             print_verbose(
-                f"kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}; kwargs.get('cache'): {kwargs.get('cache', None)}"
+                f"ASYNC kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}; kwargs.get('cache'): {kwargs.get('cache', None)}"
             )
             # if caching is false, don't run this
             final_embedding_cached_response = None
             if (
-                (
-                    kwargs.get("caching", None) is None
-                    and kwargs.get("cache", None) is None
-                    and litellm.cache is not None
-                )
+                (kwargs.get("caching", None) is None and litellm.cache is not None)
                 or kwargs.get("caching", False) == True
             ) and (
-                kwargs.get("cache", None) is None
-                or kwargs["cache"].get("no-cache", False) != True
+                kwargs.get("cache", {}).get("no-cache", False) != True
             ):  # allow users to control returning cached responses from the completion function
                 # checking cache
                 print_verbose("INSIDE CHECKING CACHE")
@@ -3074,7 +3066,6 @@ def client(original_function):
                         preset_cache_key  # for streaming calls, we need to pass the preset_cache_key
                     )
                     cached_result = litellm.cache.get_cache(*args, **kwargs)
-
                     if cached_result is not None and not isinstance(
                         cached_result, list
                     ):
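
Note on the utils.py hunks (editor's note, not part of the patch): before this change, the sync and async guards required kwargs.get("cache", None) is None, so passing any per-request cache controls (e.g. cache={"ttl": 0} or cache={"s-maxage": 10}, as the parametrized test above does) disabled the cache lookup unless caching=True was also set. After the change, only an explicit cache={"no-cache": True} opts a request out. The sketch below restates the new guard as a standalone predicate under stated assumptions: should_check_cache and cache_configured are hypothetical names used only for illustration; litellm evaluates this condition inline inside utils.client's wrappers, not via such a helper.

# Illustrative sketch only -- not litellm's actual API.
from typing import Any, Dict, Optional


def should_check_cache(kwargs: Dict[str, Any], cache_configured: bool) -> bool:
    """Return True when the wrapper should consult the cache before calling the model."""
    caching_flag: Optional[bool] = kwargs.get("caching", None)
    cache_controls: Dict[str, Any] = kwargs.get("cache") or {}

    # caching is active when it is left unset and a cache is configured globally
    # (litellm.cache is not None), or when the caller passes caching=True
    caching_enabled = (caching_flag is None and cache_configured) or caching_flag is True
    # only an explicit {"no-cache": True} opts this request out of the lookup
    not_opted_out = cache_controls.get("no-cache", False) is not True
    return caching_enabled and not_opted_out


# Per-request controls such as {"ttl": 0} or {"s-maxage": 10} no longer disable
# the lookup, which is the behavior the parametrized test above exercises.
assert should_check_cache({"cache": {"ttl": 0}}, cache_configured=True) is True
assert should_check_cache({"cache": {"no-cache": True}}, cache_configured=True) is False
assert should_check_cache({}, cache_configured=False) is False

The async branch of the test also sleeps 10 seconds between the two acompletion calls (the "longer delay" from the subject line), presumably to give the cache write triggered by the first call time to land before the second call tries to read it.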