fix(test_caching.py): add longer delay for async test

Krrish Dholakia 2024-04-23 16:13:03 -07:00
parent 161e836427
commit d67e47d7fd
2 changed files with 55 additions and 35 deletions

test_caching.py

@@ -178,11 +178,17 @@ def test_caching_with_default_ttl():
        pytest.fail(f"Error occurred: {e}")


-def test_caching_with_cache_controls():
+@pytest.mark.parametrize(
+    "sync_flag",
+    [True, False],
+)
+@pytest.mark.asyncio
+async def test_caching_with_cache_controls(sync_flag):
    try:
        litellm.set_verbose = True
        litellm.cache = Cache()
        message = [{"role": "user", "content": f"Hey, how's it going? {uuid.uuid4()}"}]
-        ## TTL = 0
-        response1 = completion(
-            model="gpt-3.5-turbo", messages=messages, cache={"ttl": 0}
+        if sync_flag:
+            ## TTL = 0
+            response1 = completion(
+                model="gpt-3.5-turbo", messages=messages, cache={"ttl": 0}
@@ -190,11 +196,23 @@ def test_caching_with_cache_controls():
-        response2 = completion(
-            model="gpt-3.5-turbo", messages=messages, cache={"s-maxage": 10}
-        )
-        assert response2["id"] != response1["id"]
+            response2 = completion(
+                model="gpt-3.5-turbo", messages=messages, cache={"s-maxage": 10}
+            )
+            print(f"response1: {response1}")
+            print(f"response2: {response2}")
+            assert response2["id"] != response1["id"]
+        else:
+            ## TTL = 0
+            response1 = await litellm.acompletion(
+                model="gpt-3.5-turbo", messages=messages, cache={"ttl": 0}
+            )
+            await asyncio.sleep(10)
+            response2 = await litellm.acompletion(
+                model="gpt-3.5-turbo", messages=messages, cache={"s-maxage": 10}
+            )
+            assert response2["id"] != response1["id"]

        message = [{"role": "user", "content": f"Hey, how's it going? {uuid.uuid4()}"}]
        ## TTL = 5
-        response1 = completion(
-            model="gpt-3.5-turbo", messages=messages, cache={"ttl": 5}
-        )
+        if sync_flag:
+            response1 = completion(
+                model="gpt-3.5-turbo", messages=messages, cache={"ttl": 5}
+            )
@@ -204,6 +222,17 @@ def test_caching_with_cache_controls():
-        print(f"response1: {response1}")
-        print(f"response2: {response2}")
-        assert response2["id"] == response1["id"]
+            print(f"response1: {response1}")
+            print(f"response2: {response2}")
+            assert response2["id"] == response1["id"]
+        else:
+            response1 = await litellm.acompletion(
+                model="gpt-3.5-turbo", messages=messages, cache={"ttl": 25}
+            )
+            await asyncio.sleep(10)
+            response2 = await litellm.acompletion(
+                model="gpt-3.5-turbo", messages=messages, cache={"s-maxage": 25}
+            )
+            print(f"response1: {response1}")
+            print(f"response2: {response2}")
+            assert response2["id"] == response1["id"]
    except Exception as e:
        print(f"error occurred: {traceback.format_exc()}")
        pytest.fail(f"Error occurred: {e}")
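Note on the pattern: parametrizing sync_flag folds the sync and async code paths into a single coroutine test, and the longer await asyncio.sleep(10) on the async path gives the cache write time to land before the second read (hence the commit title). A minimal, self-contained sketch of the same structure; the do_work/do_work_async helpers are hypothetical stand-ins, not litellm APIs, and the async test relies on the pytest-asyncio plugin:

import asyncio

import pytest


def do_work() -> str:
    # stand-in for a synchronous client call
    return "result"


async def do_work_async() -> str:
    await asyncio.sleep(0)  # yield to the event loop, like a real awaited call
    return "result"


@pytest.mark.parametrize("sync_flag", [True, False])
@pytest.mark.asyncio
async def test_sync_and_async_paths(sync_flag):
    if sync_flag:
        result = do_work()  # sync path: plain call, no await
    else:
        result = await do_work_async()  # async path: awaited call
    assert result == "result"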

litellm/utils.py

@@ -2716,7 +2716,7 @@ def client(original_function):
            # [OPTIONAL] CHECK CACHE
            print_verbose(
-                f"kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}"
+                f"SYNC kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}; kwargs.get('cache')['no-cache']: {kwargs.get('cache', {}).get('no-cache', False)}"
            )
            # if caching is false or cache["no-cache"]==True, don't run this
            if (
@@ -2724,17 +2724,14 @@ def client(original_function):
                (
                    (
                        kwargs.get("caching", None) is None
-                        and kwargs.get("cache", None) is None
                        and litellm.cache is not None
                    )
                    or kwargs.get("caching", False) == True
                )
-                and (
-                    kwargs.get("cache", None) is None
-                    or kwargs["cache"].get("no-cache", False) != True
-                )
+                and kwargs.get("cache", {}).get("no-cache", False) != True
            )
            and kwargs.get("aembedding", False) != True
+            and kwargs.get("atext_completion", False) != True
            and kwargs.get("acompletion", False) != True
            and kwargs.get("aimg_generation", False) != True
            and kwargs.get("atranscription", False) != True
@@ -3014,21 +3011,16 @@ def client(original_function):
            # [OPTIONAL] CHECK CACHE
            print_verbose(
-                f"kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}; kwargs.get('cache'): {kwargs.get('cache', None)}"
+                f"ASYNC kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}; kwargs.get('cache'): {kwargs.get('cache', None)}"
            )
            # if caching is false, don't run this
            final_embedding_cached_response = None
            if (
-                (
-                    kwargs.get("caching", None) is None
-                    and kwargs.get("cache", None) is None
-                    and litellm.cache is not None
-                )
+                (kwargs.get("caching", None) is None and litellm.cache is not None)
                or kwargs.get("caching", False) == True
            ) and (
-                kwargs.get("cache", None) is None
-                or kwargs["cache"].get("no-cache", False) != True
+                kwargs.get("cache", {}).get("no-cache", False) != True
            ):  # allow users to control returning cached responses from the completion function
                # checking cache
                print_verbose("INSIDE CHECKING CACHE")
@@ -3074,7 +3066,6 @@ def client(original_function):
                    preset_cache_key  # for streaming calls, we need to pass the preset_cache_key
                )
                cached_result = litellm.cache.get_cache(*args, **kwargs)
-
                if cached_result is not None and not isinstance(
                    cached_result, list
                ):
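Note on the condition rewrite: in both the SYNC and ASYNC branches, the old two-clause check (cache is None, or cache["no-cache"] != True) collapses into one expression by defaulting the cache dict to {}. A minimal sketch of the simplified gate, using a hypothetical should_check_cache helper (not litellm's API) and assuming callers omit the cache kwarg rather than passing cache=None explicitly:

from typing import Any, Dict, Optional


def should_check_cache(kwargs: Dict[str, Any], global_cache: Optional[object]) -> bool:
    # hypothetical helper mirroring the simplified gate above, not litellm code
    caching_enabled = (
        (kwargs.get("caching", None) is None and global_cache is not None)
        or kwargs.get("caching", False) == True
    )
    # defaulting to {} makes the "no-cache" lookup safe when no cache dict
    # was passed, replacing the old separate "cache is None" clause
    no_cache_requested = kwargs.get("cache", {}).get("no-cache", False) == True
    return caching_enabled and not no_cache_requested


# usage sketch: no kwargs -> check cache; explicit no-cache -> skip the cache
assert should_check_cache({}, global_cache=object()) is True
assert should_check_cache({"cache": {"no-cache": True}}, global_cache=object()) is False
assert should_check_cache({"caching": False}, global_cache=object()) is False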