fix(test_caching.py): add longer delay for async test

Krrish Dholakia 2024-04-23 16:13:03 -07:00
parent 161e836427
commit d67e47d7fd
2 changed files with 55 additions and 35 deletions

test_caching.py

@@ -178,11 +178,17 @@ def test_caching_with_default_ttl():
        pytest.fail(f"Error occurred: {e}")


-def test_caching_with_cache_controls():
+@pytest.mark.parametrize(
+    "sync_flag",
+    [True, False],
+)
+@pytest.mark.asyncio
+async def test_caching_with_cache_controls(sync_flag):
    try:
        litellm.set_verbose = True
        litellm.cache = Cache()
        message = [{"role": "user", "content": f"Hey, how's it going? {uuid.uuid4()}"}]
-        ## TTL = 0
-        response1 = completion(
-            model="gpt-3.5-turbo", messages=messages, cache={"ttl": 0}
+        if sync_flag:
+            ## TTL = 0
+            response1 = completion(
+                model="gpt-3.5-turbo", messages=messages, cache={"ttl": 0}
@@ -190,11 +196,23 @@ def test_caching_with_cache_controls():
-        response2 = completion(
-            model="gpt-3.5-turbo", messages=messages, cache={"s-maxage": 10}
-        )
-        assert response2["id"] != response1["id"]
+            response2 = completion(
+                model="gpt-3.5-turbo", messages=messages, cache={"s-maxage": 10}
+            )
+            print(f"response1: {response1}")
+            print(f"response2: {response2}")
+            assert response2["id"] != response1["id"]
+        else:
+            ## TTL = 0
+            response1 = await litellm.acompletion(
+                model="gpt-3.5-turbo", messages=messages, cache={"ttl": 0}
+            )
+            await asyncio.sleep(10)
+            response2 = await litellm.acompletion(
+                model="gpt-3.5-turbo", messages=messages, cache={"s-maxage": 10}
+            )
+            assert response2["id"] != response1["id"]

        message = [{"role": "user", "content": f"Hey, how's it going? {uuid.uuid4()}"}]
        ## TTL = 5
-        response1 = completion(
-            model="gpt-3.5-turbo", messages=messages, cache={"ttl": 5}
-        )
+        if sync_flag:
+            response1 = completion(
+                model="gpt-3.5-turbo", messages=messages, cache={"ttl": 5}
+            )
@@ -204,6 +222,17 @@ def test_caching_with_cache_controls():
-        print(f"response1: {response1}")
-        print(f"response2: {response2}")
-        assert response2["id"] == response1["id"]
+            print(f"response1: {response1}")
+            print(f"response2: {response2}")
+            assert response2["id"] == response1["id"]
+        else:
+            response1 = await litellm.acompletion(
+                model="gpt-3.5-turbo", messages=messages, cache={"ttl": 25}
+            )
+            await asyncio.sleep(10)
+            response2 = await litellm.acompletion(
+                model="gpt-3.5-turbo", messages=messages, cache={"s-maxage": 25}
+            )
+            print(f"response1: {response1}")
+            print(f"response2: {response2}")
+            assert response2["id"] == response1["id"]
    except Exception as e:
        print(f"error occurred: {traceback.format_exc()}")
        pytest.fail(f"Error occurred: {e}")
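Note on the pattern: parametrizing sync_flag folds the sync and async code paths into a single coroutine test, and the longer await asyncio.sleep(10) on the async path gives the cache write time to land before the second read (hence the commit title). A minimal, self-contained sketch of the same structure; the do_work/do_work_async helpers are hypothetical stand-ins, not litellm APIs, and the async test relies on the pytest-asyncio plugin:

import asyncio

import pytest


def do_work() -> str:
    # stand-in for a synchronous client call
    return "result"


async def do_work_async() -> str:
    await asyncio.sleep(0)  # yield to the event loop, like a real awaited call
    return "result"


@pytest.mark.parametrize("sync_flag", [True, False])
@pytest.mark.asyncio
async def test_sync_and_async_paths(sync_flag):
    if sync_flag:
        result = do_work()  # sync path: plain call, no await
    else:
        result = await do_work_async()  # async path: awaited call
    assert result == "result"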

litellm/utils.py

@@ -2716,7 +2716,7 @@ def client(original_function):
            # [OPTIONAL] CHECK CACHE
            print_verbose(
-                f"kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}"
+                f"SYNC kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}; kwargs.get('cache')['no-cache']: {kwargs.get('cache', {}).get('no-cache', False)}"
            )
            # if caching is false or cache["no-cache"]==True, don't run this
            if (
@@ -2724,17 +2724,14 @@ def client(original_function):
                (
                    (
                        kwargs.get("caching", None) is None
-                        and kwargs.get("cache", None) is None
                        and litellm.cache is not None
                    )
                    or kwargs.get("caching", False) == True
                )
-                and (
-                    kwargs.get("cache", None) is None
-                    or kwargs["cache"].get("no-cache", False) != True
-                )
+                and kwargs.get("cache", {}).get("no-cache", False) != True
            )
            and kwargs.get("aembedding", False) != True
+            and kwargs.get("atext_completion", False) != True
            and kwargs.get("acompletion", False) != True
            and kwargs.get("aimg_generation", False) != True
            and kwargs.get("atranscription", False) != True
@@ -3014,21 +3011,16 @@ def client(original_function):
            # [OPTIONAL] CHECK CACHE
            print_verbose(
-                f"kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}; kwargs.get('cache'): {kwargs.get('cache', None)}"
+                f"ASYNC kwargs[caching]: {kwargs.get('caching', False)}; litellm.cache: {litellm.cache}; kwargs.get('cache'): {kwargs.get('cache', None)}"
            )
            # if caching is false, don't run this
            final_embedding_cached_response = None
            if (
-                (
-                    kwargs.get("caching", None) is None
-                    and kwargs.get("cache", None) is None
-                    and litellm.cache is not None
-                )
+                (kwargs.get("caching", None) is None and litellm.cache is not None)
                or kwargs.get("caching", False) == True
            ) and (
-                kwargs.get("cache", None) is None
-                or kwargs["cache"].get("no-cache", False) != True
+                kwargs.get("cache", {}).get("no-cache", False) != True
            ):  # allow users to control returning cached responses from the completion function
                # checking cache
                print_verbose("INSIDE CHECKING CACHE")
@@ -3074,7 +3066,6 @@ def client(original_function):
                    preset_cache_key  # for streaming calls, we need to pass the preset_cache_key
                )
                cached_result = litellm.cache.get_cache(*args, **kwargs)
-
                if cached_result is not None and not isinstance(
                    cached_result, list
                ):
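Note on the condition rewrite: in both the SYNC and ASYNC branches, the old two-clause check (cache is None, or cache["no-cache"] != True) collapses into one expression by defaulting the cache dict to {}. A minimal sketch of the simplified gate, using a hypothetical should_check_cache helper (not litellm's API) and assuming callers omit the cache kwarg rather than passing cache=None explicitly:

from typing import Any, Dict, Optional


def should_check_cache(kwargs: Dict[str, Any], global_cache: Optional[object]) -> bool:
    # hypothetical helper mirroring the simplified gate above, not litellm code
    caching_enabled = (
        (kwargs.get("caching", None) is None and global_cache is not None)
        or kwargs.get("caching", False) == True
    )
    # defaulting to {} makes the "no-cache" lookup safe when no cache dict
    # was passed, replacing the old separate "cache is None" clause
    no_cache_requested = kwargs.get("cache", {}).get("no-cache", False) == True
    return caching_enabled and not no_cache_requested


# usage sketch: no kwargs -> check cache; explicit no-cache -> skip the cache
assert should_check_cache({}, global_cache=object()) is True
assert should_check_cache({"cache": {"no-cache": True}}, global_cache=object()) is False
assert should_check_cache({"caching": False}, global_cache=object()) is False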