diff --git a/litellm/caching.py b/litellm/caching.py
index eda443941..ed856f86f 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -150,6 +150,9 @@ class RedisCache(BaseCache):
                 await redis_client.set(
                     name=key, value=json.dumps(value), ex=ttl, get=True
                 )
+                print_verbose(
+                    f"Successfully Set ASYNC Redis Cache: key: {key}\nValue {value}\nttl={ttl}"
+                )
             except Exception as e:
                 # NON blocking - notify users Redis is throwing an exception
                 print_verbose(
@@ -216,7 +219,7 @@ class RedisCache(BaseCache):
         _redis_client = self.init_async_client()
         async with _redis_client as redis_client:
             try:
-                print_verbose(f"Get Redis Cache: key: {key}")
+                print_verbose(f"Get Async Redis Cache: key: {key}")
                 cached_response = await redis_client.get(key)
                 print_verbose(
                     f"Got Async Redis Cache: key: {key}, cached_response {cached_response}"
@@ -225,8 +228,9 @@ class RedisCache(BaseCache):
                 return response
             except Exception as e:
                 # NON blocking - notify users Redis is throwing an exception
-                traceback.print_exc()
-                logging.debug("LiteLLM Caching: get() - Got exception from REDIS: ", e)
+                print_verbose(
+                    f"LiteLLM Caching: async get() - Got exception from REDIS: {str(e)}"
+                )

     async def async_get_cache_pipeline(self, key_list) -> dict:
         """
diff --git a/litellm/tests/test_caching.py b/litellm/tests/test_caching.py
index 07d39b086..aa0681c61 100644
--- a/litellm/tests/test_caching.py
+++ b/litellm/tests/test_caching.py
@@ -474,11 +474,10 @@ def test_redis_cache_completion_stream():
 # test_redis_cache_completion_stream()


-def test_redis_cache_acompletion_stream():
-    import asyncio
-
+@pytest.mark.asyncio
+async def test_redis_cache_acompletion_stream():
     try:
-        litellm.set_verbose = False
+        litellm.set_verbose = True
         random_word = generate_random_word()
         messages = [
             {
@@ -496,37 +495,31 @@ def test_redis_cache_acompletion_stream():
         response_1_content = ""
         response_2_content = ""

-        async def call1():
-            nonlocal response_1_content
-            response1 = await litellm.acompletion(
-                model="gpt-3.5-turbo",
-                messages=messages,
-                max_tokens=40,
-                temperature=1,
-                stream=True,
-            )
-            async for chunk in response1:
-                response_1_content += chunk.choices[0].delta.content or ""
-            print(response_1_content)
+        response1 = await litellm.acompletion(
+            model="gpt-3.5-turbo",
+            messages=messages,
+            max_tokens=40,
+            temperature=1,
+            stream=True,
+        )
+        async for chunk in response1:
+            response_1_content += chunk.choices[0].delta.content or ""
+        print(response_1_content)

-        asyncio.run(call1())
         time.sleep(0.5)
         print("\n\n Response 1 content: ", response_1_content, "\n\n")

-        async def call2():
-            nonlocal response_2_content
-            response2 = await litellm.acompletion(
-                model="gpt-3.5-turbo",
-                messages=messages,
-                max_tokens=40,
-                temperature=1,
-                stream=True,
-            )
-            async for chunk in response2:
-                response_2_content += chunk.choices[0].delta.content or ""
-            print(response_2_content)
+        response2 = await litellm.acompletion(
+            model="gpt-3.5-turbo",
+            messages=messages,
+            max_tokens=40,
+            temperature=1,
+            stream=True,
+        )
+        async for chunk in response2:
+            response_2_content += chunk.choices[0].delta.content or ""
+        print(response_2_content)

-        asyncio.run(call2())
         print("\nresponse 1", response_1_content)
         print("\nresponse 2", response_2_content)
         assert (
@@ -536,14 +529,15 @@ def test_redis_cache_acompletion_stream():
         litellm.success_callback = []
         litellm._async_success_callback = []
     except Exception as e:
-        print(e)
+        print(f"{str(e)}\n\n{traceback.format_exc()}")
         raise e


 # test_redis_cache_acompletion_stream()


-def test_redis_cache_acompletion_stream_bedrock():
+@pytest.mark.asyncio
+async def test_redis_cache_acompletion_stream_bedrock():
     import asyncio

     try:
@@ -565,39 +559,33 @@ def test_redis_cache_acompletion_stream_bedrock():
         response_1_content = ""
         response_2_content = ""

-        async def call1():
-            nonlocal response_1_content
-            response1 = await litellm.acompletion(
-                model="bedrock/anthropic.claude-v2",
-                messages=messages,
-                max_tokens=40,
-                temperature=1,
-                stream=True,
-            )
-            async for chunk in response1:
-                print(chunk)
-                response_1_content += chunk.choices[0].delta.content or ""
-            print(response_1_content)
+        response1 = await litellm.acompletion(
+            model="bedrock/anthropic.claude-v2",
+            messages=messages,
+            max_tokens=40,
+            temperature=1,
+            stream=True,
+        )
+        async for chunk in response1:
+            print(chunk)
+            response_1_content += chunk.choices[0].delta.content or ""
+        print(response_1_content)

-        asyncio.run(call1())
         time.sleep(0.5)
         print("\n\n Response 1 content: ", response_1_content, "\n\n")

-        async def call2():
-            nonlocal response_2_content
-            response2 = await litellm.acompletion(
-                model="bedrock/anthropic.claude-v2",
-                messages=messages,
-                max_tokens=40,
-                temperature=1,
-                stream=True,
-            )
-            async for chunk in response2:
-                print(chunk)
-                response_2_content += chunk.choices[0].delta.content or ""
-            print(response_2_content)
+        response2 = await litellm.acompletion(
+            model="bedrock/anthropic.claude-v2",
+            messages=messages,
+            max_tokens=40,
+            temperature=1,
+            stream=True,
+        )
+        async for chunk in response2:
+            print(chunk)
+            response_2_content += chunk.choices[0].delta.content or ""
+        print(response_2_content)

-        asyncio.run(call2())
         print("\nresponse 1", response_1_content)
         print("\nresponse 2", response_2_content)
         assert (
@@ -612,8 +600,8 @@ def test_redis_cache_acompletion_stream_bedrock():
         raise e


-@pytest.mark.skip(reason="AWS Suspended Account")
-def test_s3_cache_acompletion_stream_azure():
+@pytest.mark.asyncio
+async def test_s3_cache_acompletion_stream_azure():
     import asyncio

     try:
@@ -637,41 +625,35 @@ def test_s3_cache_acompletion_stream_azure():
         response_1_created = ""
         response_2_created = ""

-        async def call1():
-            nonlocal response_1_content, response_1_created
-            response1 = await litellm.acompletion(
-                model="azure/chatgpt-v-2",
-                messages=messages,
-                max_tokens=40,
-                temperature=1,
-                stream=True,
-            )
-            async for chunk in response1:
-                print(chunk)
-                response_1_created = chunk.created
-                response_1_content += chunk.choices[0].delta.content or ""
-            print(response_1_content)
+        response1 = await litellm.acompletion(
+            model="azure/chatgpt-v-2",
+            messages=messages,
+            max_tokens=40,
+            temperature=1,
+            stream=True,
+        )
+        async for chunk in response1:
+            print(chunk)
+            response_1_created = chunk.created
+            response_1_content += chunk.choices[0].delta.content or ""
+        print(response_1_content)

-        asyncio.run(call1())
         time.sleep(0.5)
         print("\n\n Response 1 content: ", response_1_content, "\n\n")

-        async def call2():
-            nonlocal response_2_content, response_2_created
-            response2 = await litellm.acompletion(
-                model="azure/chatgpt-v-2",
-                messages=messages,
-                max_tokens=40,
-                temperature=1,
-                stream=True,
-            )
-            async for chunk in response2:
-                print(chunk)
-                response_2_content += chunk.choices[0].delta.content or ""
-                response_2_created = chunk.created
-            print(response_2_content)
+        response2 = await litellm.acompletion(
+            model="azure/chatgpt-v-2",
+            messages=messages,
+            max_tokens=40,
+            temperature=1,
+            stream=True,
+        )
+        async for chunk in response2:
+            print(chunk)
+            response_2_content += chunk.choices[0].delta.content or ""
+            response_2_created = chunk.created
+        print(response_2_content)

-        asyncio.run(call2())
         print("\nresponse 1", response_1_content)
         print("\nresponse 2", response_2_content)
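
Note (illustrative sketch, not part of the patch): the test changes above replace the nested `async def call1()` / `asyncio.run(call1())` helpers with a single coroutine test driven by `@pytest.mark.asyncio`, which also removes the need for the `nonlocal` accumulators. Below is a minimal, self-contained example of that pattern under stated assumptions: `fake_stream` and `test_streamed_responses_match` are hypothetical stand-ins (not litellm's API), and the pytest-asyncio plugin is assumed to be installed.

import asyncio

import pytest


async def fake_stream(text: str):
    # Hypothetical stand-in for a streaming acompletion(..., stream=True) call:
    # yields chunks one at a time the way a streamed response yields delta content.
    for token in text.split():
        await asyncio.sleep(0)  # yield control to the event loop between chunks
        yield token + " "


@pytest.mark.asyncio
async def test_streamed_responses_match():
    # The whole test is a coroutine, so chunks are consumed inline instead of
    # inside nested helpers executed via asyncio.run().
    response_1_content = ""
    async for chunk in fake_stream("a one sentence poem"):
        response_1_content += chunk or ""

    response_2_content = ""
    async for chunk in fake_stream("a one sentence poem"):
        response_2_content += chunk or ""

    # Mirrors the cache-hit assertion in the real tests: both streamed
    # "calls" should reassemble to identical content.
    assert response_1_content == response_2_content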