forked from phoenix/litellm-mirror
test(test_caching.py): fix async tests
parent 8a20ea795b
commit 3072137739
2 changed files with 81 additions and 95 deletions
@@ -150,6 +150,9 @@ class RedisCache(BaseCache):
                 await redis_client.set(
                     name=key, value=json.dumps(value), ex=ttl, get=True
                 )
+                print_verbose(
+                    f"Successfully Set ASYNC Redis Cache: key: {key}\nValue {value}\nttl={ttl}"
+                )
         except Exception as e:
             # NON blocking - notify users Redis is throwing an exception
             print_verbose(
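For reference, a minimal standalone sketch of the async write pattern this hunk adds logging around, assuming the redis-py asyncio client (redis.asyncio) that this RedisCache wraps; the URL, key, value, and function name below are purely illustrative:

import asyncio
import json

import redis.asyncio as async_redis


async def demo_async_set():
    # Illustrative connection; adjust the URL for your Redis instance.
    client = async_redis.Redis.from_url("redis://localhost:6379")
    key, value, ttl = "litellm-example-key", {"response": "hello"}, 60
    # Serialize and write with an expiry, mirroring the set() call in the hunk above.
    await client.set(name=key, value=json.dumps(value), ex=ttl)
    print(f"Successfully Set ASYNC Redis Cache: key: {key}\nValue {value}\nttl={ttl}")
    await client.close()


asyncio.run(demo_async_set())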
@@ -216,7 +219,7 @@ class RedisCache(BaseCache):
         _redis_client = self.init_async_client()
         async with _redis_client as redis_client:
             try:
-                print_verbose(f"Get Redis Cache: key: {key}")
+                print_verbose(f"Get Async Redis Cache: key: {key}")
                 cached_response = await redis_client.get(key)
                 print_verbose(
                     f"Got Async Redis Cache: key: {key}, cached_response {cached_response}"
@@ -225,8 +228,9 @@ class RedisCache(BaseCache):
                 return response
             except Exception as e:
                 # NON blocking - notify users Redis is throwing an exception
-                traceback.print_exc()
-                logging.debug("LiteLLM Caching: get() - Got exception from REDIS: ", e)
+                print_verbose(
+                    f"LiteLLM Caching: async get() - Got exception from REDIS: {str(e)}"
+                )
 
     async def async_get_cache_pipeline(self, key_list) -> dict:
         """
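The intent behind the error-handling change above is that a Redis failure should never break the request path: the exception is reported and the caller falls through as if the cache had missed. A rough sketch of that non-blocking read pattern, using illustrative names rather than LiteLLM's actual API:

import json

import redis.asyncio as async_redis


async def non_blocking_get(client: async_redis.Redis, key: str):
    # Return the cached value for `key`, or None on a miss or Redis error.
    try:
        cached = await client.get(key)
        if cached is None:
            return None
        return json.loads(cached)
    except Exception as e:
        # NON blocking - log the error instead of raising, so callers
        # simply recompute the value as if the cache had missed.
        print(f"LiteLLM Caching: async get() - Got exception from REDIS: {str(e)}")
        return None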
@@ -474,11 +474,10 @@ def test_redis_cache_completion_stream():
 # test_redis_cache_completion_stream()
 
 
-def test_redis_cache_acompletion_stream():
-    import asyncio
-
+@pytest.mark.asyncio
+async def test_redis_cache_acompletion_stream():
     try:
-        litellm.set_verbose = False
+        litellm.set_verbose = True
         random_word = generate_random_word()
         messages = [
             {
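This hunk sets the pattern repeated through the rest of the test file: a synchronous test that built nested call1()/call2() coroutines and drove them with asyncio.run(...) becomes a native async test collected by pytest-asyncio, so the awaits can live inline. A minimal before/after sketch, with a stand-in await in place of the real litellm.acompletion(...) calls:

import asyncio

import pytest


# Before: a synchronous test that manually drives the event loop.
def test_cache_old_style():
    async def call1():
        await asyncio.sleep(0)  # stand-in for `await litellm.acompletion(...)`

    asyncio.run(call1())


# After: pytest-asyncio runs the coroutine itself, so no wrapper is needed.
@pytest.mark.asyncio
async def test_cache_new_style():
    await asyncio.sleep(0)  # stand-in for `await litellm.acompletion(...)`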
@@ -496,8 +495,6 @@ def test_redis_cache_acompletion_stream():
         response_1_content = ""
         response_2_content = ""
 
-        async def call1():
-            nonlocal response_1_content
         response1 = await litellm.acompletion(
             model="gpt-3.5-turbo",
             messages=messages,
@@ -509,12 +506,9 @@ def test_redis_cache_acompletion_stream():
             response_1_content += chunk.choices[0].delta.content or ""
         print(response_1_content)
 
-        asyncio.run(call1())
         time.sleep(0.5)
         print("\n\n Response 1 content: ", response_1_content, "\n\n")
 
-        async def call2():
-            nonlocal response_2_content
         response2 = await litellm.acompletion(
             model="gpt-3.5-turbo",
             messages=messages,
@@ -526,7 +520,6 @@ def test_redis_cache_acompletion_stream():
             response_2_content += chunk.choices[0].delta.content or ""
         print(response_2_content)
 
-        asyncio.run(call2())
         print("\nresponse 1", response_1_content)
         print("\nresponse 2", response_2_content)
         assert (
@@ -536,14 +529,15 @@ def test_redis_cache_acompletion_stream():
         litellm.success_callback = []
         litellm._async_success_callback = []
     except Exception as e:
-        print(e)
+        print(f"{str(e)}\n\n{traceback.format_exc()}")
         raise e
 
 
 # test_redis_cache_acompletion_stream()
 
 
-def test_redis_cache_acompletion_stream_bedrock():
+@pytest.mark.asyncio
+async def test_redis_cache_acompletion_stream_bedrock():
     import asyncio
 
     try:
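The print(e) to print(f"{str(e)}\n\n{traceback.format_exc()}") change above makes a failing test report the full stack trace rather than just the exception message; a tiny illustration of the difference, with a made-up error:

import traceback


def show_failure():
    try:
        raise ValueError("simulated cache failure")  # illustrative error
    except Exception as e:
        # str(e) is only the message; traceback.format_exc() returns the whole trace.
        print(f"{str(e)}\n\n{traceback.format_exc()}")


show_failure()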
@@ -565,8 +559,6 @@ def test_redis_cache_acompletion_stream_bedrock():
         response_1_content = ""
         response_2_content = ""
 
-        async def call1():
-            nonlocal response_1_content
         response1 = await litellm.acompletion(
             model="bedrock/anthropic.claude-v2",
             messages=messages,
@@ -579,12 +571,9 @@ def test_redis_cache_acompletion_stream_bedrock():
             response_1_content += chunk.choices[0].delta.content or ""
         print(response_1_content)
 
-        asyncio.run(call1())
         time.sleep(0.5)
         print("\n\n Response 1 content: ", response_1_content, "\n\n")
 
-        async def call2():
-            nonlocal response_2_content
         response2 = await litellm.acompletion(
             model="bedrock/anthropic.claude-v2",
             messages=messages,
@@ -597,7 +586,6 @@ def test_redis_cache_acompletion_stream_bedrock():
             response_2_content += chunk.choices[0].delta.content or ""
         print(response_2_content)
 
-        asyncio.run(call2())
         print("\nresponse 1", response_1_content)
         print("\nresponse 2", response_2_content)
         assert (
@@ -612,8 +600,8 @@ def test_redis_cache_acompletion_stream_bedrock():
         raise e
 
 
-@pytest.mark.skip(reason="AWS Suspended Account")
-def test_s3_cache_acompletion_stream_azure():
+@pytest.mark.asyncio
+async def test_s3_cache_acompletion_stream_azure():
     import asyncio
 
     try:
@@ -637,8 +625,6 @@ def test_s3_cache_acompletion_stream_azure():
         response_1_created = ""
         response_2_created = ""
 
-        async def call1():
-            nonlocal response_1_content, response_1_created
         response1 = await litellm.acompletion(
             model="azure/chatgpt-v-2",
             messages=messages,
@@ -652,12 +638,9 @@ def test_s3_cache_acompletion_stream_azure():
             response_1_content += chunk.choices[0].delta.content or ""
         print(response_1_content)
 
-        asyncio.run(call1())
         time.sleep(0.5)
         print("\n\n Response 1 content: ", response_1_content, "\n\n")
 
-        async def call2():
-            nonlocal response_2_content, response_2_created
         response2 = await litellm.acompletion(
             model="azure/chatgpt-v-2",
             messages=messages,
@@ -671,7 +654,6 @@ def test_s3_cache_acompletion_stream_azure():
             response_2_created = chunk.created
         print(response_2_content)
 
-        asyncio.run(call2())
         print("\nresponse 1", response_1_content)
         print("\nresponse 2", response_2_content)
 