test(test_caching.py): fix async tests

2024-03-15 18:09:25 -07:00 · 2024-03-15 18:09:25 -07:00 · 3072137739
commit 3072137739
parent 8a20ea795b
2 changed files with 81 additions and 95 deletions
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -150,6 +150,9 @@ class RedisCache(BaseCache):
                await redis_client.set(
                    name=key, value=json.dumps(value), ex=ttl, get=True
                )
                print_verbose(
                    f"Successfully Set ASYNC Redis Cache: key: {key}\nValue {value}\nttl={ttl}"
                )
            except Exception as e:
                # NON blocking - notify users Redis is throwing an exception
                print_verbose(
@@ -216,7 +219,7 @@ class RedisCache(BaseCache):
        _redis_client = self.init_async_client()
        async with _redis_client as redis_client:
            try:
-                print_verbose(f"Get Redis Cache: key: {key}")
+                print_verbose(f"Get Async Redis Cache: key: {key}")
                cached_response = await redis_client.get(key)
                print_verbose(
                    f"Got Async Redis Cache: key: {key}, cached_response {cached_response}"
@@ -225,8 +228,9 @@ class RedisCache(BaseCache):
                return response
            except Exception as e:
                # NON blocking - notify users Redis is throwing an exception
-                traceback.print_exc()
+                print_verbose(
-                logging.debug("LiteLLM Caching: get() - Got exception from REDIS: ", e)
+                    f"LiteLLM Caching: async get() - Got exception from REDIS: {str(e)}"
                )
    async def async_get_cache_pipeline(self, key_list) -> dict:
        """
--- a/litellm/tests/test_caching.py
+++ b/litellm/tests/test_caching.py
@@ -474,11 +474,10 @@ def test_redis_cache_completion_stream():
 # test_redis_cache_completion_stream()
-def test_redis_cache_acompletion_stream():
+@pytest.mark.asyncio
-    import asyncio
+async def test_redis_cache_acompletion_stream():
    try:
-        litellm.set_verbose = False
+        litellm.set_verbose = True
        random_word = generate_random_word()
        messages = [
            {
@@ -496,37 +495,31 @@ def test_redis_cache_acompletion_stream():
        response_1_content = ""
        response_2_content = ""
-        async def call1():
+        response1 = await litellm.acompletion(
-            nonlocal response_1_content
+            model="gpt-3.5-turbo",
-            response1 = await litellm.acompletion(
+            messages=messages,
-                model="gpt-3.5-turbo",
+            max_tokens=40,
-                messages=messages,
+            temperature=1,
-                max_tokens=40,
+            stream=True,
-                temperature=1,
+        )
-                stream=True,
+        async for chunk in response1:
-            )
+            response_1_content += chunk.choices[0].delta.content or ""
-            async for chunk in response1:
+        print(response_1_content)
                response_1_content += chunk.choices[0].delta.content or ""
            print(response_1_content)
        asyncio.run(call1())
        time.sleep(0.5)
        print("\n\n Response 1 content: ", response_1_content, "\n\n")
-        async def call2():
+        response2 = await litellm.acompletion(
-            nonlocal response_2_content
+            model="gpt-3.5-turbo",
-            response2 = await litellm.acompletion(
+            messages=messages,
-                model="gpt-3.5-turbo",
+            max_tokens=40,
-                messages=messages,
+            temperature=1,
-                max_tokens=40,
+            stream=True,
-                temperature=1,
+        )
-                stream=True,
+        async for chunk in response2:
-            )
+            response_2_content += chunk.choices[0].delta.content or ""
-            async for chunk in response2:
+        print(response_2_content)
                response_2_content += chunk.choices[0].delta.content or ""
            print(response_2_content)
        asyncio.run(call2())
        print("\nresponse 1", response_1_content)
        print("\nresponse 2", response_2_content)
        assert (
@@ -536,14 +529,15 @@ def test_redis_cache_acompletion_stream():
        litellm.success_callback = []
        litellm._async_success_callback = []
    except Exception as e:
-        print(e)
+        print(f"{str(e)}\n\n{traceback.format_exc()}")
        raise e
 # test_redis_cache_acompletion_stream()
-def test_redis_cache_acompletion_stream_bedrock():
+@pytest.mark.asyncio
 async def test_redis_cache_acompletion_stream_bedrock():
    import asyncio
    try:
@@ -565,39 +559,33 @@ def test_redis_cache_acompletion_stream_bedrock():
        response_1_content = ""
        response_2_content = ""
-        async def call1():
+        response1 = await litellm.acompletion(
-            nonlocal response_1_content
+            model="bedrock/anthropic.claude-v2",
-            response1 = await litellm.acompletion(
+            messages=messages,
-                model="bedrock/anthropic.claude-v2",
+            max_tokens=40,
-                messages=messages,
+            temperature=1,
-                max_tokens=40,
+            stream=True,
-                temperature=1,
+        )
-                stream=True,
+        async for chunk in response1:
-            )
+            print(chunk)
-            async for chunk in response1:
+            response_1_content += chunk.choices[0].delta.content or ""
-                print(chunk)
+        print(response_1_content)
                response_1_content += chunk.choices[0].delta.content or ""
            print(response_1_content)
        asyncio.run(call1())
        time.sleep(0.5)
        print("\n\n Response 1 content: ", response_1_content, "\n\n")
-        async def call2():
+        response2 = await litellm.acompletion(
-            nonlocal response_2_content
+            model="bedrock/anthropic.claude-v2",
-            response2 = await litellm.acompletion(
+            messages=messages,
-                model="bedrock/anthropic.claude-v2",
+            max_tokens=40,
-                messages=messages,
+            temperature=1,
-                max_tokens=40,
+            stream=True,
-                temperature=1,
+        )
-                stream=True,
+        async for chunk in response2:
-            )
+            print(chunk)
-            async for chunk in response2:
+            response_2_content += chunk.choices[0].delta.content or ""
-                print(chunk)
+        print(response_2_content)
                response_2_content += chunk.choices[0].delta.content or ""
            print(response_2_content)
        asyncio.run(call2())
        print("\nresponse 1", response_1_content)
        print("\nresponse 2", response_2_content)
        assert (
@@ -612,8 +600,8 @@ def test_redis_cache_acompletion_stream_bedrock():
        raise e
-@pytest.mark.skip(reason="AWS Suspended Account")
+@pytest.mark.asyncio
-def test_s3_cache_acompletion_stream_azure():
+async def test_s3_cache_acompletion_stream_azure():
    import asyncio
    try:
@@ -637,41 +625,35 @@ def test_s3_cache_acompletion_stream_azure():
        response_1_created = ""
        response_2_created = ""
-        async def call1():
+        response1 = await litellm.acompletion(
-            nonlocal response_1_content, response_1_created
+            model="azure/chatgpt-v-2",
-            response1 = await litellm.acompletion(
+            messages=messages,
-                model="azure/chatgpt-v-2",
+            max_tokens=40,
-                messages=messages,
+            temperature=1,
-                max_tokens=40,
+            stream=True,
-                temperature=1,
+        )
-                stream=True,
+        async for chunk in response1:
-            )
+            print(chunk)
-            async for chunk in response1:
+            response_1_created = chunk.created
-                print(chunk)
+            response_1_content += chunk.choices[0].delta.content or ""
-                response_1_created = chunk.created
+        print(response_1_content)
                response_1_content += chunk.choices[0].delta.content or ""
            print(response_1_content)
        asyncio.run(call1())
        time.sleep(0.5)
        print("\n\n Response 1 content: ", response_1_content, "\n\n")
-        async def call2():
+        response2 = await litellm.acompletion(
-            nonlocal response_2_content, response_2_created
+            model="azure/chatgpt-v-2",
-            response2 = await litellm.acompletion(
+            messages=messages,
-                model="azure/chatgpt-v-2",
+            max_tokens=40,
-                messages=messages,
+            temperature=1,
-                max_tokens=40,
+            stream=True,
-                temperature=1,
+        )
-                stream=True,
+        async for chunk in response2:
-            )
+            print(chunk)
-            async for chunk in response2:
+            response_2_content += chunk.choices[0].delta.content or ""
-                print(chunk)
+            response_2_created = chunk.created
-                response_2_content += chunk.choices[0].delta.content or ""
+        print(response_2_content)
                response_2_created = chunk.created
            print(response_2_content)
        asyncio.run(call2())
        print("\nresponse 1", response_1_content)
        print("\nresponse 2", response_2_content)