fix(utils.py): support streaming cached response logging

2024-02-21 17:53:14 -08:00 · 2024-02-21 17:53:14 -08:00 · f8b233b653
commit f8b233b653
parent 0733bf1e7a
3 changed files with 114 additions and 33 deletions
--- a/litellm/tests/test_custom_callback_input.py
+++ b/litellm/tests/test_custom_callback_input.py
@@ -2,7 +2,7 @@
 ## This test asserts the type of data passed into each method of the custom callback handler
 import sys, os, time, inspect, asyncio, traceback
 from datetime import datetime
-import pytest
+import pytest, uuid
 from pydantic import BaseModel

 sys.path.insert(0, os.path.abspath("../.."))
@@ -795,6 +795,50 @@ async def test_async_completion_azure_caching():
    assert len(customHandler_caching.states) == 4  # pre, post, success, success


+@pytest.mark.asyncio
+async def test_async_completion_azure_caching_streaming():  # checks the handler's recorded states/errors after two identical streamed, caching-enabled requests
+    litellm.set_verbose = True
+    customHandler_caching = CompletionCustomHandler()  # its .states and .errors are inspected below
+    litellm.cache = Cache(
+        type="redis",
+        host=os.environ["REDIS_HOST"],  # REDIS_* variables must be set; a missing one raises KeyError
+        port=os.environ["REDIS_PORT"],
+        password=os.environ["REDIS_PASSWORD"],
+    )
+    litellm.callbacks = [customHandler_caching]
+    unique_time = uuid.uuid4()  # a random UUID (not a timestamp, despite the name) that makes this run's prompt unique
+    response1 = await litellm.acompletion(  # first request with the fresh prompt
+        model="azure/chatgpt-v-2",
+        messages=[
+            {"role": "user", "content": f"Hi 👋 - i'm async azure {unique_time}"}
+        ],
+        caching=True,
+        stream=True,
+    )
+    async for chunk in response1:  # drain the stream; chunk contents are not inspected
+        continue
+    await asyncio.sleep(1)  # presumably the same wait-for-callbacks pause as after the second request
+    print(f"customHandler_caching.states pre-cache hit: {customHandler_caching.states}")
+    response2 = await litellm.acompletion(  # identical request; intended to be answered from the cache
+        model="azure/chatgpt-v-2",
+        messages=[
+            {"role": "user", "content": f"Hi 👋 - i'm async azure {unique_time}"}
+        ],
+        caching=True,
+        stream=True,
+    )
+    async for chunk in response2:  # drain the second stream as well
+        continue
+    await asyncio.sleep(1)  # success callbacks are done in parallel
+    print(
+        f"customHandler_caching.states post-cache hit: {customHandler_caching.states}"
+    )
+    assert len(customHandler_caching.errors) == 0  # the handler's errors list must be empty
+    assert (
+        len(customHandler_caching.states) > 4
+    )  # pre, post, streaming .., success, success
+
+
 @pytest.mark.asyncio
 async def test_async_embedding_azure_caching():
    print("Testing custom callback input - Azure Caching")