fix(utils.py): enable streaming cache logging

2024-02-21 21:10:58 -08:00 · 2024-02-21 21:10:58 -08:00 · 2d62dee712
commit 2d62dee712
parent b011c8b93a
2 changed files with 12 additions and 7 deletions
--- a/litellm/tests/test_custom_callback_input.py
+++ b/litellm/tests/test_custom_callback_input.py
@ -797,6 +797,8 @@ async def test_async_completion_azure_caching():
@pytest.mark.asyncio
 async def test_async_completion_azure_caching_streaming():
    import copy
    litellm.set_verbose = True
    customHandler_caching = CompletionCustomHandler()
    litellm.cache = Cache(
@ -816,8 +818,9 @@ async def test_async_completion_azure_caching_streaming():
        stream=True,
    )
    async for chunk in response1:
-        continue
+        print(f"chunk in response1: {chunk}")
    await asyncio.sleep(1)
    initial_customhandler_caching_states = len(customHandler_caching.states)
    print(f"customHandler_caching.states pre-cache hit: {customHandler_caching.states}")
    response2 = await litellm.acompletion(
        model="azure/chatgpt-v-2",
@ -828,14 +831,14 @@ async def test_async_completion_azure_caching_streaming():
        stream=True,
    )
    async for chunk in response2:
-        continue
+        print(f"chunk in response2: {chunk}")
    await asyncio.sleep(1)  # success callbacks are done in parallel
    print(
        f"customHandler_caching.states post-cache hit: {customHandler_caching.states}"
    )
    assert len(customHandler_caching.errors) == 0
    assert (
-        len(customHandler_caching.states) == 4
+        len(customHandler_caching.states) > initial_customhandler_caching_states
    )  # pre, post, streaming .., success, success
--- a/litellm/utils.py
+++ b/litellm/utils.py
@ -1411,7 +1411,7 @@ class Logging:
                                print_verbose(
                                    f"success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n"
                                )
-                                return
+                                pass
                            else:
                                print_verbose(
                                    "success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
@ -1616,7 +1616,7 @@ class Logging:
                            print_verbose(
                                f"async success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n"
                            )
-                            return
+                            pass
                        else:
                            print_verbose(
                                "async success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
@ -1625,8 +1625,10 @@ class Logging:
                            # only add to cache once we have a complete streaming response
                            litellm.cache.add_cache(result, **kwargs)
                if isinstance(callback, CustomLogger):  # custom logger class
-                    print_verbose(f"Async success callbacks: {callback}")
-                    if self.stream:
+                    print_verbose(
+                        f"Async success callbacks: {callback}; self.stream: {self.stream}; complete_streaming_response: {self.model_call_details.get('complete_streaming_response', None)}"
+                    )
+                    if self.stream == True:
                        if "complete_streaming_response" in self.model_call_details:
                            await callback.async_log_success_event(
                                kwargs=self.model_call_details,