diff --git a/litellm/tests/test_custom_callback_input.py b/litellm/tests/test_custom_callback_input.py
index 9ea1a3bb16..5da46ffeea 100644
--- a/litellm/tests/test_custom_callback_input.py
+++ b/litellm/tests/test_custom_callback_input.py
@@ -797,6 +797,8 @@ async def test_async_completion_azure_caching():
 
 @pytest.mark.asyncio
 async def test_async_completion_azure_caching_streaming():
+    import copy
+
     litellm.set_verbose = True
     customHandler_caching = CompletionCustomHandler()
     litellm.cache = Cache(
@@ -816,8 +818,9 @@ async def test_async_completion_azure_caching_streaming():
         stream=True,
     )
     async for chunk in response1:
-        continue
+        print(f"chunk in response1: {chunk}")
     await asyncio.sleep(1)
+    initial_customhandler_caching_states = len(customHandler_caching.states)
     print(f"customHandler_caching.states pre-cache hit: {customHandler_caching.states}")
     response2 = await litellm.acompletion(
         model="azure/chatgpt-v-2",
@@ -828,14 +831,14 @@ async def test_async_completion_azure_caching_streaming():
         stream=True,
     )
     async for chunk in response2:
-        continue
+        print(f"chunk in response2: {chunk}")
     await asyncio.sleep(1)  # success callbacks are done in parallel
     print(
         f"customHandler_caching.states post-cache hit: {customHandler_caching.states}"
     )
     assert len(customHandler_caching.errors) == 0
     assert (
-        len(customHandler_caching.states) == 4
+        len(customHandler_caching.states) > initial_customhandler_caching_states
     )  # pre, post, streaming .., success, success
 
 
diff --git a/litellm/utils.py b/litellm/utils.py
index a7f8c378d1..3444c88484 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1411,7 +1411,7 @@ class Logging:
                                 print_verbose(
                                     f"success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n"
                                 )
-                                return
+                                pass
                             else:
                                 print_verbose(
                                     "success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
@@ -1616,7 +1616,7 @@ class Logging:
                             print_verbose(
                                 f"async success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n"
                             )
-                            return
+                            pass
                         else:
                             print_verbose(
                                 "async success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache"
@@ -1625,8 +1625,10 @@ class Logging:
                             # only add to cache once we have a complete streaming response
                             litellm.cache.add_cache(result, **kwargs)
                 if isinstance(callback, CustomLogger):  # custom logger class
-                    print_verbose(f"Async success callbacks: {callback}")
-                    if self.stream:
+                    print_verbose(
+                        f"Async success callbacks: {callback}; self.stream: {self.stream}; complete_streaming_response: {self.model_call_details.get('complete_streaming_response', None)}"
+                    )
+                    if self.stream == True:
                         if "complete_streaming_response" in self.model_call_details:
                             await callback.async_log_success_event(
                                 kwargs=self.model_call_details,