diff --git a/litellm/tests/test_custom_callback_input.py b/litellm/tests/test_custom_callback_input.py index 9ea1a3bb16..5da46ffeea 100644 --- a/litellm/tests/test_custom_callback_input.py +++ b/litellm/tests/test_custom_callback_input.py @@ -797,6 +797,8 @@ async def test_async_completion_azure_caching(): @pytest.mark.asyncio async def test_async_completion_azure_caching_streaming(): + import copy + litellm.set_verbose = True customHandler_caching = CompletionCustomHandler() litellm.cache = Cache( @@ -816,8 +818,9 @@ async def test_async_completion_azure_caching_streaming(): stream=True, ) async for chunk in response1: - continue + print(f"chunk in response1: {chunk}") await asyncio.sleep(1) + initial_customhandler_caching_states = len(customHandler_caching.states) print(f"customHandler_caching.states pre-cache hit: {customHandler_caching.states}") response2 = await litellm.acompletion( model="azure/chatgpt-v-2", @@ -828,14 +831,14 @@ async def test_async_completion_azure_caching_streaming(): stream=True, ) async for chunk in response2: - continue + print(f"chunk in response2: {chunk}") await asyncio.sleep(1) # success callbacks are done in parallel print( f"customHandler_caching.states post-cache hit: {customHandler_caching.states}" ) assert len(customHandler_caching.errors) == 0 assert ( - len(customHandler_caching.states) == 4 + len(customHandler_caching.states) > initial_customhandler_caching_states ) # pre, post, streaming .., success, success diff --git a/litellm/utils.py b/litellm/utils.py index a7f8c378d1..3444c88484 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1411,7 +1411,7 @@ class Logging: print_verbose( f"success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n" ) - return + pass else: print_verbose( "success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache" @@ -1616,7 +1616,7 @@ class Logging: print_verbose( f"async success_callback: reaches cache for logging, there is no complete_streaming_response. Kwargs={kwargs}\n\n" ) - return + pass else: print_verbose( "async success_callback: reaches cache for logging, there is a complete_streaming_response. Adding to cache" @@ -1625,8 +1625,10 @@ class Logging: # only add to cache once we have a complete streaming response litellm.cache.add_cache(result, **kwargs) if isinstance(callback, CustomLogger): # custom logger class - print_verbose(f"Async success callbacks: {callback}") - if self.stream: + print_verbose( + f"Async success callbacks: {callback}; self.stream: {self.stream}; complete_streaming_response: {self.model_call_details.get('complete_streaming_response', None)}" + ) + if self.stream == True: if "complete_streaming_response" in self.model_call_details: await callback.async_log_success_event( kwargs=self.model_call_details,