diff --git a/litellm/utils.py b/litellm/utils.py
index c6d855fd1..4a18fa852 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -824,7 +824,7 @@ class Logging:
             print_verbose(f"success callbacks: {litellm.success_callback}")
             ## BUILD COMPLETE STREAMED RESPONSE
             complete_streaming_response = None
-            if self.model_call_details.get("litellm_params", {}).get("acompletion", False) == True:
+            if self.stream == True and self.model_call_details.get("litellm_params", {}).get("acompletion", False) == True:
                 # if it's acompletion == True, chunks are built/appended in async_success_handler
                 if result.choices[0].finish_reason is not None: # if it's the last chunk
                     complete_streaming_response = litellm.stream_chunk_builder(self.streaming_chunks, messages=self.model_call_details.get("messages", None))