diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py
index 721ad2bc72..485b64b90c 100644
--- a/litellm/integrations/langfuse.py
+++ b/litellm/integrations/langfuse.py
@@ -58,7 +58,7 @@ class LangFuseLogger:
                 model=kwargs['model'],
                 modelParameters=optional_params,
                 prompt=prompt,
-                completion=response_obj['choices'][0]['message'],
+                completion=response_obj['choices'][0]['message'].json(),
                 usage=Usage(
                     prompt_tokens=response_obj['usage']['prompt_tokens'],
                     completion_tokens=response_obj['usage']['completion_tokens']
diff --git a/litellm/utils.py b/litellm/utils.py
index 900fc45edc..173abe8ab8 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -178,6 +178,14 @@ class Message(OpenAIObject):
         # Allow dictionary-style assignment of attributes
         setattr(self, key, value)
 
+    def json(self, **kwargs):
+        try:
+            return self.model_dump() # noqa
+        except:
+            # if using pydantic v1
+            return self.dict()
+
+
 class Delta(OpenAIObject):
     def __init__(self, content=None, role=None, **params):
         super(Delta, self).__init__(**params)
@@ -817,16 +825,17 @@ class Logging:
             )
             # print(f"original response in success handler: {self.model_call_details['original_response']}")
             try:
-                print_verbose(f"success callbacks: {litellm.success_callback}")
+                print_verbose(f"success callbacks: {litellm.success_callback}")
                 ## BUILD COMPLETE STREAMED RESPONSE
                 complete_streaming_response = None
                 if self.stream == True and self.model_call_details.get("litellm_params", {}).get("acompletion", False) == True: # if it's acompletion == True, chunks are built/appended in async_success_handler
+                    self.streaming_chunks.append(result)
                     if result.choices[0].finish_reason is not None: # if it's the last chunk
                         complete_streaming_response = litellm.stream_chunk_builder(self.streaming_chunks, messages=self.model_call_details.get("messages", None))
                 else: # this is a completion() call
-                    if self.stream:
+                    if self.stream == True:
                         print_verbose("success callback - assembling complete streaming response")
                         if result.choices[0].finish_reason is not None: # if it's the last chunk
                             print_verbose(f"success callback - Got the very Last chunk. Assembling {self.streaming_chunks}")
@@ -5766,14 +5775,13 @@ class CustomStreamWrapper:
                     if processed_chunk is None:
                         continue
                     ## LOGGING
+                    threading.Thread(target=self.logging_obj.success_handler, args=(processed_chunk,)).start() # log response
                     asyncio.create_task(self.logging_obj.async_success_handler(processed_chunk,))
                     return processed_chunk
                 raise StopAsyncIteration
             else: # temporary patch for non-aiohttp async calls
                 # example - boto3 bedrock llms
-                print_verbose(f"ENTERS __NEXT__ LOOP")
                 processed_chunk = next(self)
-                print_verbose(f"PROCESSED CHUNK IN __ANEXT__: {processed_chunk}")
                 asyncio.create_task(self.logging_obj.async_success_handler(processed_chunk,))
                 return processed_chunk
         except StopAsyncIteration:
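
Note: below is a minimal, self-contained sketch (not part of the patch) of the two patterns the diff relies on: the version-agnostic pydantic serialization that the new Message.json() helper uses before the completion is handed to Langfuse, and the fire-and-forget sync/async success logging now done per streamed chunk in CustomStreamWrapper.__anext__. All names in the sketch (ExampleMessage, to_serializable, the handler functions) are placeholders, not litellm APIs.

# Sketch 1: serialize a pydantic-style message for logging, preferring the
# pydantic v2 API (model_dump) and falling back to v1 (dict), mirroring the
# new Message.json() helper. ExampleMessage is a placeholder model.
from pydantic import BaseModel

class ExampleMessage(BaseModel):
    role: str = "assistant"
    content: str = ""

def to_serializable(obj):
    try:
        return obj.model_dump()  # pydantic v2
    except AttributeError:
        return obj.dict()        # pydantic v1

print(to_serializable(ExampleMessage(content="hello")))


# Sketch 2: per-chunk success logging in the style of the updated streaming
# wrapper - the synchronous handler runs on a short-lived thread so it cannot
# block the event loop, while the async handler is scheduled as a task on the
# running loop.
import asyncio
import threading

def success_handler(chunk):
    print("sync log:", chunk)

async def async_success_handler(chunk):
    print("async log:", chunk)

async def stream(chunks):
    for chunk in chunks:
        threading.Thread(target=success_handler, args=(chunk,)).start()
        asyncio.create_task(async_success_handler(chunk))
        yield chunk

async def main():
    async for _ in stream(["chunk-1", "chunk-2"]):
        pass
    await asyncio.sleep(0)  # give the scheduled logging tasks a chance to run

asyncio.run(main())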