diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
index bff4121c2c..56dc8017c9
Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 61460c06ad..5cde9036ab
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 70960990d9..fb4eca6d43
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/main.py b/litellm/main.py
index 759d45c17a..7d6844ddac 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -1478,12 +1478,13 @@ def config_completion(**kwargs):
         )
 
 def stream_chunk_builder(chunks: list):
+    print(f"chunk 0: {chunks[0]}")
     id = chunks[0]["id"]
     object = chunks[0]["object"]
     created = chunks[0]["created"]
     model = chunks[0]["model"]
     role = chunks[0]["choices"][0]["delta"]["role"]
-    finnish_reason = chunks[-1]["choices"][0]["finish_reason"]
+    finish_reason = chunks[-1]["choices"][0]["finish_reason"]
 
     # Initialize the response dictionary
     response = {
@@ -1498,7 +1499,7 @@ def stream_chunk_builder(chunks: list):
                     "role": role,
                     "content": ""
                 },
-                "finish_reason": finnish_reason,
+                "finish_reason": finish_reason,
             }
         ],
         # "usage": {
diff --git a/litellm/tests/test_logging.py b/litellm/tests/test_logging.py
index 5cbbb22213..e68c90b357 100644
--- a/litellm/tests/test_logging.py
+++ b/litellm/tests/test_logging.py
@@ -41,118 +41,163 @@ messages = [{"content": user_message, "role": "user"}]
 
 # 1. On Call Success
 # normal completion
 ## test on openai completion call
-try:
-    # Redirect stdout
-    old_stdout = sys.stdout
-    sys.stdout = new_stdout = io.StringIO()
+def test_logging_success_completion():
+    global score
+    try:
+        # Redirect stdout
+        old_stdout = sys.stdout
+        sys.stdout = new_stdout = io.StringIO()
 
-    response = completion(model="gpt-3.5-turbo", messages=messages)
-    # Restore stdout
-    sys.stdout = old_stdout
-    output = new_stdout.getvalue().strip()
+        response = completion(model="gpt-3.5-turbo", messages=messages)
+        # Restore stdout
+        sys.stdout = old_stdout
+        output = new_stdout.getvalue().strip()
 
-    if "Logging Details Pre-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details Post-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details LiteLLM-Success Call" not in output:
-        raise Exception("Required log message not found!")
-    score += 1
-except Exception as e:
-    pytest.fail(f"Error occurred: {e}")
-    pass
+        if "Logging Details Pre-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details Post-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details LiteLLM-Success Call" not in output:
+            raise Exception("Required log message not found!")
+        score += 1
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+        pass
 
 ## test on non-openai completion call
-try:
-    # Redirect stdout
-    old_stdout = sys.stdout
-    sys.stdout = new_stdout = io.StringIO()
+def test_logging_success_completion_non_openai():
+    global score
+    try:
+        # Redirect stdout
+        old_stdout = sys.stdout
+        sys.stdout = new_stdout = io.StringIO()
 
-    response = completion(model="claude-instant-1", messages=messages)
-
-    # Restore stdout
-    sys.stdout = old_stdout
-    output = new_stdout.getvalue().strip()
+        response = completion(model="claude-instant-1", messages=messages)
+
+        # Restore stdout
+        sys.stdout = old_stdout
+        output = new_stdout.getvalue().strip()
 
-    if "Logging Details Pre-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details Post-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details LiteLLM-Success Call" not in output:
-        raise Exception("Required log message not found!")
-    score += 1
-except Exception as e:
-    pytest.fail(f"Error occurred: {e}")
-    pass
+        if "Logging Details Pre-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details Post-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details LiteLLM-Success Call" not in output:
+            raise Exception("Required log message not found!")
+        score += 1
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+        pass
 
 # streaming completion
 ## test on openai completion call
-try:
-    # Redirect stdout
-    old_stdout = sys.stdout
-    sys.stdout = new_stdout = io.StringIO()
+def test_logging_success_streaming_openai():
+    global score
+    try:
+        # litellm.set_verbose = False
+        def custom_callback(
+            kwargs,                 # kwargs to completion
+            completion_response,    # response from completion
+            start_time, end_time    # start/end time
+        ):
+            if "complete_streaming_response" in kwargs:
+                print(f"Complete Streaming Response: {kwargs['complete_streaming_response']}")
+
+        # Assign the custom callback function
+        litellm.success_callback = [custom_callback]
 
-    response = completion(model="gpt-3.5-turbo", messages=messages)
+        # Redirect stdout
+        old_stdout = sys.stdout
+        sys.stdout = new_stdout = io.StringIO()
 
-    # Restore stdout
-    sys.stdout = old_stdout
-    output = new_stdout.getvalue().strip()
+        response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
+        for chunk in response:
+            pass
 
-    if "Logging Details Pre-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details Post-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details LiteLLM-Success Call" not in output:
-        raise Exception("Required log message not found!")
-    score += 1
-except Exception as e:
-    pytest.fail(f"Error occurred: {e}")
-    pass
+        # Restore stdout
+        sys.stdout = old_stdout
+        output = new_stdout.getvalue().strip()
+
+        if "Logging Details Pre-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details Post-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details LiteLLM-Success Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Complete Streaming Response:" not in output:
+            raise Exception("Required log message not found!")
+        score += 1
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+        pass
+
+# test_logging_success_streaming_openai()
 
 ## test on non-openai completion call
-try:
-    # Redirect stdout
-    old_stdout = sys.stdout
-    sys.stdout = new_stdout = io.StringIO()
+def test_logging_success_streaming_non_openai():
+    global score
+    try:
+        # litellm.set_verbose = False
+        def custom_callback(
+            kwargs,                 # kwargs to completion
+            completion_response,    # response from completion
+            start_time, end_time    # start/end time
+        ):
+            # print(f"streaming response: {completion_response}")
+            if "complete_streaming_response" in kwargs:
+                print(f"Complete Streaming Response: {kwargs['complete_streaming_response']}")
+
+        # Assign the custom callback function
+        litellm.success_callback = [custom_callback]
 
-    response = completion(model="claude-instant-1", messages=messages)
-
-    # Restore stdout
-    sys.stdout = old_stdout
-    output = new_stdout.getvalue().strip()
+        # Redirect stdout
+        old_stdout = sys.stdout
+        sys.stdout = new_stdout = io.StringIO()
 
-    if "Logging Details Pre-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details Post-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details LiteLLM-Success Call" not in output:
-        raise Exception("Required log message not found!")
-    score += 1
-except Exception as e:
-    pytest.fail(f"Error occurred: {e}")
-    pass
+        response = completion(model="claude-instant-1", messages=messages, stream=True)
+        for idx, chunk in enumerate(response):
+            pass
+
+        # Restore stdout
+        sys.stdout = old_stdout
+        output = new_stdout.getvalue().strip()
 
+        if "Logging Details Pre-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details Post-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details LiteLLM-Success Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Complete Streaming Response:" not in output:
+            raise Exception("Required log message not found!")
+        score += 1
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+        pass
+
+test_logging_success_streaming_non_openai()
 
 # embedding
-try:
-    # Redirect stdout
-    old_stdout = sys.stdout
-    sys.stdout = new_stdout = io.StringIO()
+def test_logging_success_embedding_openai():
+    try:
+        # Redirect stdout
+        old_stdout = sys.stdout
+        sys.stdout = new_stdout = io.StringIO()
 
-    response = embedding(model="text-embedding-ada-002", input=["good morning from litellm"])
+        response = embedding(model="text-embedding-ada-002", input=["good morning from litellm"])
 
-    # Restore stdout
-    sys.stdout = old_stdout
-    output = new_stdout.getvalue().strip()
+        # Restore stdout
+        sys.stdout = old_stdout
+        output = new_stdout.getvalue().strip()
 
-    if "Logging Details Pre-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details Post-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details LiteLLM-Success Call" not in output:
-        raise Exception("Required log message not found!")
-except Exception as e:
-    pytest.fail(f"Error occurred: {e}")
+        if "Logging Details Pre-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details Post-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details LiteLLM-Success Call" not in output:
+            raise Exception("Required log message not found!")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
 
 # ## 2. On LiteLLM Call failure
 # ## TEST BAD KEY
diff --git a/litellm/tests/test_stream_chunk_builder.py b/litellm/tests/test_stream_chunk_builder.py
index 465ee4a52f..a4527521f2 100644
--- a/litellm/tests/test_stream_chunk_builder.py
+++ b/litellm/tests/test_stream_chunk_builder.py
@@ -54,4 +54,5 @@ def test_stream_chunk_builder():
         finnish_reason = choices["finish_reason"]
     except:
         raise Exception("stream_chunk_builder failed to rebuild response")
-test_stream_chunk_builder()
+# test_stream_chunk_builder()
+
diff --git a/litellm/utils.py b/litellm/utils.py
index def722776b..9b0c6c7356 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -228,6 +228,7 @@ class Logging:
         self.call_type = call_type
         self.litellm_call_id = litellm_call_id
         self.function_id = function_id
+        self.streaming_chunks = [] # for generating complete stream response
 
     def update_environment_variables(self, model, user, optional_params, litellm_params):
         self.optional_params = optional_params
@@ -394,7 +395,7 @@ class Logging:
 
             pass
 
-    def success_handler(self, result, start_time=None, end_time=None):
+    def success_handler(self, result=None, start_time=None, end_time=None, **kwargs):
         print_verbose(
             f"Logging Details LiteLLM-Success Call"
         )
@@ -403,6 +404,20 @@ class Logging:
             start_time = self.start_time
         if end_time is None:
             end_time = datetime.datetime.now()
+
+        complete_streaming_response = None
+
+        ## BUILD COMPLETE STREAMED RESPONSE
+        if self.stream:
+            if result.choices[0].finish_reason: # if it's the last chunk
+                self.streaming_chunks.append(result)
+                complete_streaming_response = litellm.stream_chunk_builder(self.streaming_chunks)
+            else:
+                self.streaming_chunks.append(result)
+
+        if complete_streaming_response:
+            self.model_call_details["complete_streaming_response"] = complete_streaming_response
+
         print_verbose(f"success callbacks: {litellm.success_callback}")
 
         if litellm.max_budget and self.stream:
@@ -3328,20 +3343,22 @@ class CustomStreamWrapper:
                 chunk = next(self.completion_stream)
                 model_response = chunk
                 # LOGGING
-                threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
+                threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start()
                 return model_response
-            # LOGGING
-            threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
 
             model_response.model = self.model
             if len(completion_obj["content"]) > 0: # cannot set content of an OpenAI Object to be an empty string
                 if self.sent_first_chunk == False:
                     completion_obj["role"] = "assistant"
                     self.sent_first_chunk = True
                 model_response.choices[0].delta = Delta(**completion_obj)
+                # LOGGING
+                threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start()
                 return model_response
             elif model_response.choices[0].finish_reason:
                 model_response.choices[0].finish_reason = map_finish_reason(model_response.choices[0].finish_reason) # ensure consistent output to openai
+                # LOGGING
+                threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start()
                 return model_response
         except StopIteration:
             raise StopIteration
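Note: below is a minimal usage sketch of the behavior this diff enables, mirroring the callback pattern exercised in the updated test_logging.py; the model name and prompt are placeholders, not part of the change. Once a streamed completion reaches its final chunk (finish_reason is set), success_handler rebuilds the buffered chunks with litellm.stream_chunk_builder, so a registered success callback should see the stitched-together result under kwargs["complete_streaming_response"].

import litellm
from litellm import completion

def log_complete_stream(kwargs, completion_response, start_time, end_time):
    # "complete_streaming_response" is only expected in kwargs on the callback
    # fired for the final chunk, after all chunks are rebuilt into one response.
    if "complete_streaming_response" in kwargs:
        print(f"Complete Streaming Response: {kwargs['complete_streaming_response']}")

litellm.success_callback = [log_complete_stream]

response = completion(
    model="gpt-3.5-turbo",  # placeholder model
    messages=[{"role": "user", "content": "Hi, how are you?"}],  # placeholder prompt
    stream=True,
)
for chunk in response:
    pass  # consuming the stream drives CustomStreamWrapper, which fires success_handler per chunk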