diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index f3bf6edfeb..5702cb34f2 100644
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 6b756a49ba..cb633a81e1 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/integrations/litedebugger.py b/litellm/integrations/litedebugger.py
index 032f1b6550..ef33c7edd7 100644
--- a/litellm/integrations/litedebugger.py
+++ b/litellm/integrations/litedebugger.py
@@ -184,21 +184,22 @@ class LiteDebugger:
                     data=json.dumps(litellm_data_obj),
                 )
             elif call_type == "completion" and stream == True:
-                litellm_data_obj = {
-                    "response_time": response_time,
-                    "total_cost": total_cost,
-                    "response": "streamed response",
-                    "litellm_call_id": litellm_call_id,
-                    "status": "success",
-                }
-                print_verbose(
-                    f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
-                )
-                response = requests.post(
-                    url=self.api_url,
-                    headers={"content-type": "application/json"},
-                    data=json.dumps(litellm_data_obj),
-                )
+                if len(response_obj["content"]) > 0:  # don't log the empty strings
+                    litellm_data_obj = {
+                        "response_time": response_time,
+                        "total_cost": total_cost,
+                        "response": response_obj["content"],
+                        "litellm_call_id": litellm_call_id,
+                        "status": "success",
+                    }
+                    print_verbose(
+                        f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
+                    )
+                    response = requests.post(
+                        url=self.api_url,
+                        headers={"content-type": "application/json"},
+                        data=json.dumps(litellm_data_obj),
+                    )
             elif "error" in response_obj:
                 if "Unable to map your input to a model." in response_obj["error"]:
in response_obj["error"]: total_cost = 0 diff --git a/litellm/tests/test_litedebugger_integration.py b/litellm/tests/test_litedebugger_integration.py index afa84e10c4..c9a569760c 100644 --- a/litellm/tests/test_litedebugger_integration.py +++ b/litellm/tests/test_litedebugger_integration.py @@ -36,8 +36,8 @@ litellm.set_verbose = True score = 0 split_per_model = { - "gpt-4": 0.7, - "claude-instant-1.2": 0.3 + "gpt-4": 0, + "claude-instant-1.2": 1 } @@ -81,26 +81,31 @@ try: raise Exception("LiteLLMDebugger: post-api call not logged!") if "LiteDebugger: Success/Failure Call Logging" not in output: raise Exception("LiteLLMDebugger: success/failure call not logged!") -except: - pass +except Exception as e: + pytest.fail(f"Error occurred: {e}") -# Test 3: On streaming completion call - setting client to true +Test 3: On streaming completion call - setting client to true try: # Redirect stdout old_stdout = sys.stdout sys.stdout = new_stdout = io.StringIO() - response = completion_with_split_tests(models=split_per_model, messages=messages, stream=True, use_client=True, id="6d383c99-488d-481d-aa1b-1f94935cec44") - + response = completion_with_split_tests(models=split_per_model, messages=messages, stream=True, use_client=True, override_client=True, id="6d383c99-488d-481d-aa1b-1f94935cec44") + for data in response: + print(data) # Restore stdout sys.stdout = old_stdout output = new_stdout.getvalue().strip() + print(output) + print(f"response: {response}") + if "LiteDebugger: Pre-API Call Logging" not in output: raise Exception("LiteLLMDebugger: pre-api call not logged!") if "LiteDebugger: Post-API Call Logging" not in output: raise Exception("LiteLLMDebugger: post-api call not logged!") if "LiteDebugger: Success/Failure Call Logging" not in output: raise Exception("LiteLLMDebugger: success/failure call not logged!") -except: - pass +except Exception as e: + pytest.fail(f"Error occurred: {e}") + diff --git a/litellm/utils.py b/litellm/utils.py index 4add7fe703..12260de021 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -157,13 +157,14 @@ class CallTypes(Enum): class Logging: global supabaseClient, liteDebuggerClient - def __init__(self, model, messages, stream, call_type, litellm_call_id, function_id): + def __init__(self, model, messages, stream, call_type, start_time, litellm_call_id, function_id): if call_type not in [item.value for item in CallTypes]: allowed_values = ", ".join([item.value for item in CallTypes]) raise ValueError(f"Invalid call_type {call_type}. 
Allowed values: {allowed_values}") self.model = model self.messages = messages self.stream = stream + self.start_time = start_time # log the call start time self.call_type = call_type self.litellm_call_id = litellm_call_id self.function_id = function_id @@ -330,11 +331,15 @@ class Logging: pass - def success_handler(self, result, start_time, end_time): + def success_handler(self, result, start_time=None, end_time=None): print_verbose( f"Logging Details LiteLLM-Success Call" ) try: + if start_time is None: + start_time = self.start_time + if end_time is None: + end_time = datetime.datetime.now() for callback in litellm.success_callback: try: if callback == "lite_debugger": @@ -366,11 +371,16 @@ class Logging: ) pass - def failure_handler(self, exception, traceback_exception, start_time, end_time): + def failure_handler(self, exception, traceback_exception, start_time=None, end_time=None): print_verbose( f"Logging Details LiteLLM-Failure Call" ) try: + if start_time is None: + start_time = self.start_time + if end_time is None: + end_time = datetime.datetime.now() + for callback in litellm.failure_callback: try: if callback == "lite_debugger": @@ -451,7 +461,7 @@ def client(original_function): global liteDebuggerClient, get_all_keys def function_setup( - *args, **kwargs + start_time, *args, **kwargs ): # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc. try: global callback_list, add_breadcrumb, user_logger_fn, Logging @@ -495,7 +505,7 @@ def client(original_function): elif call_type == CallTypes.embedding.value: messages = args[1] if len(args) > 1 else kwargs["input"] stream = True if "stream" in kwargs and kwargs["stream"] == True else False - logging_obj = Logging(model=model, messages=messages, stream=stream, litellm_call_id=kwargs["litellm_call_id"], function_id=function_id, call_type=call_type) + logging_obj = Logging(model=model, messages=messages, stream=stream, litellm_call_id=kwargs["litellm_call_id"], function_id=function_id, call_type=call_type, start_time=start_time) return logging_obj except: # DO NOT BLOCK running the function because of this print_verbose(f"[Non-Blocking] {traceback.format_exc()}; args - {args}; kwargs - {kwargs}") @@ -521,14 +531,13 @@ def client(original_function): pass def wrapper(*args, **kwargs): - start_time = None + start_time = datetime.datetime.now() result = None litellm_call_id = str(uuid.uuid4()) kwargs["litellm_call_id"] = litellm_call_id - logging_obj = function_setup(*args, **kwargs) + logging_obj = function_setup(start_time, *args, **kwargs) kwargs["litellm_logging_obj"] = logging_obj try: - start_time = datetime.datetime.now() # [OPTIONAL] CHECK CACHE # remove this after deprecating litellm.caching if (litellm.caching or litellm.caching_with_models) and litellm.cache is None: @@ -543,12 +552,11 @@ def client(original_function): # MODEL CALL result = original_function(*args, **kwargs) end_time = datetime.datetime.now() - # LOG SUCCESS - logging_obj.success_handler(result, start_time, end_time) - if "stream" in kwargs and kwargs["stream"] == True: # TODO: Add to cache for streaming return result + + # [OPTIONAL] ADD TO CACHE if litellm.caching or litellm.caching_with_models or litellm.cache != None: # user init a cache object litellm.cache.add_cache(result, *args, **kwargs) @@ -557,7 +565,8 @@ def client(original_function): if litellm.use_client == True: result['litellm_call_id'] = litellm_call_id - # LOG SUCCESS + # LOG SUCCESS - handle streaming success logging in the _next_ object, remove 
+            threading.Thread(target=logging_obj.success_handler, args=(result, start_time, end_time)).start()
             my_thread = threading.Thread(
                 target=handle_success, args=(args, kwargs, result, start_time, end_time)
             )  # don't interrupt execution of main thread
@@ -568,7 +577,8 @@ def client(original_function):
             traceback_exception = traceback.format_exc()
             crash_reporting(*args, **kwargs, exception=traceback_exception)
             end_time = datetime.datetime.now()
-            logging_obj.failure_handler(e, traceback_exception, start_time, end_time)
+            # LOG FAILURE - handle streaming failure logging in the _next_ object, remove `handle_failure` once it's deprecated
+            threading.Thread(target=logging_obj.failure_handler, args=(e, traceback_exception, start_time, end_time)).start()
             my_thread = threading.Thread(
                 target=handle_failure,
                 args=(e, traceback_exception, start_time, end_time, args, kwargs),
@@ -1833,7 +1843,7 @@ class CustomStreamWrapper:
                 completion_obj["content"] = self.handle_openai_chat_completion_chunk(chunk)
 
             # LOGGING
-            # self.logging_obj.post_call(completion_obj["content"])
+            threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
             # return this for all models
             return {"choices": [{"delta": completion_obj}]}
         except:
@@ -1933,7 +1943,7 @@ def get_model_split_test(models, completion_call_id):
     )
 
 
-def completion_with_split_tests(models={}, messages=[], use_client=False, **kwargs):
+def completion_with_split_tests(models={}, messages=[], use_client=False, override_client=False, **kwargs):
     """
     Example Usage:
 
@@ -1945,7 +1955,7 @@ def completion_with_split_tests(models={}, messages=[], use_client=False, **kwar
     completion_with_split_tests(models=models, messages=messages)
     """
     import random
-    if use_client:
+    if use_client and not override_client:
        if "id" not in kwargs or kwargs["id"] is None:
            raise ValueError("Please tag this completion call, if you'd like to update it's split test values through the UI. - eg. `completion_with_split_tests(.., id=1234)`.")
        # get the most recent model split list from server