From 2b437a269987514e7db46512429699d7fa50b3ba Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Wed, 29 Nov 2023 12:11:08 -0800
Subject: [PATCH] fix(utils.py): return last streaming chunk

---
 litellm/tests/test_custom_logger.py | 14 +++-----------
 litellm/utils.py                    | 10 +++++-----
 2 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/litellm/tests/test_custom_logger.py b/litellm/tests/test_custom_logger.py
index c31eba0c74..eac07665b3 100644
--- a/litellm/tests/test_custom_logger.py
+++ b/litellm/tests/test_custom_logger.py
@@ -37,15 +37,7 @@ def test_chat_openai():
                         "role": "user",
                         "content": "Hi 👋 - i'm openai"
                     }],
-                    stream=True,
-                    complete_response = True)
-        response2 = completion(model="gpt-3.5-turbo",
-                    messages=[{
-                        "role": "user",
-                        "content": "Hi 👋 - i'm not openai"
-                    }],
-                    stream=True,
-                    complete_response = True)
+                    stream=True)
         time.sleep(1)
         assert customHandler.success == True
     except Exception as e:
@@ -53,7 +45,7 @@
         pass

-# test_chat_openai()
+test_chat_openai()


 def test_completion_azure_stream_moderation_failure():
     try:
@@ -80,7 +72,7 @@
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")

-test_completion_azure_stream_moderation_failure()
+# test_completion_azure_stream_moderation_failure()


 # def custom_callback(
diff --git a/litellm/utils.py b/litellm/utils.py
index c63aae07b4..71492d0e81 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -5242,6 +5242,11 @@ class CustomStreamWrapper:
                     return model_response
                 else:
                     return
+            elif model_response.choices[0].finish_reason:
+                model_response.choices[0].finish_reason = map_finish_reason(model_response.choices[0].finish_reason) # ensure consistent output to openai
+                # LOGGING
+                threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start()
+                return model_response
             elif response_obj is not None and response_obj.get("original_chunk", None) is not None: # function / tool calling branch - only set for openai/azure compatible endpoints
                 # enter this branch when no content has been passed in response
                 original_chunk = response_obj.get("original_chunk", None)
@@ -5263,11 +5268,6 @@ class CustomStreamWrapper:
                     self.sent_first_chunk = True
                 threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start() # log response
                 return model_response
-            elif model_response.choices[0].finish_reason:
-                model_response.choices[0].finish_reason = map_finish_reason(model_response.choices[0].finish_reason) # ensure consistent output to openai
-                # LOGGING
-                threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start()
-                return model_response
             else:
                 return
         except StopIteration:
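
For context, a minimal sketch (not part of the patch) of the path this change reorders, assuming the same completion() call pattern used in test_chat_openai() above; the model name and prompt are placeholders taken from that test:

    from litellm import completion

    # Hypothetical illustration: consume a stream end-to-end so the final chunk,
    # which carries a non-empty finish_reason, is actually yielded. With this fix,
    # the relocated elif branch in CustomStreamWrapper maps that finish_reason via
    # map_finish_reason(), fires logging_obj.success_handler in a thread, and
    # returns the chunk instead of falling through to the bare `return`.
    response = completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
        stream=True,
    )

    for chunk in response:
        finish_reason = chunk.choices[0].finish_reason
        if finish_reason:  # only the last streamed chunk sets this
            print(f"last chunk received, finish_reason={finish_reason}")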