Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-24 18:24:20 +00:00
fix(utils.py): return last streaming chunk
Parent: a9ed768991
Commit: 2b437a2699
2 changed files with 8 additions and 16 deletions
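
In consumer terms, the bug: when a provider's closing stream chunk carries only a finish_reason and no content, CustomStreamWrapper could drop it instead of yielding it to the caller. A minimal sketch of the now-fixed behavior, assuming litellm's OpenAI-style streaming chunks (attribute names mirror the OpenAI SDK):

import litellm

# Stream a completion; after this fix the closing chunk (empty delta,
# finish_reason set) is yielded to the caller instead of being dropped.
response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
    stream=True,
)
for chunk in response:
    if chunk.choices[0].finish_reason is not None:
        print("stream ended:", chunk.choices[0].finish_reason)  # e.g. "stop"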
@@ -37,15 +37,7 @@ def test_chat_openai():
                 "role": "user",
                 "content": "Hi 👋 - i'm openai"
             }],
-            stream=True,
-            complete_response = True)
-        response2 = completion(model="gpt-3.5-turbo",
-                               messages=[{
-                                   "role": "user",
-                                   "content": "Hi 👋 - i'm not openai"
-                               }],
-                               stream=True,
-                               complete_response = True)
+            stream=True)
         time.sleep(1)
         assert customHandler.success == True
     except Exception as e:
@@ -53,7 +45,7 @@ def test_chat_openai():
         pass
 
 
-# test_chat_openai()
+test_chat_openai()
 
 def test_completion_azure_stream_moderation_failure():
     try:
@@ -80,7 +72,7 @@ def test_completion_azure_stream_moderation_failure():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-test_completion_azure_stream_moderation_failure()
+# test_completion_azure_stream_moderation_failure()
 
 
 # def custom_callback(
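
The test above asserts that a custom callback observed a successful streamed call, which depends on utils.py firing the success handler for the final chunk. A sketch of the callback pattern being exercised, assuming litellm's CustomLogger interface (the handler class and flag are illustrative stand-ins for the test's customHandler):

import time
import litellm
from litellm import completion
from litellm.integrations.custom_logger import CustomLogger

class MyCustomHandler(CustomLogger):
    success = False

    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        # fired by the success_handler thread once the stream completes
        self.success = True

customHandler = MyCustomHandler()
litellm.callbacks = [customHandler]

response = completion(model="gpt-3.5-turbo",
                      messages=[{"role": "user",
                                 "content": "Hi 👋 - i'm openai"}],
                      stream=True)
for chunk in response:
    pass             # drain the stream; the closing chunk ends it

time.sleep(1)        # the success handler runs on a background thread
assert customHandler.success == True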
@@ -5242,6 +5242,11 @@ class CustomStreamWrapper:
                     return model_response
                 else:
                     return
+            elif model_response.choices[0].finish_reason:
+                model_response.choices[0].finish_reason = map_finish_reason(model_response.choices[0].finish_reason) # ensure consistent output to openai
+                # LOGGING
+                threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start()
+                return model_response
             elif response_obj is not None and response_obj.get("original_chunk", None) is not None: # function / tool calling branch - only set for openai/azure compatible endpoints
                 # enter this branch when no content has been passed in response
                 original_chunk = response_obj.get("original_chunk", None)
@@ -5263,11 +5268,6 @@ class CustomStreamWrapper:
                     self.sent_first_chunk = True
                 threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start() # log response
                 return model_response
-            elif model_response.choices[0].finish_reason:
-                model_response.choices[0].finish_reason = map_finish_reason(model_response.choices[0].finish_reason) # ensure consistent output to openai
-                # LOGGING
-                threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start()
-                return model_response
             else:
                 return
         except StopIteration:
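
The utils.py change is purely about branch order in CustomStreamWrapper's chunk handling: the finish_reason check now runs before the function/tool-calling (original_chunk) branch, so a closing chunk with no content is mapped via map_finish_reason, logged on a background thread, and returned, rather than falling through the tool-calling branch to a bare return. A runnable toy of that dispatch order (illustrative types and helper names, not litellm's actual classes):

from dataclasses import dataclass, field
from typing import Optional

@dataclass
class Choice:
    delta: str = ""
    finish_reason: Optional[str] = None

@dataclass
class ModelResponse:
    choices: list = field(default_factory=lambda: [Choice()])

def map_finish_reason(reason: str) -> str:
    # normalize provider-specific values onto OpenAI's vocabulary
    return {"stop_sequence": "stop", "max_tokens": "length"}.get(reason, reason)

def handle_chunk(model_response, has_content, original_chunk):
    if has_content:                                # ordinary content delta
        return model_response
    elif model_response.choices[0].finish_reason:  # check hoisted by this commit
        model_response.choices[0].finish_reason = map_finish_reason(
            model_response.choices[0].finish_reason)
        return model_response                      # last chunk reaches the caller
    elif original_chunk is not None:               # function / tool calling branch
        if (original_chunk.get("delta") or {}).get("tool_calls"):
            return model_response
        return None                                # nothing to forward
    else:
        return None

# An OpenAI-style closing chunk: no content, original_chunk present, and
# finish_reason set. Under the old order the tool-calling branch ran first
# and returned None; under the new order the chunk reaches the caller.
closing = ModelResponse(choices=[Choice(finish_reason="stop_sequence")])
out = handle_chunk(closing, has_content=False, original_chunk={"delta": {}})
assert out is not None and out.choices[0].finish_reason == "stop"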