diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py index 51a3ae04ea..80086dbe6d 100644 --- a/litellm/tests/test_streaming.py +++ b/litellm/tests/test_streaming.py @@ -824,6 +824,32 @@ def test_bedrock_claude_3_streaming(): pytest.fail(f"Error occurred: {e}") +def test_claude_3_streaming_finish_reason(): + try: + litellm.set_verbose = True + messages = [ + {"role": "system", "content": "Be helpful"}, + {"role": "user", "content": "What do you know?"}, + ] + response: ModelResponse = completion( + model="claude-3-opus-20240229", + messages=messages, + stream=True, + ) + complete_response = "" + # Add any assertions here to check the response + num_finish_reason = 0 + for idx, chunk in enumerate(response): + if isinstance(chunk, ModelResponse): + if chunk.choices[0].finish_reason is not None: + num_finish_reason += 1 + assert num_finish_reason == 1 + except RateLimitError: + pass + except Exception as e: + pytest.fail(f"Error occurred: {e}") + + @pytest.mark.skip(reason="Replicate changed exceptions") def test_completion_replicate_stream_bad_key(): try: diff --git a/litellm/utils.py b/litellm/utils.py index f528ff8332..121e7d9e5a 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -9531,6 +9531,9 @@ class CustomStreamWrapper: else: return elif self.received_finish_reason is not None: + if self.sent_last_chunk: + raise StopIteration + # flush any remaining holding chunk if len(self.holding_chunk) > 0: if model_response.choices[0].delta.content is None: @@ -9544,6 +9547,7 @@ class CustomStreamWrapper: is_delta_empty = self.is_delta_empty( delta=model_response.choices[0].delta ) + if is_delta_empty: # get any function call arguments model_response.choices[0].finish_reason = map_finish_reason(