Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)
commit e6ae53b5a9 (parent ffd6d35ba2)

    fixes to streaming iterator

2 changed files with 32 additions and 5 deletions
@@ -54,6 +54,7 @@ class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator):
                 # Get the next chunk from the stream
                 try:
                     chunk = await self.litellm_custom_stream_wrapper.__anext__()
+                    self.collected_chat_completion_chunks.append(chunk)
                     response_api_chunk = (
                         self._transform_chat_completion_chunk_to_response_api_chunk(
                             chunk
@@ -61,7 +62,6 @@ class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator):
                     )
                     if response_api_chunk:
                         return response_api_chunk
-                    self.collected_chat_completion_chunks.append(chunk)
                 except StopAsyncIteration:
                     self.finished = True
                     response_completed_event = self._emit_response_completed_event()
@@ -84,10 +84,11 @@ class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator):
         try:
             while True:
                 if self.finished is True:
-                    raise StopAsyncIteration
+                    raise StopIteration
                 # Get the next chunk from the stream
                 try:
                     chunk = self.litellm_custom_stream_wrapper.__next__()
+                    self.collected_chat_completion_chunks.append(chunk)
                     response_api_chunk = (
                         self._transform_chat_completion_chunk_to_response_api_chunk(
                             chunk
@@ -95,14 +96,13 @@ class LiteLLMCompletionStreamingIterator(ResponsesAPIStreamingIterator):
                     )
                     if response_api_chunk:
                         return response_api_chunk
-                    self.collected_chat_completion_chunks.append(chunk)
-                except StopAsyncIteration:
+                except StopIteration:
                     self.finished = True
                     response_completed_event = self._emit_response_completed_event()
                     if response_completed_event:
                         return response_completed_event
                     else:
-                        raise StopAsyncIteration
+                        raise StopIteration

         except Exception as e:
             # Handle HTTP errors
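In short, both iterator paths now append each chunk to collected_chat_completion_chunks before the early return, so the final completed event is built from every chunk, and the synchronous path raises and catches StopIteration instead of StopAsyncIteration. Below is a minimal standalone sketch of the corrected synchronous pattern; the class name, ChunkStream shape, and _transform_chunk helper are illustrative stand-ins, not litellm's actual internals.

```python
from typing import Iterator, List, Optional


class SyncStreamingIteratorSketch:
    """Illustrative only: wrap a chunk iterator, surface transformed chunks,
    and emit one final 'completed' event built from every collected chunk."""

    def __init__(self, chunk_stream: Iterator[dict]):
        self.chunk_stream = chunk_stream
        self.collected_chunks: List[dict] = []
        self.finished = False

    def __iter__(self) -> "SyncStreamingIteratorSketch":
        return self

    def __next__(self) -> dict:
        while True:
            if self.finished:
                # A sync iterator must signal exhaustion with StopIteration,
                # not StopAsyncIteration.
                raise StopIteration
            try:
                chunk = next(self.chunk_stream)
                # Collect the chunk *before* any early return, so the final
                # completed event sees every chunk (e.g. for usage totals).
                self.collected_chunks.append(chunk)
                event = self._transform_chunk(chunk)
                if event:
                    return event
            except StopIteration:
                self.finished = True
                return {"type": "response.completed", "chunks": len(self.collected_chunks)}

    def _transform_chunk(self, chunk: dict) -> Optional[dict]:
        # Illustrative transform: only surface chunks that carry text.
        text = chunk.get("delta")
        return {"type": "response.output_text.delta", "delta": text} if text else None
```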
@@ -138,6 +138,8 @@ class BaseResponsesAPITest(ABC):
     async def test_basic_openai_responses_api_streaming(self, sync_mode):
         litellm._turn_on_debug()
         base_completion_call_args = self.get_base_completion_call_args()
+        collected_content_string = ""
+        response_completed_event = None
         if sync_mode:
             response = litellm.responses(
                 input="Basic ping",
@@ -146,6 +148,10 @@ class BaseResponsesAPITest(ABC):
             )
             for event in response:
                 print("litellm response=", json.dumps(event, indent=4, default=str))
+                if event.type == "response.output_text.delta":
+                    collected_content_string += event.delta
+                elif event.type == "response.completed":
+                    response_completed_event = event
         else:
             response = await litellm.aresponses(
                 input="Basic ping",
@@ -154,5 +160,26 @@ class BaseResponsesAPITest(ABC):
             )
             async for event in response:
                 print("litellm response=", json.dumps(event, indent=4, default=str))
+                if event.type == "response.output_text.delta":
+                    collected_content_string += event.delta
+                elif event.type == "response.completed":
+                    response_completed_event = event
+
+        # assert the delta chunks content had len(collected_content_string) > 0
+        # this content is typically rendered on chat ui's
+        assert len(collected_content_string) > 0
+
+        # assert the response completed event is not None
+        assert response_completed_event is not None
+
+        # assert the response completed event has a response
+        assert response_completed_event.response is not None
+
+        # assert the response completed event includes the usage
+        assert response_completed_event.response.usage is not None
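For context, the updated test accumulates text from response.output_text.delta events and checks that the final response.completed event carries usage. A minimal consumption sketch along the same lines is shown below; the test builds its remaining call kwargs via get_base_completion_call_args(), so the model name and stream parameter here are assumptions, not values taken from this commit.

```python
import asyncio

import litellm


async def main() -> None:
    # Assumed call shape: model and stream=True are illustrative; the test
    # supplies its provider-specific arguments separately.
    stream = await litellm.aresponses(
        model="openai/gpt-4o",
        input="Basic ping",
        stream=True,
    )

    collected_text = ""
    completed_event = None
    async for event in stream:
        if event.type == "response.output_text.delta":
            collected_text += event.delta
        elif event.type == "response.completed":
            completed_event = event

    print(collected_text)
    if completed_event is not None:
        # With the iterator fix above, the completed event includes usage.
        print(completed_event.response.usage)


if __name__ == "__main__":
    asyncio.run(main())
```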