diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 92b798d84..b939d6299 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -2018,12 +2018,24 @@ def test_openai_stream_options_call():
     """

     assert last_chunk.usage is not None
+    assert isinstance(last_chunk.usage, litellm.Usage)
     assert last_chunk.usage.total_tokens > 0
     assert last_chunk.usage.prompt_tokens > 0
     assert last_chunk.usage.completion_tokens > 0

     # assert all non last chunks have usage=None
-    assert all(chunk.usage is None for chunk in chunks[:-1])
+    # Improved assertion with detailed error message
+    non_last_chunks_with_usage = [
+        chunk
+        for chunk in chunks[:-1]
+        if hasattr(chunk, "usage") and chunk.usage is not None
+    ]
+    assert (
+        not non_last_chunks_with_usage
+    ), f"Non-last chunks with usage not None:\n" + "\n".join(
+        f"Chunk ID: {chunk.id}, Usage: {chunk.usage}, Content: {chunk.choices[0].delta.content}"
+        for chunk in non_last_chunks_with_usage
+    )


 def test_openai_stream_options_call_text_completion():
diff --git a/litellm/utils.py b/litellm/utils.py
index 4dcee6be4..a8e872bd2 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -680,12 +680,6 @@ class ModelResponse(OpenAIObject):
             usage = usage
         elif stream is None or stream == False:
             usage = Usage()
-        elif (
-            stream == True
-            and stream_options is not None
-            and stream_options.get("include_usage") == True
-        ):
-            usage = Usage()

         if hidden_params:
             self._hidden_params = hidden_params
@@ -11107,8 +11101,7 @@ class CustomStreamWrapper:
         model_response.system_fingerprint = self.system_fingerprint
         model_response._hidden_params["custom_llm_provider"] = self.custom_llm_provider
         model_response._hidden_params["created_at"] = time.time()
-        model_response.choices = [StreamingChoices()]
-        model_response.choices[0].finish_reason = None
+        model_response.choices = [StreamingChoices(finish_reason=None)]
         return model_response

     def is_delta_empty(self, delta: Delta) -> bool:
@@ -11463,8 +11456,13 @@ class CustomStreamWrapper:
             if (
                 self.stream_options is not None
                 and self.stream_options["include_usage"] == True
+                and response_obj["usage"] is not None
             ):
-                model_response.usage = response_obj["usage"]
+                model_response.usage = litellm.Usage(
+                    prompt_tokens=response_obj["usage"].prompt_tokens,
+                    completion_tokens=response_obj["usage"].completion_tokens,
+                    total_tokens=response_obj["usage"].total_tokens,
+                )
             model_response.model = self.model
             print_verbose(