fix(utils.py): fix stream options to return consistent response object

2024-06-04 18:17:45 -07:00 · 2024-06-04 18:17:45 -07:00 · 9aa29854de
commit 9aa29854de
parent 5e1faf31b0
2 changed files with 20 additions and 10 deletions
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -2018,12 +2018,24 @@ def test_openai_stream_options_call():
    """

    assert last_chunk.usage is not None
+    assert isinstance(last_chunk.usage, litellm.Usage)
    assert last_chunk.usage.total_tokens > 0
    assert last_chunk.usage.prompt_tokens > 0
    assert last_chunk.usage.completion_tokens > 0

    # assert all non last chunks have usage=None
-    assert all(chunk.usage is None for chunk in chunks[:-1])
+    # Improved assertion with detailed error message
+    non_last_chunks_with_usage = [
+        chunk
+        for chunk in chunks[:-1]
+        if hasattr(chunk, "usage") and chunk.usage is not None
+    ]
+    assert (
+        not non_last_chunks_with_usage
+    ), f"Non-last chunks with usage not None:\n" + "\n".join(
+        f"Chunk ID: {chunk.id}, Usage: {chunk.usage}, Content: {chunk.choices[0].delta.content}"
+        for chunk in non_last_chunks_with_usage
+    )


 def test_openai_stream_options_call_text_completion():
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -680,12 +680,6 @@ class ModelResponse(OpenAIObject):
                usage = usage
        elif stream is None or stream == False:
            usage = Usage()
-        elif (
-            stream == True
-            and stream_options is not None
-            and stream_options.get("include_usage") == True
-        ):
-            usage = Usage()
        if hidden_params:
            self._hidden_params = hidden_params

@@ -11107,8 +11101,7 @@ class CustomStreamWrapper:
            model_response.system_fingerprint = self.system_fingerprint
        model_response._hidden_params["custom_llm_provider"] = self.custom_llm_provider
        model_response._hidden_params["created_at"] = time.time()
-        model_response.choices = [StreamingChoices()]
-        model_response.choices[0].finish_reason = None
+        model_response.choices = [StreamingChoices(finish_reason=None)]
        return model_response

    def is_delta_empty(self, delta: Delta) -> bool:
@@ -11463,8 +11456,13 @@ class CustomStreamWrapper:
                if (
                    self.stream_options is not None
                    and self.stream_options["include_usage"] == True
+                    and response_obj["usage"] is not None
                ):
-                    model_response.usage = response_obj["usage"]
+                    model_response.usage = litellm.Usage(
+                        prompt_tokens=response_obj["usage"].prompt_tokens,
+                        completion_tokens=response_obj["usage"].completion_tokens,
+                        total_tokens=response_obj["usage"].total_tokens,
+                    )

            model_response.model = self.model
            print_verbose(