fix(utils.py): fix recreating model response object when stream usage is true

Krrish Dholakia 2024-07-11 21:00:46 -07:00
parent e112379d2f
commit b2e46086dd
3 changed files with 88 additions and 17 deletions


@@ -8951,7 +8951,16 @@ class CustomStreamWrapper:
             model_response.system_fingerprint = self.system_fingerprint
             model_response._hidden_params["custom_llm_provider"] = _logging_obj_llm_provider
             model_response._hidden_params["created_at"] = time.time()
-            model_response.choices = [StreamingChoices(finish_reason=None)]
+
+            if (
+                len(model_response.choices) > 0
+                and hasattr(model_response.choices[0], "delta")
+                and model_response.choices[0].delta is not None
+            ):
+                # do nothing, if object instantiated
+                pass
+            else:
+                model_response.choices = [StreamingChoices(finish_reason=None)]
             return model_response
 
     def is_delta_empty(self, delta: Delta) -> bool:
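
Why this change: when usage is requested on a stream (e.g. `stream_options={"include_usage": True}`), the wrapper recreates the final response object, and unconditionally assigning `model_response.choices = [StreamingChoices(finish_reason=None)]` wiped out any delta already attached to the chunk being rebuilt. The guard above keeps an already-instantiated choice. A minimal sketch of that guard, using simplified stand-in classes (the `Delta`/`StreamingChoices` below are illustrative placeholders, not litellm's actual types):

    class Delta:
        def __init__(self, content=None):
            self.content = content

    class StreamingChoices:
        def __init__(self, finish_reason=None, delta=None):
            self.finish_reason = finish_reason
            self.delta = delta

    def keep_or_reset_choices(choices):
        # Keep choices whose first entry already carries a delta;
        # otherwise fall back to a fresh placeholder choice.
        if len(choices) > 0 and getattr(choices[0], "delta", None) is not None:
            return choices
        return [StreamingChoices(finish_reason=None)]

    populated = [StreamingChoices(delta=Delta(content="hi"))]
    assert keep_or_reset_choices(populated) is populated   # existing delta preserved
    assert keep_or_reset_choices([])[0].delta is None      # fresh placeholder created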
@@ -9892,7 +9901,6 @@ class CustomStreamWrapper:
                     self.rules.post_call_rules(
                         input=self.response_uptil_now, model=self.model
                     )
-                    print_verbose(f"final returned processed chunk: {processed_chunk}")
                     self.chunks.append(processed_chunk)
                     if hasattr(
                         processed_chunk, "usage"
@@ -9906,6 +9914,7 @@ class CustomStreamWrapper:
 
                         # Create a new object without the removed attribute
                         processed_chunk = self.model_response_creator(chunk=obj_dict)
+                    print_verbose(f"final returned processed chunk: {processed_chunk}")
                     return processed_chunk
                 raise StopAsyncIteration
             else:  # temporary patch for non-aiohttp async calls
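
For context on the surrounding hunks: on the final chunk, `usage` is stripped from the chunk's dict and the response is rebuilt via `model_response_creator`, so the `print_verbose` call is moved to after that rebuild and logs the chunk actually returned. A minimal sketch of the strip-and-rebuild step, using a plain dict in place of litellm's response object (names are illustrative only):

    def rebuild_without_usage(processed_chunk: dict) -> dict:
        # Shallow-copy the chunk, drop usage (it is emitted separately at
        # the end of the stream), and recreate the object from the dict.
        obj_dict = dict(processed_chunk)
        obj_dict.pop("usage", None)
        return obj_dict  # stands in for self.model_response_creator(chunk=obj_dict)

    final = rebuild_without_usage({"content": "done", "usage": {"total_tokens": 42}})
    assert "usage" not in final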