Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 11:14:04 +00:00)
fix(utils.py): persist response id across chunks
parent dc2c4af631
commit 1ac641165b

2 changed files with 11 additions and 3 deletions
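What the fix does: CustomStreamWrapper rebuilds a streamed completion chunk by chunk, and the utils.py hunks below copy the upstream chunk's id onto the wrapper (self.response_id) whenever model_response.id is set from an original chunk, so the id can be persisted across the whole stream. The following is only a minimal illustrative sketch of that persist-and-reuse pattern; StreamIdExample and the example chunk ids are hypothetical and not litellm code.

from typing import Optional


class StreamIdExample:
    """Sketch: remember the id an upstream chunk provides and reuse it later."""

    def __init__(self) -> None:
        self.response_id: Optional[str] = None  # persisted across chunks

    def stamp(self, chunk_id: Optional[str]) -> str:
        # Remember the most recent id an upstream chunk provided and fall back
        # to it for chunks that arrive without one.
        if chunk_id is not None:
            self.response_id = chunk_id
        return self.response_id or "chatcmpl-unknown"


wrapper = StreamIdExample()
ids = [wrapper.stamp(cid) for cid in ["chatcmpl-abc123", None, None]]
assert len(set(ids)) == 1  # every chunk reports the same response id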
@@ -490,7 +490,7 @@ def test_redis_cache_completion_stream():
             response_1_content += chunk.choices[0].delta.content or ""
         print(response_1_content)

-        time.sleep(0.1)  # sleep for 0.1 seconds allow set cache to occur
+        time.sleep(1)  # sleep for 0.1 seconds allow set cache to occur
         response2 = completion(
             model="gpt-3.5-turbo",
             messages=messages,
@@ -505,8 +505,10 @@ def test_redis_cache_completion_stream():
             response_2_id = chunk.id
             print(chunk)
             response_2_content += chunk.choices[0].delta.content or ""
-        print("\nresponse 1", response_1_content)
-        print("\nresponse 2", response_2_content)
+        print(
+            f"\nresponse 1: {response_1_content}",
+        )
+        print(f"\nresponse 2: {response_2_content}")
         assert (
             response_1_id == response_2_id
         ), f"Response 1 != Response 2. Same params, Response 1{response_1_content} != Response 2{response_2_content}"
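The assertion above checks that two identically parameterized calls (the second served from the Redis cache) report the same response id. A hedged usage sketch of the related per-stream guarantee named by the commit title, that every chunk within one streamed response carries the same id, is below; the model, messages, and max_tokens values are placeholders, and running it requires real provider credentials.

import litellm

chunks = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hello"}],
    max_tokens=20,
    stream=True,
)
seen_ids = {chunk.id for chunk in chunks}
assert len(seen_ids) == 1, f"expected one id across chunks, got {seen_ids}"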
@@ -516,6 +518,7 @@ def test_redis_cache_completion_stream():
         litellm.success_callback = []
         litellm._async_success_callback = []
         litellm.cache = None
+        raise Exception("it worked!")
     except Exception as e:
         print(e)
         litellm.success_callback = []

@@ -8458,6 +8458,7 @@ class CustomStreamWrapper:
         self.completion_stream = completion_stream
         self.sent_first_chunk = False
         self.sent_last_chunk = False
+        self.system_fingerprint: Optional[str] = None
         self.received_finish_reason: Optional[str] = None
         self.special_tokens = ["<|assistant|>", "<|system|>", "<|user|>", "<s>", "</s>"]
         self.holding_chunk = ""
@@ -9373,6 +9374,7 @@ class CustomStreamWrapper:
                 print_verbose(f"completion obj content: {completion_obj['content']}")
                 if hasattr(chunk, "id"):
                     model_response.id = chunk.id
+                    self.response_id = chunk.id
                 if response_obj["is_finished"]:
                     self.received_finish_reason = response_obj["finish_reason"]
             else:  # openai / azure chat model
@@ -9397,6 +9399,7 @@ class CustomStreamWrapper:
                 )
                 if hasattr(response_obj["original_chunk"], "id"):
                     model_response.id = response_obj["original_chunk"].id
+                    self.response_id = model_response.id
                 if response_obj["logprobs"] is not None:
                     model_response.choices[0].logprobs = response_obj["logprobs"]

@@ -9412,6 +9415,7 @@ class CustomStreamWrapper:
                 # enter this branch when no content has been passed in response
                 original_chunk = response_obj.get("original_chunk", None)
                 model_response.id = original_chunk.id
+                self.response_id = original_chunk.id
                 if len(original_chunk.choices) > 0:
                     if (
                         original_chunk.choices[0].delta.function_call is not None
@@ -9493,6 +9497,7 @@ class CustomStreamWrapper:
                 original_chunk = response_obj.get("original_chunk", None)
                 if original_chunk:
                     model_response.id = original_chunk.id
+                    self.response_id = original_chunk.id
                 if len(original_chunk.choices) > 0:
                     try:
                         delta = dict(original_chunk.choices[0].delta)
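Each utils.py hunk stores the upstream chunk's id on the wrapper as soon as one is seen. Presumably the persisted self.response_id is then available when the wrapper builds chunks that do not carry their own id (for example a final synthesized or cached chunk), so the whole stream reports one id. A hedged sketch of that reuse pattern follows; apply_persisted_id is a hypothetical helper, not part of litellm.

def apply_persisted_id(wrapper, model_response):
    # Fall back to the id remembered from an earlier chunk when the current
    # chunk did not bring its own, so every emitted chunk shares one id.
    if not getattr(model_response, "id", None) and getattr(wrapper, "response_id", None):
        model_response.id = wrapper.response_id
    return model_response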