diff --git a/litellm/tests/test_custom_logger.py b/litellm/tests/test_custom_logger.py
index b2e2b7d220..7e8a53561f 100644
--- a/litellm/tests/test_custom_logger.py
+++ b/litellm/tests/test_custom_logger.py
@@ -490,7 +490,7 @@ def test_redis_cache_completion_stream():
             response_1_content += chunk.choices[0].delta.content or ""
         print(response_1_content)
 
-        time.sleep(0.1)  # sleep for 0.1 seconds allow set cache to occur
+        time.sleep(1)  # sleep for 1 second to allow the set cache to occur
         response2 = completion(
             model="gpt-3.5-turbo",
             messages=messages,
@@ -505,8 +505,10 @@ def test_redis_cache_completion_stream():
                 response_2_id = chunk.id
             print(chunk)
             response_2_content += chunk.choices[0].delta.content or ""
-        print("\nresponse 1", response_1_content)
-        print("\nresponse 2", response_2_content)
+        print(
+            f"\nresponse 1: {response_1_content}",
+        )
+        print(f"\nresponse 2: {response_2_content}")
         assert (
             response_1_id == response_2_id
         ), f"Response 1 != Response 2. Same params, Response 1{response_1_content} != Response 2{response_2_content}"
@@ -516,6 +518,7 @@ def test_redis_cache_completion_stream():
         litellm.success_callback = []
         litellm._async_success_callback = []
         litellm.cache = None
+        raise Exception("it worked!")
     except Exception as e:
         print(e)
         litellm.success_callback = []
diff --git a/litellm/utils.py b/litellm/utils.py
index bd47e08ba3..496bb75ec4 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -8458,6 +8458,7 @@ class CustomStreamWrapper:
         self.completion_stream = completion_stream
         self.sent_first_chunk = False
         self.sent_last_chunk = False
+        self.system_fingerprint: Optional[str] = None
         self.received_finish_reason: Optional[str] = None
         self.special_tokens = ["<|assistant|>", "<|system|>", "<|user|>", "<s>", "</s>"]
         self.holding_chunk = ""
@@ -9373,6 +9374,7 @@ class CustomStreamWrapper:
                 print_verbose(f"completion obj content: {completion_obj['content']}")
                 if hasattr(chunk, "id"):
                     model_response.id = chunk.id
+                    self.response_id = chunk.id
                 if response_obj["is_finished"]:
                     self.received_finish_reason = response_obj["finish_reason"]
             else:  # openai / azure chat model
@@ -9397,6 +9399,7 @@ class CustomStreamWrapper:
                     )
                     if hasattr(response_obj["original_chunk"], "id"):
                         model_response.id = response_obj["original_chunk"].id
+                        self.response_id = model_response.id
                     if response_obj["logprobs"] is not None:
                         model_response.choices[0].logprobs = response_obj["logprobs"]
 
@@ -9412,6 +9415,7 @@ class CustomStreamWrapper:
                 # enter this branch when no content has been passed in response
                 original_chunk = response_obj.get("original_chunk", None)
                 model_response.id = original_chunk.id
+                self.response_id = original_chunk.id
                 if len(original_chunk.choices) > 0:
                     if (
                         original_chunk.choices[0].delta.function_call is not None
@@ -9493,6 +9497,7 @@ class CustomStreamWrapper:
                 original_chunk = response_obj.get("original_chunk", None)
                 if original_chunk:
                     model_response.id = original_chunk.id
+                    self.response_id = original_chunk.id
                 if len(original_chunk.choices) > 0:
                     try:
                         delta = dict(original_chunk.choices[0].delta)
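
Taken together, the utils.py hunks record each upstream chunk's id on CustomStreamWrapper as self.response_id, so a cached replay of a stream surfaces the same id as the original call; that is what the patched test asserts via response_1_id == response_2_id. A minimal standalone sketch of the same check, assuming a configured litellm cache (the helper name, in-memory cache, model, and prompt are illustrative, not part of the patch):

import time

import litellm
from litellm import completion
from litellm.caching import Cache

# assumption: a local in-memory cache stands in for the test's Redis cache
litellm.cache = Cache()


def assert_cached_stream_reuses_id(messages):
    """Stream the same prompt twice with caching enabled and verify the
    second (cached) stream replays the first stream's response id."""
    ids = []
    for _ in range(2):
        response = completion(
            model="gpt-3.5-turbo",
            messages=messages,
            stream=True,
            caching=True,
        )
        stream_id = None
        for chunk in response:
            stream_id = stream_id or chunk.id  # all chunks of one stream share an id
        ids.append(stream_id)
        time.sleep(1)  # let the async cache write finish, as the patched test does
    assert ids[0] == ids[1], f"cache replay changed the response id: {ids}"


assert_cached_stream_reuses_id(
    [{"role": "user", "content": "write a one-sentence poem about summer"}]
)

The pause between the two calls mirrors the test's own sleep: the cache set happens asynchronously after the first stream is drained, so the second call would miss the cache without it.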