diff --git a/litellm/tests/test_caching.py b/litellm/tests/test_caching.py
index 0ca679248..86751e816 100644
--- a/litellm/tests/test_caching.py
+++ b/litellm/tests/test_caching.py
@@ -276,7 +276,7 @@ def test_redis_cache_completion():
         port=os.environ["REDIS_PORT"],
         password=os.environ["REDIS_PASSWORD"],
     )
-    print("test2 for caching")
+    print("test2 for Redis Caching - non streaming")
     response1 = completion(
         model="gpt-3.5-turbo", messages=messages, caching=True, max_tokens=20
     )
@@ -328,6 +328,10 @@ def test_redis_cache_completion():
         print(f"response4: {response4}")
         pytest.fail(f"Error occurred:")
 
+    assert response1.id == response2.id
+    assert response1.created == response2.created
+    assert response1.choices[0].message.content == response2.choices[0].message.content
+
 
 # test_redis_cache_completion()
 
@@ -559,8 +563,11 @@ def test_s3_cache_acompletion_stream_azure():
         response_1_content = ""
         response_2_content = ""
 
+        response_1_created = ""
+        response_2_created = ""
+
         async def call1():
-            nonlocal response_1_content
+            nonlocal response_1_content, response_1_created
             response1 = await litellm.acompletion(
                 model="azure/chatgpt-v-2",
                 messages=messages,
@@ -570,6 +577,7 @@ def test_s3_cache_acompletion_stream_azure():
             )
             async for chunk in response1:
                 print(chunk)
+                response_1_created = chunk.created
                 response_1_content += chunk.choices[0].delta.content or ""
             print(response_1_content)
 
@@ -578,7 +586,7 @@ def test_s3_cache_acompletion_stream_azure():
         print("\n\n Response 1 content: ", response_1_content, "\n\n")
 
         async def call2():
-            nonlocal response_2_content
+            nonlocal response_2_content, response_2_created
             response2 = await litellm.acompletion(
                 model="azure/chatgpt-v-2",
                 messages=messages,
@@ -589,14 +597,22 @@ def test_s3_cache_acompletion_stream_azure():
             async for chunk in response2:
                 print(chunk)
                 response_2_content += chunk.choices[0].delta.content or ""
+                response_2_created = chunk.created
             print(response_2_content)
 
         asyncio.run(call2())
         print("\nresponse 1", response_1_content)
         print("\nresponse 2", response_2_content)
+
         assert (
             response_1_content == response_2_content
         ), f"Response 1 != Response 2. Same params, Response 1{response_1_content} != Response 2{response_2_content}"
+
+        print("response 1 created", response_1_created)
+        print("response 2 created", response_2_created)
+
+        assert response_1_created == response_2_created
+
         litellm.cache = None
         litellm.success_callback = []
         litellm._async_success_callback = []
@@ -605,7 +621,7 @@ def test_s3_cache_acompletion_stream_azure():
         raise e
 
 
-test_s3_cache_acompletion_stream_azure()
+# test_s3_cache_acompletion_stream_azure()
 
 
 # test_redis_cache_acompletion_stream_bedrock()
diff --git a/litellm/utils.py b/litellm/utils.py
index 09cb52343..8f93fb620 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -4937,6 +4937,9 @@ async def convert_to_streaming_response_async(response_object: Optional[dict] =
     if "id" in response_object:
         model_response_object.id = response_object["id"]
 
+    if "created" in response_object:
+        model_response_object.created = response_object["created"]
+
     if "system_fingerprint" in response_object:
         model_response_object.system_fingerprint = response_object["system_fingerprint"]
 
@@ -4981,6 +4984,9 @@ def convert_to_streaming_response(response_object: Optional[dict] = None):
     if "id" in response_object:
         model_response_object.id = response_object["id"]
 
+    if "created" in response_object:
+        model_response_object.created = response_object["created"]
+
     if "system_fingerprint" in response_object:
         model_response_object.system_fingerprint = response_object["system_fingerprint"]
 
@@ -5036,6 +5042,9 @@ def convert_to_model_response_object(
             model_response_object.usage.prompt_tokens = response_object["usage"].get("prompt_tokens", 0)  # type: ignore
             model_response_object.usage.total_tokens = response_object["usage"].get("total_tokens", 0)  # type: ignore
 
+        if "created" in response_object:
+            model_response_object.created = response_object["created"]
+
         if "id" in response_object:
             model_response_object.id = response_object["id"]
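
For illustration, a minimal standalone sketch (separate from the diff above) of the field-copying pattern that the convert_to_* helpers apply when rebuilding a response from the cache. CachedResponse and rebuild_from_cache are hypothetical names used only for this example; they are not litellm APIs.

# Illustrative sketch only -- not litellm code. It mirrors the pattern the diff
# adds to convert_to_model_response_object / convert_to_streaming_response:
# copy "id" and "created" from the cached dict onto the rebuilt response, so a
# cache hit reports the same identity fields as the original call.
from dataclasses import dataclass, field
import time
import uuid


@dataclass
class CachedResponse:  # hypothetical stand-in for litellm's ModelResponse
    id: str = field(default_factory=lambda: f"chatcmpl-{uuid.uuid4()}")
    created: int = field(default_factory=lambda: int(time.time()))
    content: str = ""


def rebuild_from_cache(response_object: dict) -> CachedResponse:
    """Rebuild a response from its cached dict, preserving identity fields."""
    rebuilt = CachedResponse(content=response_object.get("content", ""))
    # Without these two checks the rebuilt object would get a fresh id and a
    # new "created" timestamp, so assertions like
    # response1.created == response2.created in the tests above would fail
    # on a cache hit.
    if "id" in response_object:
        rebuilt.id = response_object["id"]
    if "created" in response_object:
        rebuilt.created = response_object["created"]
    return rebuilt


if __name__ == "__main__":
    original = CachedResponse(content="hello")
    cached = {"id": original.id, "created": original.created, "content": original.content}
    replayed = rebuild_from_cache(cached)
    assert replayed.id == original.id and replayed.created == original.created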