forked from phoenix/litellm-mirror

(fix) caching use same "created" in response_object

parent 00b001b96b · commit 4679c7b99a

2 changed files with 29 additions and 4 deletions
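In short, the change makes a cache hit reuse the cached response's "created" timestamp instead of stamping a fresh one, so a cached completion compares equal to the original on id, created, and content. A minimal sketch of the idea (hypothetical names, not litellm internals):

```python
# Minimal sketch (hypothetical names, not litellm internals): a cache hit
# reuses the stored "created" timestamp instead of stamping time.time() again.
import time

_cache: dict = {}


def completion_with_cache(prompt: str) -> dict:
    if prompt in _cache:
        cached = _cache[prompt]
        # Rebuild the response from the cached dict, keeping "created" as-is.
        return {"id": cached["id"], "created": cached["created"], "content": cached["content"]}
    response = {
        "id": f"chatcmpl-{len(_cache)}",
        "created": int(time.time()),
        "content": f"echo: {prompt}",
    }
    _cache[prompt] = response
    return response


r1 = completion_with_cache("hello")
r2 = completion_with_cache("hello")  # second call is a cache hit
assert r1["id"] == r2["id"] and r1["created"] == r2["created"]
```

The test changes below assert exactly this for both the non-streaming Redis path and the streaming S3 path.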
@@ -276,7 +276,7 @@ def test_redis_cache_completion():
         port=os.environ["REDIS_PORT"],
         password=os.environ["REDIS_PASSWORD"],
     )
-    print("test2 for caching")
+    print("test2 for Redis Caching - non streaming")
     response1 = completion(
         model="gpt-3.5-turbo", messages=messages, caching=True, max_tokens=20
     )

@@ -328,6 +328,10 @@ def test_redis_cache_completion():
         print(f"response4: {response4}")
         pytest.fail(f"Error occurred:")
 
+    assert response1.id == response2.id
+    assert response1.created == response2.created
+    assert response1.choices[0].message.content == response2.choices[0].message.content
+
 
 # test_redis_cache_completion()
 

@@ -559,8 +563,11 @@ def test_s3_cache_acompletion_stream_azure():
         response_1_content = ""
         response_2_content = ""
 
+        response_1_created = ""
+        response_2_created = ""
+
         async def call1():
-            nonlocal response_1_content
+            nonlocal response_1_content, response_1_created
             response1 = await litellm.acompletion(
                 model="azure/chatgpt-v-2",
                 messages=messages,

@@ -570,6 +577,7 @@ def test_s3_cache_acompletion_stream_azure():
             )
             async for chunk in response1:
                 print(chunk)
+                response_1_created = chunk.created
                 response_1_content += chunk.choices[0].delta.content or ""
             print(response_1_content)
 

@@ -578,7 +586,7 @@ def test_s3_cache_acompletion_stream_azure():
         print("\n\n Response 1 content: ", response_1_content, "\n\n")
 
         async def call2():
-            nonlocal response_2_content
+            nonlocal response_2_content, response_2_created
             response2 = await litellm.acompletion(
                 model="azure/chatgpt-v-2",
                 messages=messages,

@@ -589,14 +597,22 @@ def test_s3_cache_acompletion_stream_azure():
             async for chunk in response2:
                 print(chunk)
                 response_2_content += chunk.choices[0].delta.content or ""
+                response_2_created = chunk.created
             print(response_2_content)
 
         asyncio.run(call2())
         print("\nresponse 1", response_1_content)
         print("\nresponse 2", response_2_content)
 
         assert (
             response_1_content == response_2_content
         ), f"Response 1 != Response 2. Same params, Response 1{response_1_content} != Response 2{response_2_content}"
 
+        print("response 1 created", response_1_created)
+        print("response 2 created", response_2_created)
+
+        assert response_1_created == response_2_created
+
         litellm.cache = None
         litellm.success_callback = []
         litellm._async_success_callback = []

@@ -605,7 +621,7 @@ def test_s3_cache_acompletion_stream_azure():
         raise e
 
 
-test_s3_cache_acompletion_stream_azure()
+# test_s3_cache_acompletion_stream_azure()
 
 
 # test_redis_cache_acompletion_stream_bedrock()

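The streaming test above captures chunk.created from both calls and asserts the timestamps match once the second call is served from cache. A condensed, self-contained version of that assertion pattern, replaying hypothetical cached chunks instead of real Azure responses:

```python
# Hypothetical stand-in for the cached streaming test above: both "calls"
# replay the same cached chunks, so content and created timestamps match.
import asyncio


async def fake_stream(chunks):
    for chunk in chunks:
        yield chunk


async def consume(stream):
    content, created = "", None
    async for chunk in stream:
        created = chunk["created"]
        content += chunk["delta"] or ""
    return content, created


async def main():
    cached_chunks = [
        {"created": 1700000000, "delta": "Hello"},
        {"created": 1700000000, "delta": " world"},
    ]
    response_1_content, response_1_created = await consume(fake_stream(cached_chunks))
    response_2_content, response_2_created = await consume(fake_stream(cached_chunks))
    assert response_1_content == response_2_content
    assert response_1_created == response_2_created


asyncio.run(main())
```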
@@ -4937,6 +4937,9 @@ async def convert_to_streaming_response_async(response_object: Optional[dict] =
         if "id" in response_object:
             model_response_object.id = response_object["id"]
 
+        if "created" in response_object:
+            model_response_object.created = response_object["created"]
+
         if "system_fingerprint" in response_object:
             model_response_object.system_fingerprint = response_object["system_fingerprint"]
 

@@ -4981,6 +4984,9 @@ def convert_to_streaming_response(response_object: Optional[dict] = None):
         if "id" in response_object:
             model_response_object.id = response_object["id"]
 
+        if "created" in response_object:
+            model_response_object.created = response_object["created"]
+
         if "system_fingerprint" in response_object:
             model_response_object.system_fingerprint = response_object["system_fingerprint"]
 

@@ -5036,6 +5042,9 @@ def convert_to_model_response_object(
             model_response_object.usage.prompt_tokens = response_object["usage"].get("prompt_tokens", 0)  # type: ignore
             model_response_object.usage.total_tokens = response_object["usage"].get("total_tokens", 0)  # type: ignore
 
+        if "created" in response_object:
+            model_response_object.created = response_object["created"]
+
         if "id" in response_object:
             model_response_object.id = response_object["id"]
 
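All three converters in the second file now follow the same pattern: optional top-level fields are copied from the raw response dict onto the model response object only when present, with "created" handled alongside "id" and "system_fingerprint". A simplified, runnable sketch of that pattern (a stand-in class, not litellm's actual ModelResponse):

```python
# Simplified sketch of the converter pattern: copy optional top-level fields
# from the raw response dict onto the response object only when present.
from typing import Optional


class SimpleModelResponse:  # hypothetical stand-in for litellm's ModelResponse
    def __init__(self):
        self.id: Optional[str] = None
        self.created: Optional[int] = None
        self.system_fingerprint: Optional[str] = None


def convert(response_object: Optional[dict] = None) -> SimpleModelResponse:
    if response_object is None:
        raise Exception("Error in response object format")
    model_response_object = SimpleModelResponse()
    if "id" in response_object:
        model_response_object.id = response_object["id"]
    if "created" in response_object:
        model_response_object.created = response_object["created"]
    if "system_fingerprint" in response_object:
        model_response_object.system_fingerprint = response_object["system_fingerprint"]
    return model_response_object


converted = convert({"id": "chatcmpl-123", "created": 1700000000})
assert converted.created == 1700000000 and converted.system_fingerprint is None
```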