fix(utils.py): fix sync/async stream logging

Krrish Dholakia 2024-01-22 13:52:30 -08:00
parent e423aeff85
commit 7165a927b0
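
The tests below exercise the pattern this fix targets: when stream=True, the complete streamed response should reach both the sync and async custom-logger callbacks. A minimal sketch of that pattern, assuming only the litellm CustomLogger hooks already used in the diff (the handler name, helper function, and prompt here are illustrative, not part of this commit):

import asyncio
import litellm
from litellm.integrations.custom_logger import CustomLogger


class StreamCollector(CustomLogger):
    # illustrative handler; stores the complete response passed to the callbacks
    def __init__(self):
        self.sync_complete_response = None
        self.async_complete_response = None

    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        # sync streaming call: response_obj is the rebuilt complete response
        if kwargs.get("stream") == True:
            self.sync_complete_response = response_obj

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        # async streaming call: the same complete response arrives here
        if kwargs.get("stream") == True:
            self.async_complete_response = response_obj


handler = StreamCollector()
litellm.callbacks = [handler]


async def check_async_stream_logging():
    response = await litellm.acompletion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hi"}],
        stream=True,
    )
    streamed = ""
    async for chunk in response:
        streamed += chunk["choices"][0]["delta"]["content"] or ""
    await asyncio.sleep(1)  # callbacks run asynchronously
    content = handler.async_complete_response["choices"][0]["message"]["content"]
    assert content == streamed


# asyncio.run(check_async_stream_logging())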


@@ -1,56 +1,58 @@
### What this tests ####
import sys, os, time, inspect, asyncio, traceback
import pytest

sys.path.insert(0, os.path.abspath("../.."))

from litellm import completion, embedding
import litellm
from litellm.integrations.custom_logger import CustomLogger


class MyCustomHandler(CustomLogger):
    complete_streaming_response_in_callback = ""

    def __init__(self):
        self.success: bool = False  # type: ignore
        self.failure: bool = False  # type: ignore
        self.async_success: bool = False  # type: ignore
        self.async_success_embedding: bool = False  # type: ignore
        self.async_failure: bool = False  # type: ignore
        self.async_failure_embedding: bool = False  # type: ignore
        self.async_completion_kwargs = None  # type: ignore
        self.async_embedding_kwargs = None  # type: ignore
        self.async_embedding_response = None  # type: ignore
        self.async_completion_kwargs_fail = None  # type: ignore
        self.async_embedding_kwargs_fail = None  # type: ignore
        self.stream_collected_response = None  # type: ignore
        self.sync_stream_collected_response = None  # type: ignore
        self.user = None  # type: ignore
        self.data_sent_to_api: dict = {}

    def log_pre_api_call(self, model, messages, kwargs):
        print(f"Pre-API Call")
        self.data_sent_to_api = kwargs["additional_args"].get("complete_input_dict", {})

    def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
        print(f"Post-API Call")

    def log_stream_event(self, kwargs, response_obj, start_time, end_time):
        print(f"On Stream")

    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        print(f"On Success")
        self.success = True
        if kwargs.get("stream") == True:
            self.sync_stream_collected_response = response_obj

    def log_failure_event(self, kwargs, response_obj, start_time, end_time):
        print(f"On Failure")
        self.failure = True

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        print(f"On Async success")
        print(f"received kwargs user: {kwargs['user']}")
        self.async_success = True

@@ -62,24 +64,30 @@ class MyCustomHandler(CustomLogger):
            self.stream_collected_response = response_obj
        self.async_completion_kwargs = kwargs
        self.user = kwargs.get("user", None)

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        print(f"On Async Failure")
        self.async_failure = True
        if kwargs.get("model") == "text-embedding-ada-002":
            self.async_failure_embedding = True
            self.async_embedding_kwargs_fail = kwargs
        self.async_completion_kwargs_fail = kwargs


class TmpFunction:
    complete_streaming_response_in_callback = ""
    async_success: bool = False

    async def async_test_logging_fn(self, kwargs, completion_obj, start_time, end_time):
        print(f"ON ASYNC LOGGING")
        self.async_success = True
        print(
            f'kwargs.get("complete_streaming_response"): {kwargs.get("complete_streaming_response")}'
        )
        self.complete_streaming_response_in_callback = kwargs.get(
            "complete_streaming_response"
        )


def test_async_chat_openai_stream():

@@ -88,29 +96,39 @@ def test_async_chat_openai_stream():
        # litellm.set_verbose = True
        litellm.success_callback = [tmp_function.async_test_logging_fn]
        complete_streaming_response = ""

        async def call_gpt():
            nonlocal complete_streaming_response
            response = await litellm.acompletion(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
                stream=True,
            )
            async for chunk in response:
                complete_streaming_response += (
                    chunk["choices"][0]["delta"]["content"] or ""
                )
                print(complete_streaming_response)

        asyncio.run(call_gpt())
        complete_streaming_response = complete_streaming_response.strip("'")
        response1 = tmp_function.complete_streaming_response_in_callback["choices"][0][
            "message"
        ]["content"]
        response2 = complete_streaming_response
        # assert [ord(c) for c in response1] == [ord(c) for c in response2]
        print(f"response1: {response1}")
        print(f"response2: {response2}")
        assert response1 == response2
        assert tmp_function.async_success == True
    except Exception as e:
        print(e)
        pytest.fail(f"An error occurred - {str(e)}")


# test_async_chat_openai_stream()


def test_completion_azure_stream_moderation_failure():
    try:
        customHandler = MyCustomHandler()

@@ -122,11 +140,11 @@ def test_completion_azure_stream_moderation_failure():
                "content": "how do i kill someone",
            },
        ]
        try:
            response = completion(
                model="azure/chatgpt-v-2", messages=messages, stream=True
            )
            for chunk in response:
                print(f"chunk: {chunk}")
                continue
        except Exception as e:

@@ -139,7 +157,7 @@ def test_completion_azure_stream_moderation_failure():
def test_async_custom_handler_stream():
    try:
        # [PROD Test] - Do not DELETE
        # checks if the model response available in the async + stream callbacks is equal to the received response
        customHandler2 = MyCustomHandler()
        litellm.callbacks = [customHandler2]

@@ -152,32 +170,37 @@ def test_async_custom_handler_stream():
            },
        ]
        complete_streaming_response = ""

        async def test_1():
            nonlocal complete_streaming_response
            response = await litellm.acompletion(
                model="azure/chatgpt-v-2", messages=messages, stream=True
            )
            async for chunk in response:
                complete_streaming_response += (
                    chunk["choices"][0]["delta"]["content"] or ""
                )
                print(complete_streaming_response)

        asyncio.run(test_1())
        response_in_success_handler = customHandler2.stream_collected_response
        response_in_success_handler = response_in_success_handler["choices"][0][
            "message"
        ]["content"]
        print("\n\n")
        print("response_in_success_handler: ", response_in_success_handler)
        print("complete_streaming_response: ", complete_streaming_response)
        assert response_in_success_handler == complete_streaming_response
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


# test_async_custom_handler_stream()


def test_azure_completion_stream():
    # [PROD Test] - Do not DELETE
    # test if completion() + sync custom logger get the same complete stream response
    try:
        # checks if the model response available in the async + stream callbacks is equal to the received response

@@ -194,17 +217,17 @@ def test_azure_completion_stream():
        complete_streaming_response = ""
        response = litellm.completion(
            model="azure/chatgpt-v-2", messages=messages, stream=True
        )
        for chunk in response:
            complete_streaming_response += chunk["choices"][0]["delta"]["content"] or ""
            print(complete_streaming_response)
        time.sleep(0.5)  # wait 1/2 second before checking callbacks
        response_in_success_handler = customHandler2.sync_stream_collected_response
        response_in_success_handler = response_in_success_handler["choices"][0][
            "message"
        ]["content"]
        print("\n\n")
        print("response_in_success_handler: ", response_in_success_handler)
        print("complete_streaming_response: ", complete_streaming_response)

@@ -212,24 +235,32 @@ def test_azure_completion_stream():
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")


@pytest.mark.asyncio
async def test_async_custom_handler_completion():
    try:
        customHandler_success = MyCustomHandler()
        customHandler_failure = MyCustomHandler()
        # success
        assert customHandler_success.async_success == False
        litellm.callbacks = [customHandler_success]
        response = await litellm.acompletion(
            model="gpt-3.5-turbo",
            messages=[
                {
                    "role": "user",
                    "content": "hello from litellm test",
                }
            ],
        )
        await asyncio.sleep(1)
        assert (
            customHandler_success.async_success == True
        ), "async success is not set to True even after success"
        assert (
            customHandler_success.async_completion_kwargs.get("model")
            == "gpt-3.5-turbo"
        )
        # failure
        litellm.callbacks = [customHandler_failure]
        messages = [

@@ -240,80 +271,119 @@ async def test_async_custom_handler_completion():
            },
        ]
        assert customHandler_failure.async_failure == False
        try:
            response = await litellm.acompletion(
                model="gpt-3.5-turbo",
                messages=messages,
                api_key="my-bad-key",
            )
        except:
            pass
        assert (
            customHandler_failure.async_failure == True
        ), "async failure is not set to True even after failure"
        assert (
            customHandler_failure.async_completion_kwargs_fail.get("model")
            == "gpt-3.5-turbo"
        )
        assert (
            len(
                str(customHandler_failure.async_completion_kwargs_fail.get("exception"))
            )
            > 10
        )  # expect APIError("OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: test. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}"), 'traceback_exception': 'Traceback (most recent call last):\n File "/Users/ishaanjaffer/Github/litellm/litellm/llms/openai.py", line 269, in acompletion\n response = await openai_aclient.chat.completions.create(**data)\n File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/openai/resources/chat/completions.py", line 119
        litellm.callbacks = []
        print("Passed setting async failure")
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")


# asyncio.run(test_async_custom_handler_completion())


@pytest.mark.asyncio
async def test_async_custom_handler_embedding():
    try:
        customHandler_embedding = MyCustomHandler()
        litellm.callbacks = [customHandler_embedding]
        # success
        assert customHandler_embedding.async_success_embedding == False
        response = await litellm.aembedding(
            model="text-embedding-ada-002",
            input=["hello world"],
        )
        await asyncio.sleep(1)
        assert (
            customHandler_embedding.async_success_embedding == True
        ), "async_success_embedding is not set to True even after success"
        assert (
            customHandler_embedding.async_embedding_kwargs.get("model")
            == "text-embedding-ada-002"
        )
        assert (
            customHandler_embedding.async_embedding_response["usage"]["prompt_tokens"]
            == 2
        )
        print("Passed setting async success: Embedding")
        # failure
        assert customHandler_embedding.async_failure_embedding == False
        try:
            response = await litellm.aembedding(
                model="text-embedding-ada-002",
                input=["hello world"],
                api_key="my-bad-key",
            )
        except:
            pass
        assert (
            customHandler_embedding.async_failure_embedding == True
        ), "async failure embedding is not set to True even after failure"
        assert (
            customHandler_embedding.async_embedding_kwargs_fail.get("model")
            == "text-embedding-ada-002"
        )
        assert (
            len(
                str(
                    customHandler_embedding.async_embedding_kwargs_fail.get("exception")
                )
            )
            > 10
        )  # expect APIError("OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: test. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}"), 'traceback_exception': 'Traceback (most recent call last):\n File "/Users/ishaanjaffer/Github/litellm/litellm/llms/openai.py", line 269, in acompletion\n response = await openai_aclient.chat.completions.create(**data)\n File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/openai/resources/chat/completions.py", line 119
    except Exception as e:
        pytest.fail(f"An exception occurred - {str(e)}")


# asyncio.run(test_async_custom_handler_embedding())


@pytest.mark.asyncio
async def test_async_custom_handler_embedding_optional_param():
    """
    Tests if the openai optional params for embedding - user + encoding_format,
    are logged
    """
    customHandler_optional_params = MyCustomHandler()
    litellm.callbacks = [customHandler_optional_params]
    response = await litellm.aembedding(
        model="azure/azure-embedding-model", input=["hello world"], user="John"
    )
    await asyncio.sleep(1)  # success callback is async
    assert customHandler_optional_params.user == "John"
    assert (
        customHandler_optional_params.user
        == customHandler_optional_params.data_sent_to_api["user"]
    )


# asyncio.run(test_async_custom_handler_embedding_optional_param())


@pytest.mark.asyncio
async def test_async_custom_handler_embedding_optional_param_bedrock():
    """
    Tests if the openai optional params for embedding - user + encoding_format,
    are logged
    but makes sure these are not sent to the non-openai/azure endpoint (raises errors).

@@ -323,42 +393,68 @@ async def test_async_custom_handler_embedding_optional_param_bedrock():
    customHandler_optional_params = MyCustomHandler()
    litellm.callbacks = [customHandler_optional_params]
    response = await litellm.aembedding(
        model="bedrock/amazon.titan-embed-text-v1", input=["hello world"], user="John"
    )
    await asyncio.sleep(1)  # success callback is async
    assert customHandler_optional_params.user == "John"
    assert "user" not in customHandler_optional_params.data_sent_to_api


def test_redis_cache_completion_stream():
    from litellm import Cache

    # Important Test - This tests if we can add to streaming cache, when custom callbacks are set
    import random

    try:
        print("\nrunning test_redis_cache_completion_stream")
        litellm.set_verbose = True
        random_number = random.randint(
            1, 100000
        )  # add a random number to ensure it's always adding / reading from cache
        messages = [
            {
                "role": "user",
                "content": f"write a one sentence poem about: {random_number}",
            }
        ]
        litellm.cache = Cache(
            type="redis",
            host=os.environ["REDIS_HOST"],
            port=os.environ["REDIS_PORT"],
            password=os.environ["REDIS_PASSWORD"],
        )
        print("test for caching, streaming + completion")
        response1 = completion(
            model="gpt-3.5-turbo",
            messages=messages,
            max_tokens=40,
            temperature=0.2,
            stream=True,
        )
        response_1_content = ""
        for chunk in response1:
            print(chunk)
            response_1_content += chunk.choices[0].delta.content or ""
        print(response_1_content)
        time.sleep(0.1)  # sleep for 0.1 seconds allow set cache to occur
        response2 = completion(
            model="gpt-3.5-turbo",
            messages=messages,
            max_tokens=40,
            temperature=0.2,
            stream=True,
        )
        response_2_content = ""
        for chunk in response2:
            print(chunk)
            response_2_content += chunk.choices[0].delta.content or ""
        print("\nresponse 1", response_1_content)
        print("\nresponse 2", response_2_content)
        assert (
            response_1_content == response_2_content
        ), f"Response 1 != Response 2. Same params, Response 1{response_1_content} != Response 2{response_2_content}"
        litellm.success_callback = []
        litellm._async_success_callback = []
        litellm.cache = None

@@ -366,4 +462,6 @@ def test_redis_cache_completion_stream():
        print(e)
        litellm.success_callback = []
        raise e


# test_redis_cache_completion_stream()