diff --git a/litellm/tests/test_custom_logger.py b/litellm/tests/test_custom_logger.py
index de7dd67b4..565df5b25 100644
--- a/litellm/tests/test_custom_logger.py
+++ b/litellm/tests/test_custom_logger.py
@@ -1,56 +1,58 @@
 ### What this tests ####
 import sys, os, time, inspect, asyncio, traceback
 import pytest
-sys.path.insert(0, os.path.abspath('../..'))
+
+sys.path.insert(0, os.path.abspath("../.."))
 
 from litellm import completion, embedding
 import litellm
 from litellm.integrations.custom_logger import CustomLogger
 
+
 class MyCustomHandler(CustomLogger):
     complete_streaming_response_in_callback = ""
+
     def __init__(self):
-        self.success: bool = False                  # type: ignore
-        self.failure: bool = False                  # type: ignore
-        self.async_success: bool = False            # type: ignore
+        self.success: bool = False  # type: ignore
+        self.failure: bool = False  # type: ignore
+        self.async_success: bool = False  # type: ignore
         self.async_success_embedding: bool = False  # type: ignore
-        self.async_failure: bool = False            # type: ignore
+        self.async_failure: bool = False  # type: ignore
         self.async_failure_embedding: bool = False  # type: ignore
 
-        self.async_completion_kwargs = None         # type: ignore
-        self.async_embedding_kwargs = None          # type: ignore
-        self.async_embedding_response = None        # type: ignore
+        self.async_completion_kwargs = None  # type: ignore
+        self.async_embedding_kwargs = None  # type: ignore
+        self.async_embedding_response = None  # type: ignore
 
-        self.async_completion_kwargs_fail = None    # type: ignore
-        self.async_embedding_kwargs_fail = None     # type: ignore
+        self.async_completion_kwargs_fail = None  # type: ignore
+        self.async_embedding_kwargs_fail = None  # type: ignore
 
-        self.stream_collected_response = None       # type: ignore
-        self.sync_stream_collected_response = None  # type: ignore
-        self.user = None                            # type: ignore
+        self.stream_collected_response = None  # type: ignore
+        self.sync_stream_collected_response = None  # type: ignore
+        self.user = None  # type: ignore
         self.data_sent_to_api: dict = {}
 
-    def log_pre_api_call(self, model, messages, kwargs): 
+    def log_pre_api_call(self, model, messages, kwargs):
         print(f"Pre-API Call")
         self.data_sent_to_api = kwargs["additional_args"].get("complete_input_dict", {})
-    
-    def log_post_api_call(self, kwargs, response_obj, start_time, end_time): 
+
+    def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
         print(f"Post-API Call")
-    
+
     def log_stream_event(self, kwargs, response_obj, start_time, end_time):
         print(f"On Stream")
-        
-    def log_success_event(self, kwargs, response_obj, start_time, end_time): 
+
+    def log_success_event(self, kwargs, response_obj, start_time, end_time):
         print(f"On Success")
         self.success = True
         if kwargs.get("stream") == True:
             self.sync_stream_collected_response = response_obj
 
-
-    def log_failure_event(self, kwargs, response_obj, start_time, end_time): 
+    def log_failure_event(self, kwargs, response_obj, start_time, end_time):
         print(f"On Failure")
         self.failure = True
 
-    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): 
+    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
         print(f"On Async success")
         print(f"received kwargs user: {kwargs['user']}")
         self.async_success = True
@@ -62,24 +64,30 @@ class MyCustomHandler(CustomLogger):
             self.stream_collected_response = response_obj
         self.async_completion_kwargs = kwargs
         self.user = kwargs.get("user", None)
-        
-    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time): 
+
+    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
         print(f"On Async Failure")
         self.async_failure = True
         if kwargs.get("model") == "text-embedding-ada-002":
             self.async_failure_embedding = True
             self.async_embedding_kwargs_fail = kwargs
-        
+
         self.async_completion_kwargs_fail = kwargs
 
+
 class TmpFunction:
     complete_streaming_response_in_callback = ""
     async_success: bool = False
+
     async def async_test_logging_fn(self, kwargs, completion_obj, start_time, end_time):
         print(f"ON ASYNC LOGGING")
         self.async_success = True
-        print(f'kwargs.get("complete_streaming_response"): {kwargs.get("complete_streaming_response")}')
-        self.complete_streaming_response_in_callback = kwargs.get("complete_streaming_response")
+        print(
+            f'kwargs.get("complete_streaming_response"): {kwargs.get("complete_streaming_response")}'
+        )
+        self.complete_streaming_response_in_callback = kwargs.get(
+            "complete_streaming_response"
+        )
 
 
 def test_async_chat_openai_stream():
@@ -88,29 +96,39 @@ def test_async_chat_openai_stream():
         # litellm.set_verbose = True
         litellm.success_callback = [tmp_function.async_test_logging_fn]
         complete_streaming_response = ""
+
         async def call_gpt():
             nonlocal complete_streaming_response
-            response = await litellm.acompletion(model="gpt-3.5-turbo",
-                                messages=[{
-                                    "role": "user",
-                                    "content": "Hi 👋 - i'm openai"
-                                }],
-                                stream=True)
-            async for chunk in response: 
-                complete_streaming_response += chunk["choices"][0]["delta"]["content"] or ""
+            response = await litellm.acompletion(
+                model="gpt-3.5-turbo",
+                messages=[{"role": "user", "content": "Hi 👋 - i'm openai"}],
+                stream=True,
+            )
+            async for chunk in response:
+                complete_streaming_response += (
+                    chunk["choices"][0]["delta"]["content"] or ""
+                )
                 print(complete_streaming_response)
+
         asyncio.run(call_gpt())
         complete_streaming_response = complete_streaming_response.strip("'")
-        response1 = tmp_function.complete_streaming_response_in_callback["choices"][0]["message"]["content"]
+        response1 = tmp_function.complete_streaming_response_in_callback["choices"][0][
+            "message"
+        ]["content"]
         response2 = complete_streaming_response
         # assert [ord(c) for c in response1] == [ord(c) for c in response2]
+        print(f"response1: {response1}")
+        print(f"response2: {response2}")
         assert response1 == response2
         assert tmp_function.async_success == True
     except Exception as e:
         print(e)
         pytest.fail(f"An error occurred - {str(e)}")
+
+
 # test_async_chat_openai_stream()
 
+
 def test_completion_azure_stream_moderation_failure():
     try:
         customHandler = MyCustomHandler()
@@ -122,11 +140,11 @@ def test_completion_azure_stream_moderation_failure():
                 "content": "how do i kill someone",
             },
         ]
-        try: 
+        try:
             response = completion(
                 model="azure/chatgpt-v-2", messages=messages, stream=True
             )
-            for chunk in response: 
+            for chunk in response:
                 print(f"chunk: {chunk}")
                 continue
         except Exception as e:
@@ -139,7 +157,7 @@ def test_completion_azure_stream_moderation_failure():
 
 def test_async_custom_handler_stream():
     try:
-        # [PROD Test] - Do not DELETE 
+        # [PROD Test] - Do not DELETE
         # checks if the model response available in the async + stream callbacks is equal to the received response
         customHandler2 = MyCustomHandler()
         litellm.callbacks = [customHandler2]
@@ -152,32 +170,37 @@ def test_async_custom_handler_stream():
             },
         ]
         complete_streaming_response = ""
+
         async def test_1():
             nonlocal complete_streaming_response
             response = await litellm.acompletion(
-                model="azure/chatgpt-v-2", 
-                messages=messages,
-                stream=True
+                model="azure/chatgpt-v-2", messages=messages, stream=True
             )
-            async for chunk in response: 
-                complete_streaming_response += chunk["choices"][0]["delta"]["content"] or ""
+            async for chunk in response:
+                complete_streaming_response += (
+                    chunk["choices"][0]["delta"]["content"] or ""
+                )
                 print(complete_streaming_response)
-        
+
         asyncio.run(test_1())
 
         response_in_success_handler = customHandler2.stream_collected_response
-        response_in_success_handler = response_in_success_handler["choices"][0]["message"]["content"]
+        response_in_success_handler = response_in_success_handler["choices"][0][
+            "message"
+        ]["content"]
         print("\n\n")
         print("response_in_success_handler: ", response_in_success_handler)
         print("complete_streaming_response: ", complete_streaming_response)
        assert response_in_success_handler == complete_streaming_response
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
+
+
 # test_async_custom_handler_stream()
 
 
 def test_azure_completion_stream():
-    # [PROD Test] - Do not DELETE 
+    # [PROD Test] - Do not DELETE
     # test if completion() + sync custom logger get the same complete stream response
     try:
         # checks if the model response available in the async + stream callbacks is equal to the received response
@@ -194,17 +217,17 @@ def test_azure_completion_stream():
         complete_streaming_response = ""
 
         response = litellm.completion(
-            model="azure/chatgpt-v-2", 
-            messages=messages,
-            stream=True
+            model="azure/chatgpt-v-2", messages=messages, stream=True
         )
-        for chunk in response: 
+        for chunk in response:
             complete_streaming_response += chunk["choices"][0]["delta"]["content"] or ""
             print(complete_streaming_response)
-        
-        time.sleep(0.5) # wait 1/2 second before checking callbacks
+
+        time.sleep(0.5)  # wait 1/2 second before checking callbacks
         response_in_success_handler = customHandler2.sync_stream_collected_response
-        response_in_success_handler = response_in_success_handler["choices"][0]["message"]["content"]
+        response_in_success_handler = response_in_success_handler["choices"][0][
+            "message"
+        ]["content"]
         print("\n\n")
         print("response_in_success_handler: ", response_in_success_handler)
         print("complete_streaming_response: ", complete_streaming_response)
@@ -212,24 +235,32 @@ def test_azure_completion_stream():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
+
 @pytest.mark.asyncio
-async def test_async_custom_handler_completion(): 
-    try: 
+async def test_async_custom_handler_completion():
+    try:
         customHandler_success = MyCustomHandler()
         customHandler_failure = MyCustomHandler()
         # success
         assert customHandler_success.async_success == False
         litellm.callbacks = [customHandler_success]
         response = await litellm.acompletion(
-            model="gpt-3.5-turbo", 
-            messages=[{
+            model="gpt-3.5-turbo",
+            messages=[
+                {
                     "role": "user",
                     "content": "hello from litellm test",
-                }]
-        )
+                }
+            ],
+        )
         await asyncio.sleep(1)
-        assert customHandler_success.async_success == True, "async success is not set to True even after success"
-        assert customHandler_success.async_completion_kwargs.get("model") == "gpt-3.5-turbo"
+        assert (
+            customHandler_success.async_success == True
+        ), "async success is not set to True even after success"
+        assert (
+            customHandler_success.async_completion_kwargs.get("model")
+            == "gpt-3.5-turbo"
+        )
         # failure
         litellm.callbacks = [customHandler_failure]
         messages = [
@@ -240,80 +271,119 @@ async def test_async_custom_handler_completion():
             },
         ]
 
-        assert customHandler_failure.async_failure == False 
-        try: 
+        assert customHandler_failure.async_failure == False
+        try:
             response = await litellm.acompletion(
-                        model="gpt-3.5-turbo", 
-                        messages=messages,
-                        api_key="my-bad-key",
-                    )
+                model="gpt-3.5-turbo",
+                messages=messages,
+                api_key="my-bad-key",
+            )
         except:
             pass
-        assert customHandler_failure.async_failure == True, "async failure is not set to True even after failure"
-        assert customHandler_failure.async_completion_kwargs_fail.get("model") == "gpt-3.5-turbo"
-        assert len(str(customHandler_failure.async_completion_kwargs_fail.get("exception"))) > 10 # expect APIError("OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: test. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}"), 'traceback_exception': 'Traceback (most recent call last):\n File "/Users/ishaanjaffer/Github/litellm/litellm/llms/openai.py", line 269, in acompletion\n response = await openai_aclient.chat.completions.create(**data)\n File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/openai/resources/chat/completions.py", line 119
+        assert (
+            customHandler_failure.async_failure == True
+        ), "async failure is not set to True even after failure"
+        assert (
+            customHandler_failure.async_completion_kwargs_fail.get("model")
+            == "gpt-3.5-turbo"
+        )
+        assert (
+            len(
+                str(customHandler_failure.async_completion_kwargs_fail.get("exception"))
+            )
+            > 10
+        )  # expect APIError("OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: test. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}"), 'traceback_exception': 'Traceback (most recent call last):\n File "/Users/ishaanjaffer/Github/litellm/litellm/llms/openai.py", line 269, in acompletion\n response = await openai_aclient.chat.completions.create(**data)\n File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/openai/resources/chat/completions.py", line 119
         litellm.callbacks = []
         print("Passed setting async failure")
     except Exception as e:
         pytest.fail(f"An exception occurred - {str(e)}")
+
+
 # asyncio.run(test_async_custom_handler_completion())
 
+
 @pytest.mark.asyncio
-async def test_async_custom_handler_embedding(): 
-    try: 
+async def test_async_custom_handler_embedding():
+    try:
         customHandler_embedding = MyCustomHandler()
         litellm.callbacks = [customHandler_embedding]
         # success
         assert customHandler_embedding.async_success_embedding == False
         response = await litellm.aembedding(
-                            model="text-embedding-ada-002", 
-                            input = ["hello world"],
-                        )
+            model="text-embedding-ada-002",
+            input=["hello world"],
+        )
         await asyncio.sleep(1)
-        assert customHandler_embedding.async_success_embedding == True, "async_success_embedding is not set to True even after success"
-        assert customHandler_embedding.async_embedding_kwargs.get("model") == "text-embedding-ada-002"
-        assert customHandler_embedding.async_embedding_response["usage"]["prompt_tokens"] ==2
+        assert (
+            customHandler_embedding.async_success_embedding == True
+        ), "async_success_embedding is not set to True even after success"
+        assert (
+            customHandler_embedding.async_embedding_kwargs.get("model")
+            == "text-embedding-ada-002"
+        )
+        assert (
+            customHandler_embedding.async_embedding_response["usage"]["prompt_tokens"]
+            == 2
+        )
         print("Passed setting async success: Embedding")
-        # failure 
+        # failure
         assert customHandler_embedding.async_failure_embedding == False
-        try: 
+        try:
             response = await litellm.aembedding(
-                            model="text-embedding-ada-002", 
-                            input = ["hello world"],
-                            api_key="my-bad-key",
-                        )
-        except:
+                model="text-embedding-ada-002",
+                input=["hello world"],
+                api_key="my-bad-key",
+            )
+        except:
             pass
-        assert customHandler_embedding.async_failure_embedding == True, "async failure embedding is not set to True even after failure"
-        assert customHandler_embedding.async_embedding_kwargs_fail.get("model") == "text-embedding-ada-002"
-        assert len(str(customHandler_embedding.async_embedding_kwargs_fail.get("exception"))) > 10 # exppect APIError("OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: test. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}"), 'traceback_exception': 'Traceback (most recent call last):\n File "/Users/ishaanjaffer/Github/litellm/litellm/llms/openai.py", line 269, in acompletion\n response = await openai_aclient.chat.completions.create(**data)\n File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/openai/resources/chat/completions.py", line 119
+        assert (
+            customHandler_embedding.async_failure_embedding == True
+        ), "async failure embedding is not set to True even after failure"
+        assert (
+            customHandler_embedding.async_embedding_kwargs_fail.get("model")
+            == "text-embedding-ada-002"
+        )
+        assert (
+            len(
+                str(
+                    customHandler_embedding.async_embedding_kwargs_fail.get("exception")
+                )
+            )
+            > 10
+        )  # exppect APIError("OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: test. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}"), 'traceback_exception': 'Traceback (most recent call last):\n File "/Users/ishaanjaffer/Github/litellm/litellm/llms/openai.py", line 269, in acompletion\n response = await openai_aclient.chat.completions.create(**data)\n File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages/openai/resources/chat/completions.py", line 119
     except Exception as e:
         pytest.fail(f"An exception occurred - {str(e)}")
+
+
 # asyncio.run(test_async_custom_handler_embedding())
 
+
 @pytest.mark.asyncio
-async def test_async_custom_handler_embedding_optional_param(): 
+async def test_async_custom_handler_embedding_optional_param():
     """
-    Tests if the openai optional params for embedding - user + encoding_format, 
+    Tests if the openai optional params for embedding - user + encoding_format,
         are logged
     """
     customHandler_optional_params = MyCustomHandler()
     litellm.callbacks = [customHandler_optional_params]
     response = await litellm.aembedding(
-                            model="azure/azure-embedding-model", 
-                            input = ["hello world"],
-                            user = "John"
-                        )
-    await asyncio.sleep(1) # success callback is async
+        model="azure/azure-embedding-model", input=["hello world"], user="John"
+    )
+    await asyncio.sleep(1)  # success callback is async
     assert customHandler_optional_params.user == "John"
-    assert customHandler_optional_params.user == customHandler_optional_params.data_sent_to_api["user"]
+    assert (
+        customHandler_optional_params.user
+        == customHandler_optional_params.data_sent_to_api["user"]
+    )
+
 
 # asyncio.run(test_async_custom_handler_embedding_optional_param())
 
+
 @pytest.mark.asyncio
-async def test_async_custom_handler_embedding_optional_param_bedrock(): 
+async def test_async_custom_handler_embedding_optional_param_bedrock():
     """
-    Tests if the openai optional params for embedding - user + encoding_format, 
+    Tests if the openai optional params for embedding - user + encoding_format,
         are logged
         but makes sure these are not sent to the non-openai/azure endpoint (raises errors).
@@ -323,42 +393,68 @@ async def test_async_custom_handler_embedding_optional_param_bedrock():
     customHandler_optional_params = MyCustomHandler()
     litellm.callbacks = [customHandler_optional_params]
     response = await litellm.aembedding(
-                            model="bedrock/amazon.titan-embed-text-v1", 
-                            input = ["hello world"],
-                            user = "John"
-                        )
-    await asyncio.sleep(1) # success callback is async
+        model="bedrock/amazon.titan-embed-text-v1", input=["hello world"], user="John"
+    )
+    await asyncio.sleep(1)  # success callback is async
     assert customHandler_optional_params.user == "John"
     assert "user" not in customHandler_optional_params.data_sent_to_api
 
 
 def test_redis_cache_completion_stream():
     from litellm import Cache
-    # Important Test - This tests if we can add to streaming cache, when custom callbacks are set 
+
+    # Important Test - This tests if we can add to streaming cache, when custom callbacks are set
     import random
+
     try:
         print("\nrunning test_redis_cache_completion_stream")
         litellm.set_verbose = True
-        random_number = random.randint(1, 100000) # add a random number to ensure it's always adding / reading from cache
-        messages = [{"role": "user", "content": f"write a one sentence poem about: {random_number}"}]
-        litellm.cache = Cache(type="redis", host=os.environ['REDIS_HOST'], port=os.environ['REDIS_PORT'], password=os.environ['REDIS_PASSWORD'])
+        random_number = random.randint(
+            1, 100000
+        )  # add a random number to ensure it's always adding / reading from cache
+        messages = [
+            {
+                "role": "user",
+                "content": f"write a one sentence poem about: {random_number}",
+            }
+        ]
+        litellm.cache = Cache(
+            type="redis",
+            host=os.environ["REDIS_HOST"],
+            port=os.environ["REDIS_PORT"],
+            password=os.environ["REDIS_PASSWORD"],
+        )
         print("test for caching, streaming + completion")
-        response1 = completion(model="gpt-3.5-turbo", messages=messages, max_tokens=40, temperature=0.2, stream=True)
+        response1 = completion(
+            model="gpt-3.5-turbo",
+            messages=messages,
+            max_tokens=40,
+            temperature=0.2,
+            stream=True,
+        )
         response_1_content = ""
         for chunk in response1:
             print(chunk)
             response_1_content += chunk.choices[0].delta.content or ""
         print(response_1_content)
-        time.sleep(0.1) # sleep for 0.1 seconds allow set cache to occur
-        response2 = completion(model="gpt-3.5-turbo", messages=messages, max_tokens=40, temperature=0.2, stream=True)
+        time.sleep(0.1)  # sleep for 0.1 seconds allow set cache to occur
+        response2 = completion(
+            model="gpt-3.5-turbo",
+            messages=messages,
+            max_tokens=40,
+            temperature=0.2,
+            stream=True,
+        )
         response_2_content = ""
         for chunk in response2:
            print(chunk)
            response_2_content += chunk.choices[0].delta.content or ""
        print("\nresponse 1", response_1_content)
        print("\nresponse 2", response_2_content)
-        assert response_1_content == response_2_content, f"Response 1 != Response 2. Same params, Response 1{response_1_content} != Response 2{response_2_content}"
+        assert (
+            response_1_content == response_2_content
+        ), f"Response 1 != Response 2. Same params, Response 1{response_1_content} != Response 2{response_2_content}"
        litellm.success_callback = []
        litellm._async_success_callback = []
        litellm.cache = None
@@ -366,4 +462,6 @@ def test_redis_cache_completion_stream():
        print(e)
        litellm.success_callback = []
        raise e
-# test_redis_cache_completion_stream()
\ No newline at end of file
+
+
+# test_redis_cache_completion_stream()