diff --git a/litellm/__pycache__/__init__.cpython-311.pyc b/litellm/__pycache__/__init__.cpython-311.pyc
index bff4121c2c..56dc8017c9
Binary files a/litellm/__pycache__/__init__.cpython-311.pyc and b/litellm/__pycache__/__init__.cpython-311.pyc differ
diff --git a/litellm/__pycache__/main.cpython-311.pyc b/litellm/__pycache__/main.cpython-311.pyc
index 61460c06ad..5cde9036ab
Binary files a/litellm/__pycache__/main.cpython-311.pyc and b/litellm/__pycache__/main.cpython-311.pyc differ
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 70960990d9..fb4eca6d43
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/main.py b/litellm/main.py
index 759d45c17a..7d6844ddac 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -1478,12 +1478,13 @@ def config_completion(**kwargs):
         )
 
 def stream_chunk_builder(chunks: list):
+    print(f"chunk 0: {chunks[0]}")
     id = chunks[0]["id"]
     object = chunks[0]["object"]
     created = chunks[0]["created"]
     model = chunks[0]["model"]
     role = chunks[0]["choices"][0]["delta"]["role"]
-    finnish_reason = chunks[-1]["choices"][0]["finish_reason"]
+    finish_reason = chunks[-1]["choices"][0]["finish_reason"]
 
     # Initialize the response dictionary
     response = {
@@ -1498,7 +1499,7 @@ def stream_chunk_builder(chunks: list):
                     "role": role,
                     "content": ""
                 },
-                "finish_reason": finnish_reason,
+                "finish_reason": finish_reason,
             }
         ],
         # "usage": {
diff --git a/litellm/tests/test_logging.py b/litellm/tests/test_logging.py
index 5cbbb22213..e68c90b357 100644
--- a/litellm/tests/test_logging.py
+++ b/litellm/tests/test_logging.py
@@ -41,118 +41,163 @@ messages = [{"content": user_message, "role": "user"}]
 
 # 1. On Call Success
 # normal completion
 ## test on openai completion call
-try:
-    # Redirect stdout
-    old_stdout = sys.stdout
-    sys.stdout = new_stdout = io.StringIO()
+def test_logging_success_completion():
+    global score
+    try:
+        # Redirect stdout
+        old_stdout = sys.stdout
+        sys.stdout = new_stdout = io.StringIO()
 
-    response = completion(model="gpt-3.5-turbo", messages=messages)
-    # Restore stdout
-    sys.stdout = old_stdout
-    output = new_stdout.getvalue().strip()
+        response = completion(model="gpt-3.5-turbo", messages=messages)
+        # Restore stdout
+        sys.stdout = old_stdout
+        output = new_stdout.getvalue().strip()
 
-    if "Logging Details Pre-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details Post-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details LiteLLM-Success Call" not in output:
-        raise Exception("Required log message not found!")
-    score += 1
-except Exception as e:
-    pytest.fail(f"Error occurred: {e}")
-    pass
+        if "Logging Details Pre-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details Post-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details LiteLLM-Success Call" not in output:
+            raise Exception("Required log message not found!")
+        score += 1
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+        pass
 
 ## test on non-openai completion call
-try:
-    # Redirect stdout
-    old_stdout = sys.stdout
-    sys.stdout = new_stdout = io.StringIO()
+def test_logging_success_completion_non_openai():
+    global score
+    try:
+        # Redirect stdout
+        old_stdout = sys.stdout
+        sys.stdout = new_stdout = io.StringIO()
 
-    response = completion(model="claude-instant-1", messages=messages)
-
-    # Restore stdout
-    sys.stdout = old_stdout
-    output = new_stdout.getvalue().strip()
+        response = completion(model="claude-instant-1", messages=messages)
+
+        # Restore stdout
+        sys.stdout = old_stdout
+        output = new_stdout.getvalue().strip()
 
-    if "Logging Details Pre-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details Post-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details LiteLLM-Success Call" not in output:
-        raise Exception("Required log message not found!")
-    score += 1
-except Exception as e:
-    pytest.fail(f"Error occurred: {e}")
-    pass
+        if "Logging Details Pre-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details Post-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details LiteLLM-Success Call" not in output:
+            raise Exception("Required log message not found!")
+        score += 1
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+        pass
 
 # streaming completion
 ## test on openai completion call
-try:
-    # Redirect stdout
-    old_stdout = sys.stdout
-    sys.stdout = new_stdout = io.StringIO()
+def test_logging_success_streaming_openai():
+    global score
+    try:
+        # litellm.set_verbose = False
+        def custom_callback(
+            kwargs,                 # kwargs to completion
+            completion_response,    # response from completion
+            start_time, end_time    # start/end time
+        ):
+            if "complete_streaming_response" in kwargs:
+                print(f"Complete Streaming Response: {kwargs['complete_streaming_response']}")
+
+        # Assign the custom callback function
+        litellm.success_callback = [custom_callback]
 
-    response = completion(model="gpt-3.5-turbo", messages=messages)
+        # Redirect stdout
+        old_stdout = sys.stdout
+        sys.stdout = new_stdout = io.StringIO()
 
-    # Restore stdout
-    sys.stdout = old_stdout
-    output = new_stdout.getvalue().strip()
+        response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
+        for chunk in response:
+            pass
 
-    if "Logging Details Pre-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details Post-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details LiteLLM-Success Call" not in output:
-        raise Exception("Required log message not found!")
-    score += 1
-except Exception as e:
-    pytest.fail(f"Error occurred: {e}")
-    pass
+        # Restore stdout
+        sys.stdout = old_stdout
+        output = new_stdout.getvalue().strip()
+
+        if "Logging Details Pre-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details Post-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details LiteLLM-Success Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Complete Streaming Response:" not in output:
+            raise Exception("Required log message not found!")
+        score += 1
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+        pass
+
+# test_logging_success_streaming_openai()
 
 ## test on non-openai completion call
-try:
-    # Redirect stdout
-    old_stdout = sys.stdout
-    sys.stdout = new_stdout = io.StringIO()
+def test_logging_success_streaming_non_openai():
+    global score
+    try:
+        # litellm.set_verbose = False
+        def custom_callback(
+            kwargs,                 # kwargs to completion
+            completion_response,    # response from completion
+            start_time, end_time    # start/end time
+        ):
+            # print(f"streaming response: {completion_response}")
+            if "complete_streaming_response" in kwargs:
+                print(f"Complete Streaming Response: {kwargs['complete_streaming_response']}")
+
+        # Assign the custom callback function
+        litellm.success_callback = [custom_callback]
 
-    response = completion(model="claude-instant-1", messages=messages)
-
-    # Restore stdout
-    sys.stdout = old_stdout
-    output = new_stdout.getvalue().strip()
+        # Redirect stdout
+        old_stdout = sys.stdout
+        sys.stdout = new_stdout = io.StringIO()
 
-    if "Logging Details Pre-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details Post-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details LiteLLM-Success Call" not in output:
-        raise Exception("Required log message not found!")
-    score += 1
-except Exception as e:
-    pytest.fail(f"Error occurred: {e}")
-    pass
+        response = completion(model="claude-instant-1", messages=messages, stream=True)
+        for idx, chunk in enumerate(response):
+            pass
+
+        # Restore stdout
+        sys.stdout = old_stdout
+        output = new_stdout.getvalue().strip()
 
+        if "Logging Details Pre-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details Post-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details LiteLLM-Success Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Complete Streaming Response:" not in output:
+            raise Exception("Required log message not found!")
+        score += 1
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+        pass
+
+test_logging_success_streaming_non_openai()
 
 # embedding
-try:
-    # Redirect stdout
-    old_stdout = sys.stdout
-    sys.stdout = new_stdout = io.StringIO()
+def test_logging_success_embedding_openai():
+    try:
+        # Redirect stdout
+        old_stdout = sys.stdout
+        sys.stdout = new_stdout = io.StringIO()
 
-    response = embedding(model="text-embedding-ada-002", input=["good morning from litellm"])
+        response = embedding(model="text-embedding-ada-002", input=["good morning from litellm"])
 
-    # Restore stdout
-    sys.stdout = old_stdout
-    output = new_stdout.getvalue().strip()
+        # Restore stdout
+        sys.stdout = old_stdout
+        output = new_stdout.getvalue().strip()
 
-    if "Logging Details Pre-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details Post-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details LiteLLM-Success Call" not in output:
-        raise Exception("Required log message not found!")
-except Exception as e:
-    pytest.fail(f"Error occurred: {e}")
+        if "Logging Details Pre-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details Post-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details LiteLLM-Success Call" not in output:
+            raise Exception("Required log message not found!")
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
 
 # ## 2. On LiteLLM Call failure
 # ## TEST BAD KEY
diff --git a/litellm/tests/test_stream_chunk_builder.py b/litellm/tests/test_stream_chunk_builder.py
index 465ee4a52f..a4527521f2 100644
--- a/litellm/tests/test_stream_chunk_builder.py
+++ b/litellm/tests/test_stream_chunk_builder.py
@@ -54,4 +54,5 @@ def test_stream_chunk_builder():
         finnish_reason = choices["finish_reason"]
     except:
         raise Exception("stream_chunk_builder failed to rebuild response")
-test_stream_chunk_builder()
+# test_stream_chunk_builder()
+
diff --git a/litellm/utils.py b/litellm/utils.py
index def722776b..9b0c6c7356 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -228,6 +228,7 @@ class Logging:
         self.call_type = call_type
         self.litellm_call_id = litellm_call_id
         self.function_id = function_id
+        self.streaming_chunks = [] # for generating complete stream response
 
     def update_environment_variables(self, model, user, optional_params, litellm_params):
         self.optional_params = optional_params
@@ -394,7 +395,7 @@ class Logging:
 
             pass
 
-    def success_handler(self, result, start_time=None, end_time=None):
+    def success_handler(self, result=None, start_time=None, end_time=None, **kwargs):
         print_verbose(
             f"Logging Details LiteLLM-Success Call"
         )
@@ -403,6 +404,20 @@ class Logging:
             start_time = self.start_time
         if end_time is None:
             end_time = datetime.datetime.now()
+
+        complete_streaming_response = None
+
+        ## BUILD COMPLETE STREAMED RESPONSE
+        if self.stream:
+            if result.choices[0].finish_reason: # if it's the last chunk
+                self.streaming_chunks.append(result)
+                complete_streaming_response = litellm.stream_chunk_builder(self.streaming_chunks)
+            else:
+                self.streaming_chunks.append(result)
+
+        if complete_streaming_response:
+            self.model_call_details["complete_streaming_response"] = complete_streaming_response
+
         print_verbose(f"success callbacks: {litellm.success_callback}")
 
         if litellm.max_budget and self.stream:
@@ -3328,20 +3343,22 @@ class CustomStreamWrapper:
                 chunk = next(self.completion_stream)
                 model_response = chunk
                 # LOGGING
-                threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
+                threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start()
                 return model_response
-            # LOGGING
-            threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
 
             model_response.model = self.model
             if len(completion_obj["content"]) > 0: # cannot set content of an OpenAI Object to be an empty string
                 if self.sent_first_chunk == False:
                     completion_obj["role"] = "assistant"
                     self.sent_first_chunk = True
                 model_response.choices[0].delta = Delta(**completion_obj)
+                # LOGGING
+                threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start()
                 return model_response
             elif model_response.choices[0].finish_reason:
                 model_response.choices[0].finish_reason = map_finish_reason(model_response.choices[0].finish_reason) # ensure consistent output to openai
+                # LOGGING
+                threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start()
                 return model_response
         except StopIteration:
             raise StopIteration
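Note: below is a minimal usage sketch of the behavior this diff enables, mirroring the callback pattern exercised in the updated test_logging.py; the model name and prompt are placeholders, not part of the change. Once a streamed completion reaches its final chunk (finish_reason is set), success_handler rebuilds the buffered chunks with litellm.stream_chunk_builder, so a registered success callback should see the stitched-together result under kwargs["complete_streaming_response"].

import litellm
from litellm import completion

def log_complete_stream(kwargs, completion_response, start_time, end_time):
    # "complete_streaming_response" is only expected in kwargs on the callback
    # fired for the final chunk, after all chunks are rebuilt into one response.
    if "complete_streaming_response" in kwargs:
        print(f"Complete Streaming Response: {kwargs['complete_streaming_response']}")

litellm.success_callback = [log_complete_stream]

response = completion(
    model="gpt-3.5-turbo",  # placeholder model
    messages=[{"role": "user", "content": "Hi, how are you?"}],  # placeholder prompt
    stream=True,
)
for chunk in response:
    pass  # consuming the stream drives CustomStreamWrapper, which fires success_handler per chunk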