forked from phoenix/litellm-mirror
fix(utils): adds complete streaming response to success handler
parent f941975a78
commit 9cda24e1b2
7 changed files with 161 additions and 97 deletions
@@ -1478,12 +1478,13 @@ def config_completion(**kwargs):
     )

 def stream_chunk_builder(chunks: list):
+    print(f"chunk 0: {chunks[0]}")
     id = chunks[0]["id"]
     object = chunks[0]["object"]
     created = chunks[0]["created"]
     model = chunks[0]["model"]
     role = chunks[0]["choices"][0]["delta"]["role"]
-    finnish_reason = chunks[-1]["choices"][0]["finish_reason"]
+    finish_reason = chunks[-1]["choices"][0]["finish_reason"]

     # Initialize the response dictionary
     response = {
@@ -1498,7 +1499,7 @@ def stream_chunk_builder(chunks: list):
                     "role": role,
                     "content": ""
                 },
-                "finish_reason": finnish_reason,
+                "finish_reason": finish_reason,
             }
         ],
         # "usage": {
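The hunk above only shows the metadata fields (id, object, created, model, role, finish_reason) being copied out of the chunks; the message content itself is reassembled from the streamed delta fragments in a part of the function that falls outside this hunk. A minimal sketch of that idea (the helper name below is illustrative, not code from this commit):

def combine_delta_content(chunks: list) -> str:
    # Concatenate the partial "content" strings carried by each streamed delta.
    # Chunks without content (e.g. the final finish_reason-only chunk) are skipped.
    parts = []
    for chunk in chunks:
        delta = chunk["choices"][0].get("delta", {})
        if delta.get("content"):
            parts.append(delta["content"])
    return "".join(parts)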
@@ -41,118 +41,163 @@ messages = [{"content": user_message, "role": "user"}]
 # 1. On Call Success
 # normal completion
 ## test on openai completion call
-try:
-    # Redirect stdout
-    old_stdout = sys.stdout
-    sys.stdout = new_stdout = io.StringIO()
+def test_logging_success_completion():
+    global score
+    try:
+        # Redirect stdout
+        old_stdout = sys.stdout
+        sys.stdout = new_stdout = io.StringIO()

         response = completion(model="gpt-3.5-turbo", messages=messages)
         # Restore stdout
         sys.stdout = old_stdout
         output = new_stdout.getvalue().strip()

         if "Logging Details Pre-API Call" not in output:
             raise Exception("Required log message not found!")
         elif "Logging Details Post-API Call" not in output:
             raise Exception("Required log message not found!")
         elif "Logging Details LiteLLM-Success Call" not in output:
             raise Exception("Required log message not found!")
         score += 1
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
         pass

 ## test on non-openai completion call
-try:
-    # Redirect stdout
-    old_stdout = sys.stdout
-    sys.stdout = new_stdout = io.StringIO()
+def test_logging_success_completion_non_openai():
+    global score
+    try:
+        # Redirect stdout
+        old_stdout = sys.stdout
+        sys.stdout = new_stdout = io.StringIO()

         response = completion(model="claude-instant-1", messages=messages)

         # Restore stdout
         sys.stdout = old_stdout
         output = new_stdout.getvalue().strip()

         if "Logging Details Pre-API Call" not in output:
             raise Exception("Required log message not found!")
         elif "Logging Details Post-API Call" not in output:
             raise Exception("Required log message not found!")
         elif "Logging Details LiteLLM-Success Call" not in output:
             raise Exception("Required log message not found!")
         score += 1
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
         pass

 # streaming completion
 ## test on openai completion call
-try:
-    # Redirect stdout
-    old_stdout = sys.stdout
-    sys.stdout = new_stdout = io.StringIO()
-
-    response = completion(model="gpt-3.5-turbo", messages=messages)
-
-    # Restore stdout
-    sys.stdout = old_stdout
-    output = new_stdout.getvalue().strip()
-
-    if "Logging Details Pre-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details Post-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details LiteLLM-Success Call" not in output:
-        raise Exception("Required log message not found!")
-    score += 1
-except Exception as e:
-    pytest.fail(f"Error occurred: {e}")
-    pass
+def test_logging_success_streaming_openai():
+    global score
+    try:
+        # litellm.set_verbose = False
+        def custom_callback(
+            kwargs,              # kwargs to completion
+            completion_response, # response from completion
+            start_time, end_time # start/end time
+        ):
+            if "complete_streaming_response" in kwargs:
+                print(f"Complete Streaming Response: {kwargs['complete_streaming_response']}")
+
+        # Assign the custom callback function
+        litellm.success_callback = [custom_callback]
+
+        # Redirect stdout
+        old_stdout = sys.stdout
+        sys.stdout = new_stdout = io.StringIO()
+
+        response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
+        for chunk in response:
+            pass
+
+        # Restore stdout
+        sys.stdout = old_stdout
+        output = new_stdout.getvalue().strip()
+
+        if "Logging Details Pre-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details Post-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details LiteLLM-Success Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Complete Streaming Response:" not in output:
+            raise Exception("Required log message not found!")
+        score += 1
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+        pass
+
+# test_logging_success_streaming_openai()

 ## test on non-openai completion call
-try:
-    # Redirect stdout
-    old_stdout = sys.stdout
-    sys.stdout = new_stdout = io.StringIO()
-
-    response = completion(model="claude-instant-1", messages=messages)
-
-    # Restore stdout
-    sys.stdout = old_stdout
-    output = new_stdout.getvalue().strip()
-
-    if "Logging Details Pre-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details Post-API Call" not in output:
-        raise Exception("Required log message not found!")
-    elif "Logging Details LiteLLM-Success Call" not in output:
-        raise Exception("Required log message not found!")
-    score += 1
-except Exception as e:
-    pytest.fail(f"Error occurred: {e}")
-    pass
+def test_logging_success_streaming_non_openai():
+    global score
+    try:
+        # litellm.set_verbose = False
+        def custom_callback(
+            kwargs,              # kwargs to completion
+            completion_response, # response from completion
+            start_time, end_time # start/end time
+        ):
+            # print(f"streaming response: {completion_response}")
+            if "complete_streaming_response" in kwargs:
+                print(f"Complete Streaming Response: {kwargs['complete_streaming_response']}")
+
+        # Assign the custom callback function
+        litellm.success_callback = [custom_callback]
+
+        # Redirect stdout
+        old_stdout = sys.stdout
+        sys.stdout = new_stdout = io.StringIO()
+
+        response = completion(model="claude-instant-1", messages=messages, stream=True)
+        for idx, chunk in enumerate(response):
+            pass
+
+        # Restore stdout
+        sys.stdout = old_stdout
+        output = new_stdout.getvalue().strip()
+
+        if "Logging Details Pre-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details Post-API Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Logging Details LiteLLM-Success Call" not in output:
+            raise Exception("Required log message not found!")
+        elif "Complete Streaming Response:" not in output:
+            raise Exception("Required log message not found!")
+        score += 1
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+        pass
+
+test_logging_success_streaming_non_openai()

 # embedding

-try:
-    # Redirect stdout
-    old_stdout = sys.stdout
-    sys.stdout = new_stdout = io.StringIO()
+def test_logging_success_embedding_openai():
+    try:
+        # Redirect stdout
+        old_stdout = sys.stdout
+        sys.stdout = new_stdout = io.StringIO()

         response = embedding(model="text-embedding-ada-002", input=["good morning from litellm"])

         # Restore stdout
         sys.stdout = old_stdout
         output = new_stdout.getvalue().strip()

         if "Logging Details Pre-API Call" not in output:
             raise Exception("Required log message not found!")
         elif "Logging Details Post-API Call" not in output:
             raise Exception("Required log message not found!")
         elif "Logging Details LiteLLM-Success Call" not in output:
             raise Exception("Required log message not found!")
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")

 # ## 2. On LiteLLM Call failure
 # ## TEST BAD KEY
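For reference, the streaming tests above exercise the user-facing side of this commit: a callback registered on litellm.success_callback receives the reassembled stream under the complete_streaming_response key of its kwargs once the final chunk has been logged. A condensed sketch of that usage, assuming a valid API key is configured in the environment:

import litellm
from litellm import completion

def log_complete_response(kwargs, completion_response, start_time, end_time):
    # Only present after the last chunk of a streamed call has been handled.
    if "complete_streaming_response" in kwargs:
        print(f"Complete Streaming Response: {kwargs['complete_streaming_response']}")

litellm.success_callback = [log_complete_response]

response = completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
    stream=True,
)
for chunk in response:  # the callback fires in a background thread per chunk
    pass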
@@ -54,4 +54,5 @@ def test_stream_chunk_builder():
         finnish_reason = choices["finish_reason"]
     except:
         raise Exception("stream_chunk_builder failed to rebuild response")
-test_stream_chunk_builder()
+# test_stream_chunk_builder()
+
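With the module-level call commented out, the builder can be exercised directly against collected chunks. A condensed sketch of what this test covers (assumes an OpenAI key in the environment; variable names are illustrative):

import litellm

messages = [{"role": "user", "content": "Hey, how's it going?"}]

chunks = []
response = litellm.completion(model="gpt-3.5-turbo", messages=messages, stream=True)
for chunk in response:
    chunks.append(chunk)

rebuilt = litellm.stream_chunk_builder(chunks)
print(rebuilt["choices"][0]["finish_reason"])  # the last chunk's finish_reason survives the rebuild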
@@ -228,6 +228,7 @@ class Logging:
         self.call_type = call_type
         self.litellm_call_id = litellm_call_id
         self.function_id = function_id
+        self.streaming_chunks = [] # for generating complete stream response

     def update_environment_variables(self, model, user, optional_params, litellm_params):
         self.optional_params = optional_params
@@ -394,7 +395,7 @@ class Logging:
             pass


-    def success_handler(self, result, start_time=None, end_time=None):
+    def success_handler(self, result=None, start_time=None, end_time=None, **kwargs):
         print_verbose(
                 f"Logging Details LiteLLM-Success Call"
             )
@@ -403,6 +404,20 @@ class Logging:
                 start_time = self.start_time
             if end_time is None:
                 end_time = datetime.datetime.now()
+
+            complete_streaming_response = None
+
+            ## BUILD COMPLETE STREAMED RESPONSE
+            if self.stream:
+                if result.choices[0].finish_reason: # if it's the last chunk
+                    self.streaming_chunks.append(result)
+                    complete_streaming_response = litellm.stream_chunk_builder(self.streaming_chunks)
+                else:
+                    self.streaming_chunks.append(result)
+
+            if complete_streaming_response:
+                self.model_call_details["complete_streaming_response"] = complete_streaming_response
+
             print_verbose(f"success callbacks: {litellm.success_callback}")

             if litellm.max_budget and self.stream:
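The key detail in success_handler is the gate on finish_reason: every streamed chunk is appended to self.streaming_chunks, but stream_chunk_builder only runs, and complete_streaming_response is only attached to model_call_details, once the chunk carrying a finish_reason arrives. A simplified stand-in for that control flow (plain dicts below, not litellm's real response objects):

streaming_chunks = []
for chunk in [
    {"choices": [{"delta": {"content": "Hel"}, "finish_reason": None}]},
    {"choices": [{"delta": {"content": "lo!"}, "finish_reason": None}]},
    {"choices": [{"delta": {}, "finish_reason": "stop"}]},
]:
    streaming_chunks.append(chunk)
    if chunk["choices"][0]["finish_reason"]:  # truthy only on the final chunk
        print(f"rebuilding complete response from {len(streaming_chunks)} chunks")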
@@ -3328,20 +3343,22 @@ class CustomStreamWrapper:
                 chunk = next(self.completion_stream)
                 model_response = chunk
                 # LOGGING
-                threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
+                threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start()
                 return model_response

-            # LOGGING
-            threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
             model_response.model = self.model
             if len(completion_obj["content"]) > 0: # cannot set content of an OpenAI Object to be an empty string
                 if self.sent_first_chunk == False:
                     completion_obj["role"] = "assistant"
                     self.sent_first_chunk = True
                 model_response.choices[0].delta = Delta(**completion_obj)
+                # LOGGING
+                threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start()
                 return model_response
             elif model_response.choices[0].finish_reason:
                 model_response.choices[0].finish_reason = map_finish_reason(model_response.choices[0].finish_reason) # ensure consistent output to openai
+                # LOGGING
+                threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start()
                 return model_response
         except StopIteration:
             raise StopIteration
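Each streamed chunk hands logging off to a new threading.Thread(...).start() call, so the generator can yield the chunk to the caller without waiting for success callbacks to run. A minimal illustration of that fire-and-forget pattern (handle_success is a stand-in, not litellm's handler):

import threading

def handle_success(chunk):
    # stand-in for Logging.success_handler: run callbacks off the hot path
    print(f"logged: {chunk}")

chunk = {"choices": [{"delta": {"content": "partial text"}}]}
threading.Thread(target=handle_success, args=(chunk,)).start()
# execution continues immediately; the chunk can be yielded to the caller right away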