fix(utils): adds complete streaming response to success handler

2023-10-07 15:37:31 -07:00 · 2023-10-07 15:37:31 -07:00 · 9cda24e1b2
commit 9cda24e1b2
parent f941975a78
7 changed files with 161 additions and 97 deletions
--- a/litellm/__pycache__/__init__.cpython-311.pyc
+++ b/litellm/__pycache__/__init__.cpython-311.pyc
--- a/litellm/__pycache__/main.cpython-311.pyc
+++ b/litellm/__pycache__/main.cpython-311.pyc
--- a/litellm/__pycache__/utils.cpython-311.pyc
+++ b/litellm/__pycache__/utils.cpython-311.pyc
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -1478,12 +1478,13 @@ def config_completion(**kwargs):
        )

 def stream_chunk_builder(chunks: list):
+    print(f"chunk 0: {chunks[0]}")
    id = chunks[0]["id"]
    object = chunks[0]["object"]
    created = chunks[0]["created"]
    model = chunks[0]["model"]
    role = chunks[0]["choices"][0]["delta"]["role"]
-    finnish_reason = chunks[-1]["choices"][0]["finish_reason"]
+    finish_reason = chunks[-1]["choices"][0]["finish_reason"]
    
    # Initialize the response dictionary
    response = {
@@ -1498,7 +1499,7 @@ def stream_chunk_builder(chunks: list):
                    "role": role,
                    "content": ""
                },
-                "finish_reason": finnish_reason,
+                "finish_reason": finish_reason,
            }
        ],
        # "usage": {
--- a/litellm/tests/test_logging.py
+++ b/litellm/tests/test_logging.py
@@ -41,6 +41,8 @@ messages = [{"content": user_message, "role": "user"}]
 # 1. On Call Success
 # normal completion 
 ## test on openai completion call
+def test_logging_success_completion():
+    global score
    try:
        # Redirect stdout
        old_stdout = sys.stdout
@@ -63,6 +65,8 @@ except Exception as e:
        pass

 ## test on non-openai completion call
+def test_logging_success_completion_non_openai():
+    global score
    try:
        # Redirect stdout
        old_stdout = sys.stdout
@@ -87,12 +91,28 @@ except Exception as e:

 # streaming completion
 ## test on openai completion call
+def test_logging_success_streaming_openai():
+    global score
    try:
+        # litellm.set_verbose = False
+        def custom_callback(
+            kwargs,                 # kwargs to completion
+            completion_response,    # response from completion
+            start_time, end_time    # start/end time
+        ):
+            if "complete_streaming_response" in kwargs: 
+                print(f"Complete Streaming Response: {kwargs['complete_streaming_response']}")
+        
+        # Assign the custom callback function
+        litellm.success_callback = [custom_callback]
+
        # Redirect stdout
        old_stdout = sys.stdout
        sys.stdout = new_stdout = io.StringIO()

-    response = completion(model="gpt-3.5-turbo", messages=messages)
+        response = completion(model="gpt-3.5-turbo", messages=messages, stream=True)
+        for chunk in response: 
+            pass

        # Restore stdout
        sys.stdout = old_stdout
@@ -104,18 +124,39 @@ try:
            raise Exception("Required log message not found!")
        elif "Logging Details LiteLLM-Success Call" not in output:
            raise Exception("Required log message not found!")
+        elif "Complete Streaming Response:" not in output:
+            raise Exception("Required log message not found!")
        score += 1
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
        pass

+# test_logging_success_streaming_openai() 
+
 ## test on non-openai completion call
+def test_logging_success_streaming_non_openai():
+    global score
    try:
+        # litellm.set_verbose = False
+        def custom_callback(
+            kwargs,                 # kwargs to completion
+            completion_response,    # response from completion
+            start_time, end_time    # start/end time
+        ):
+            # print(f"streaming response: {completion_response}")
+            if "complete_streaming_response" in kwargs: 
+                print(f"Complete Streaming Response: {kwargs['complete_streaming_response']}")
+        
+        # Assign the custom callback function
+        litellm.success_callback = [custom_callback]
+
        # Redirect stdout
        old_stdout = sys.stdout
        sys.stdout = new_stdout = io.StringIO()

-    response = completion(model="claude-instant-1", messages=messages)
+        response = completion(model="claude-instant-1", messages=messages, stream=True)
+        for idx, chunk in enumerate(response): 
+            pass
        
        # Restore stdout
        sys.stdout = old_stdout
@@ -127,13 +168,17 @@ try:
            raise Exception("Required log message not found!")
        elif "Logging Details LiteLLM-Success Call" not in output:
            raise Exception("Required log message not found!")
+        elif "Complete Streaming Response:" not in output:
+            raise Exception("Required log message not found!")
        score += 1
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
        pass

+test_logging_success_streaming_non_openai() 
 # embedding

+def test_logging_success_embedding_openai():
    try:
        # Redirect stdout
        old_stdout = sys.stdout
--- a/litellm/tests/test_stream_chunk_builder.py
+++ b/litellm/tests/test_stream_chunk_builder.py
@@ -54,4 +54,5 @@ def test_stream_chunk_builder():
        finnish_reason = choices["finish_reason"]
    except:
        raise Exception("stream_chunk_builder failed to rebuild response")
-test_stream_chunk_builder()
+# test_stream_chunk_builder()
+
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -228,6 +228,7 @@ class Logging:
        self.call_type = call_type
        self.litellm_call_id = litellm_call_id
        self.function_id = function_id
+        self.streaming_chunks = [] # for generating complete stream response
    
    def update_environment_variables(self, model, user, optional_params, litellm_params):
        self.optional_params = optional_params
@@ -394,7 +395,7 @@ class Logging:
            pass

    
-    def success_handler(self, result, start_time=None, end_time=None):
+    def success_handler(self, result=None, start_time=None, end_time=None, **kwargs):
        print_verbose(
                f"Logging Details LiteLLM-Success Call"
            )
@@ -403,6 +404,20 @@ class Logging:
                start_time = self.start_time
            if end_time is None:
                end_time = datetime.datetime.now()
+            
+            complete_streaming_response = None
+            
+            ## BUILD COMPLETE STREAMED RESPONSE
+            if self.stream: 
+                if result.choices[0].finish_reason: # if it's the last chunk 
+                    self.streaming_chunks.append(result)
+                    complete_streaming_response = litellm.stream_chunk_builder(self.streaming_chunks)
+                else:
+                    self.streaming_chunks.append(result)
+            
+            if complete_streaming_response: 
+                self.model_call_details["complete_streaming_response"] = complete_streaming_response
+
            print_verbose(f"success callbacks: {litellm.success_callback}")

            if litellm.max_budget and self.stream:
@@ -3328,20 +3343,22 @@ class CustomStreamWrapper:
                    chunk = next(self.completion_stream)
                    model_response = chunk
                    # LOGGING
-                    threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
+                    threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start()
                    return model_response
                
-                # LOGGING
-                threading.Thread(target=self.logging_obj.success_handler, args=(completion_obj,)).start()
                model_response.model = self.model
                if len(completion_obj["content"]) > 0: # cannot set content of an OpenAI Object to be an empty string
                    if self.sent_first_chunk == False:
                        completion_obj["role"] = "assistant"
                        self.sent_first_chunk = True
                    model_response.choices[0].delta = Delta(**completion_obj)
+                    # LOGGING
+                    threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start()
                    return model_response
                elif model_response.choices[0].finish_reason:
                    model_response.choices[0].finish_reason = map_finish_reason(model_response.choices[0].finish_reason) # ensure consistent output to openai
+                    # LOGGING
+                    threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start()
                    return model_response
        except StopIteration:
            raise StopIteration