From 70fc5afb5d4b94482ecd1cea5e99a2b24e436d57 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Sat, 18 Nov 2023 16:23:05 -0800
Subject: [PATCH] (fix) streaming + function / tool calling

---
 litellm/utils.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/litellm/utils.py b/litellm/utils.py
index be5fc86ca..33e5879cf 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -4598,13 +4598,23 @@ class CustomStreamWrapper:
             text = ""
             is_finished = False
             finish_reason = None
+            original_chunk = None  # this is used for function/tool calling
             if len(str_line.choices) > 0:
                 if str_line.choices[0].delta.content is not None:
                     text = str_line.choices[0].delta.content
+                else:  # function/tool calling chunk - content is None, so we pass the original OpenAI chunk through
+                    original_chunk = str_line
                 if str_line.choices[0].finish_reason:
                     is_finished = True
                     finish_reason = str_line.choices[0].finish_reason
-            return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
+
+
+            return {
+                "text": text,
+                "is_finished": is_finished,
+                "finish_reason": finish_reason,
+                "original_chunk": original_chunk
+            }
         except Exception as e:
             traceback.print_exc()
             raise e
@@ -4856,6 +4866,13 @@ class CustomStreamWrapper:
                     return model_response
                 else:
                     return
+            elif response_obj.get("original_chunk", None) is not None:  # function / tool calling branch
+                model_response = response_obj.get("original_chunk", None)
+                if self.sent_first_chunk == False:
+                    completion_obj["role"] = "assistant"
+                    self.sent_first_chunk = True
+                threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start()  # log response
+                return model_response
             elif model_response.choices[0].finish_reason:
                 model_response.choices[0].finish_reason = map_finish_reason(model_response.choices[0].finish_reason)  # ensure consistent output to openai
                 # LOGGING
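
The sketch below is an editor's illustration, not part of the patch. It mimics the patched chunk-parsing behaviour with hypothetical Delta/Choice/Chunk stand-ins for the OpenAI streaming objects (litellm's real code operates on the chunks returned by the openai client), to show that a plain text delta is still surfaced via "text" while a function/tool-calling delta (delta.content is None) is carried through untouched in "original_chunk", which the streaming wrapper can then return to the caller as-is.

# --- Illustrative sketch (not part of the patch) ---------------------------
# Hypothetical stand-ins for the OpenAI streaming chunk objects.
from dataclasses import dataclass
from typing import Optional


@dataclass
class Delta:
    content: Optional[str] = None
    tool_calls: Optional[list] = None


@dataclass
class Choice:
    delta: Delta
    finish_reason: Optional[str] = None


@dataclass
class Chunk:
    choices: list


def handle_chunk(chunk: Chunk) -> dict:
    # Mirrors the patched parsing: text deltas are extracted as before, while
    # tool/function-calling deltas (content is None) keep the raw chunk so the
    # caller still sees delta.tool_calls unchanged.
    text = ""
    is_finished = False
    finish_reason = None
    original_chunk = None  # used for function/tool calling
    if len(chunk.choices) > 0:
        if chunk.choices[0].delta.content is not None:
            text = chunk.choices[0].delta.content
        else:
            original_chunk = chunk
        if chunk.choices[0].finish_reason:
            is_finished = True
            finish_reason = chunk.choices[0].finish_reason
    return {
        "text": text,
        "is_finished": is_finished,
        "finish_reason": finish_reason,
        "original_chunk": original_chunk,
    }


# A plain text delta is surfaced as text; original_chunk stays None.
print(handle_chunk(Chunk(choices=[Choice(delta=Delta(content="Hello"))])))

# A tool-call delta has no content, so the raw chunk is carried through.
tool_chunk = Chunk(choices=[Choice(delta=Delta(tool_calls=[{"function": {"name": "get_weather"}}]))])
print(handle_chunk(tool_chunk)["original_chunk"] is tool_chunk)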