From 70fc5afb5d4b94482ecd1cea5e99a2b24e436d57 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Sat, 18 Nov 2023 16:23:05 -0800
Subject: [PATCH] (fix) streaming + function / tool calling

---
 litellm/utils.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/litellm/utils.py b/litellm/utils.py
index be5fc86ca..33e5879cf 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -4598,13 +4598,23 @@ class CustomStreamWrapper:
             text = ""
             is_finished = False
             finish_reason = None
+            original_chunk = None  # this is used for function/tool calling
             if len(str_line.choices) > 0:
                 if str_line.choices[0].delta.content is not None:
                     text = str_line.choices[0].delta.content
+                else:  # function/tool calling chunk - content is None, so we pass the original OpenAI chunk through
+                    original_chunk = str_line
                 if str_line.choices[0].finish_reason:
                     is_finished = True
                     finish_reason = str_line.choices[0].finish_reason
-            return {"text": text, "is_finished": is_finished, "finish_reason": finish_reason}
+
+
+            return {
+                "text": text,
+                "is_finished": is_finished,
+                "finish_reason": finish_reason,
+                "original_chunk": original_chunk
+            }
         except Exception as e:
             traceback.print_exc()
             raise e
@@ -4856,6 +4866,13 @@ class CustomStreamWrapper:
                     return model_response
                 else:
                     return
+            elif response_obj.get("original_chunk", None) is not None:  # function / tool calling branch
+                model_response = response_obj.get("original_chunk", None)
+                if self.sent_first_chunk == False:
+                    completion_obj["role"] = "assistant"
+                    self.sent_first_chunk = True
+                threading.Thread(target=self.logging_obj.success_handler, args=(model_response,)).start()  # log response
+                return model_response
             elif model_response.choices[0].finish_reason:
                 model_response.choices[0].finish_reason = map_finish_reason(model_response.choices[0].finish_reason)  # ensure consistent output to openai
                 # LOGGING
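
The sketch below is an editor's illustration, not part of the patch. It mimics the patched chunk-parsing behaviour with hypothetical Delta/Choice/Chunk stand-ins for the OpenAI streaming objects (litellm's real code operates on the chunks returned by the openai client), to show that a plain text delta is still surfaced via "text" while a function/tool-calling delta (delta.content is None) is carried through untouched in "original_chunk", which the streaming wrapper can then return to the caller as-is.

# --- Illustrative sketch (not part of the patch) ---------------------------
# Hypothetical stand-ins for the OpenAI streaming chunk objects.
from dataclasses import dataclass
from typing import Optional


@dataclass
class Delta:
    content: Optional[str] = None
    tool_calls: Optional[list] = None


@dataclass
class Choice:
    delta: Delta
    finish_reason: Optional[str] = None


@dataclass
class Chunk:
    choices: list


def handle_chunk(chunk: Chunk) -> dict:
    # Mirrors the patched parsing: text deltas are extracted as before, while
    # tool/function-calling deltas (content is None) keep the raw chunk so the
    # caller still sees delta.tool_calls unchanged.
    text = ""
    is_finished = False
    finish_reason = None
    original_chunk = None  # used for function/tool calling
    if len(chunk.choices) > 0:
        if chunk.choices[0].delta.content is not None:
            text = chunk.choices[0].delta.content
        else:
            original_chunk = chunk
        if chunk.choices[0].finish_reason:
            is_finished = True
            finish_reason = chunk.choices[0].finish_reason
    return {
        "text": text,
        "is_finished": is_finished,
        "finish_reason": finish_reason,
        "original_chunk": original_chunk,
    }


# A plain text delta is surfaced as text; original_chunk stays None.
print(handle_chunk(Chunk(choices=[Choice(delta=Delta(content="Hello"))])))

# A tool-call delta has no content, so the raw chunk is carried through.
tool_chunk = Chunk(choices=[Choice(delta=Delta(tool_calls=[{"function": {"name": "get_weather"}}]))])
print(handle_chunk(tool_chunk)["original_chunk"] is tool_chunk)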