fix(utils.py): flush holding chunk for streaming, on stream end

Krrish Dholakia 2023-12-12 16:13:31 -08:00
parent 7efcc550e2
commit 669862643b
2 changed files with 27 additions and 2 deletions

litellm/utils.py

@@ -5682,6 +5682,13 @@ class CustomStreamWrapper:
                 else:
                     return
             elif model_response.choices[0].finish_reason:
+                # flush any remaining holding chunk
+                if len(self.holding_chunk) > 0:
+                    if model_response.choices[0].delta.content is None:
+                        model_response.choices[0].delta.content = self.holding_chunk
+                    else:
+                        model_response.choices[0].delta.content = self.holding_chunk + model_response.choices[0].delta.content
+                    self.holding_chunk = ""
                 model_response.choices[0].finish_reason = map_finish_reason(model_response.choices[0].finish_reason)  # ensure consistent output to openai
                 return model_response
             elif response_obj is not None and response_obj.get("original_chunk", None) is not None:  # function / tool calling branch - only set for openai/azure compatible endpoints
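In plain terms, the added block guarantees that any text still sitting in self.holding_chunk when the final chunk arrives (i.e. finish_reason is set) gets merged into that chunk's delta.content instead of being dropped. Below is a minimal, self-contained sketch of the same pattern, not litellm's actual CustomStreamWrapper; the StreamBuffer class, its hold_chars parameter, and the on_chunk method are names invented for illustration.

# Toy stand-in for a streaming wrapper (illustrative only): a short tail of
# every chunk is held back, e.g. so stop sequences that span chunk boundaries
# can be checked, and that tail is flushed when the stream ends.
class StreamBuffer:
    def __init__(self, hold_chars: int = 5):
        self.hold_chars = hold_chars
        self.holding_chunk = ""  # text withheld from earlier chunks

    def on_chunk(self, content, finish_reason=None):
        if finish_reason:
            # Stream end: prepend whatever is still held to the final content,
            # mirroring the flush added in this commit.
            flushed = self.holding_chunk + (content or "")
            self.holding_chunk = ""
            return flushed, finish_reason
        text = self.holding_chunk + (content or "")
        # Emit everything except a small tail, which is carried to the next chunk.
        self.holding_chunk = text[-self.hold_chars:]
        return text[:-self.hold_chars], None

buf = StreamBuffer(hold_chars=5)
print(buf.on_chunk("Hello, wor"))   # ('Hello', None)        ", wor" is held back
print(buf.on_chunk("ld!", "stop"))  # (', world!', 'stop')   held text is flushed

Without the flush branch, the second call would return only 'ld!' and the held ', wor' would never reach the caller, which is the kind of dropped-text bug this commit guards against on stream end.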