fix(utils.py): fix recreating model response object when stream usage is true

Krrish Dholakia 2024-07-11 21:00:46 -07:00
parent e112379d2f
commit b2e46086dd
3 changed files with 88 additions and 17 deletions


@@ -8951,7 +8951,16 @@ class CustomStreamWrapper:
             model_response.system_fingerprint = self.system_fingerprint
             model_response._hidden_params["custom_llm_provider"] = _logging_obj_llm_provider
             model_response._hidden_params["created_at"] = time.time()
-            model_response.choices = [StreamingChoices(finish_reason=None)]
+
+            if (
+                len(model_response.choices) > 0
+                and hasattr(model_response.choices[0], "delta")
+                and model_response.choices[0].delta is not None
+            ):
+                # do nothing, if object instantiated
+                pass
+            else:
+                model_response.choices = [StreamingChoices(finish_reason=None)]
             return model_response
 
     def is_delta_empty(self, delta: Delta) -> bool:
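
Why this change: when usage is requested on a stream (e.g. `stream_options={"include_usage": True}`), the wrapper recreates the final response object, and unconditionally assigning `model_response.choices = [StreamingChoices(finish_reason=None)]` wiped out any delta already attached to the chunk being rebuilt. The guard above keeps an already-instantiated choice. A minimal sketch of that guard, using simplified stand-in classes (the `Delta`/`StreamingChoices` below are illustrative placeholders, not litellm's actual types):

    class Delta:
        def __init__(self, content=None):
            self.content = content

    class StreamingChoices:
        def __init__(self, finish_reason=None, delta=None):
            self.finish_reason = finish_reason
            self.delta = delta

    def keep_or_reset_choices(choices):
        # Keep choices whose first entry already carries a delta;
        # otherwise fall back to a fresh placeholder choice.
        if len(choices) > 0 and getattr(choices[0], "delta", None) is not None:
            return choices
        return [StreamingChoices(finish_reason=None)]

    populated = [StreamingChoices(delta=Delta(content="hi"))]
    assert keep_or_reset_choices(populated) is populated   # existing delta preserved
    assert keep_or_reset_choices([])[0].delta is None      # fresh placeholder created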
@@ -9892,7 +9901,6 @@ class CustomStreamWrapper:
                     self.rules.post_call_rules(
                         input=self.response_uptil_now, model=self.model
                     )
-                    print_verbose(f"final returned processed chunk: {processed_chunk}")
                     self.chunks.append(processed_chunk)
                     if hasattr(
                         processed_chunk, "usage"
@@ -9906,6 +9914,7 @@ class CustomStreamWrapper:
 
                         # Create a new object without the removed attribute
                         processed_chunk = self.model_response_creator(chunk=obj_dict)
+                    print_verbose(f"final returned processed chunk: {processed_chunk}")
                     return processed_chunk
                 raise StopAsyncIteration
             else:  # temporary patch for non-aiohttp async calls
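
For context on the surrounding hunks: on the final chunk, `usage` is stripped from the chunk's dict and the response is rebuilt via `model_response_creator`, so the `print_verbose` call is moved to after that rebuild and logs the chunk actually returned. A minimal sketch of the strip-and-rebuild step, using a plain dict in place of litellm's response object (names are illustrative only):

    def rebuild_without_usage(processed_chunk: dict) -> dict:
        # Shallow-copy the chunk, drop usage (it is emitted separately at
        # the end of the stream), and recreate the object from the dict.
        obj_dict = dict(processed_chunk)
        obj_dict.pop("usage", None)
        return obj_dict  # stands in for self.model_response_creator(chunk=obj_dict)

    final = rebuild_without_usage({"content": "done", "usage": {"total_tokens": 42}})
    assert "usage" not in final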