Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 10:44:24 +00:00)
fix(utils.py): fix recreating model response object when stream usage is true
parent e112379d2f
commit b2e46086dd

3 changed files with 88 additions and 17 deletions
@@ -8951,7 +8951,16 @@ class CustomStreamWrapper:
         model_response.system_fingerprint = self.system_fingerprint
         model_response._hidden_params["custom_llm_provider"] = _logging_obj_llm_provider
         model_response._hidden_params["created_at"] = time.time()
-        model_response.choices = [StreamingChoices(finish_reason=None)]
+
+        if (
+            len(model_response.choices) > 0
+            and hasattr(model_response.choices[0], "delta")
+            and model_response.choices[0].delta is not None
+        ):
+            # do nothing, if object instantiated
+            pass
+        else:
+            model_response.choices = [StreamingChoices(finish_reason=None)]
         return model_response
 
     def is_delta_empty(self, delta: Delta) -> bool:
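The hunk above is the substance of the fix: when stream usage is requested, the final chunk's response object is rebuilt, and previously model_response.choices was reset to a fresh StreamingChoices unconditionally, discarding any delta already attached. The new guard keeps existing choices intact. Below is a minimal, self-contained sketch of that pattern; Delta, StreamingChoices, ModelResponse, and finalize_chunk here are simplified stand-ins for illustration, not litellm's actual classes.

from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class Delta:
    content: Optional[str] = None


@dataclass
class StreamingChoices:
    finish_reason: Optional[str] = None
    delta: Delta = field(default_factory=Delta)


@dataclass
class ModelResponse:
    choices: List[StreamingChoices] = field(default_factory=list)


def finalize_chunk(model_response: ModelResponse) -> ModelResponse:
    # Only reset choices when no populated delta exists; otherwise content
    # already streamed into the delta would be lost on the final usage chunk.
    if (
        len(model_response.choices) > 0
        and hasattr(model_response.choices[0], "delta")
        and model_response.choices[0].delta is not None
    ):
        pass  # keep the existing choices/delta
    else:
        model_response.choices = [StreamingChoices(finish_reason=None)]
    return model_response


# Usage: a response that already carries a delta keeps its content.
resp = ModelResponse(choices=[StreamingChoices(delta=Delta(content="hello"))])
assert finalize_chunk(resp).choices[0].delta.content == "hello"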
@@ -9892,7 +9901,6 @@ class CustomStreamWrapper:
                     self.rules.post_call_rules(
                         input=self.response_uptil_now, model=self.model
                     )
-                    print_verbose(f"final returned processed chunk: {processed_chunk}")
                     self.chunks.append(processed_chunk)
                     if hasattr(
                         processed_chunk, "usage"
@@ -9906,6 +9914,7 @@
 
                         # Create a new object without the removed attribute
                         processed_chunk = self.model_response_creator(chunk=obj_dict)
+                        print_verbose(f"final returned processed chunk: {processed_chunk}")
                     return processed_chunk
                 raise StopAsyncIteration
             else: # temporary patch for non-aiohttp async calls
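The remaining two hunks move the final print_verbose log so it fires after the chunk has been recreated without the stripped attribute, so the message reflects the object that is actually returned. A small, hedged sketch of that recreate-then-log step follows; strip_key_and_recreate and the dict-based stand-in are illustrative assumptions, not litellm's exact implementation (only model_response_creator(chunk=obj_dict) appears in the diff).

# Hedged sketch: rebuild a response chunk from its dict form minus one key,
# then log the recreated object. strip_key_and_recreate and the lambda creator
# below are hypothetical helpers for illustration only.
def strip_key_and_recreate(chunk_dict, key, model_response_creator):
    obj_dict = {k: v for k, v in chunk_dict.items() if k != key}
    processed_chunk = model_response_creator(chunk=obj_dict)
    # Log after recreation, so the output matches what is returned.
    print(f"final returned processed chunk: {processed_chunk}")
    return processed_chunk


# Usage with a trivial creator standing in for self.model_response_creator:
result = strip_key_and_recreate(
    {"id": "chunk-1", "usage": {"total_tokens": 42}},
    key="usage",
    model_response_creator=lambda chunk: dict(chunk),
)
assert "usage" not in result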