fix(utils.py): fix streaming to not return usage dict

Fixes https://github.com/BerriAI/litellm/issues/3237
This commit is contained in:
Krrish Dholakia 2024-04-24 08:06:07 -07:00
parent facc5f65aa
commit b10f03706d
24 changed files with 107 additions and 83 deletions

View file

@ -41,9 +41,9 @@ class PetalsConfig:
"""
max_length: Optional[int] = None
max_new_tokens: Optional[
int
] = litellm.max_tokens # petals requires max tokens to be set
max_new_tokens: Optional[int] = (
litellm.max_tokens
) # petals requires max tokens to be set
do_sample: Optional[bool] = None
temperature: Optional[float] = None
top_k: Optional[int] = None
@ -203,7 +203,7 @@ def completion(
completion_tokens=completion_tokens,
total_tokens=prompt_tokens + completion_tokens,
)
model_response.usage = usage
setattr(model_response, "usage", usage)
return model_response