Merge pull request #3267 from BerriAI/litellm_openai_streaming_fix

fix(utils.py): fix streaming to not return usage dict
Authored by Krish Dholakia on 2024-04-24 21:08:33 -07:00, committed by GitHub
commit 435a4b5ed4
25 changed files with 216 additions and 5301 deletions
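
The fix below stops streamed ModelResponse chunks from carrying a default usage dict: the usage field is removed from the class declaration and is only attached when a value actually exists. A minimal sketch of the intended behavior after this change (import path taken from this diff's file, utils.py; the exact attribute semantics are an assumption):

from litellm.utils import ModelResponse, Usage

# Streaming chunk: no usage is injected by default, so the attribute
# (and therefore a "usage" key in the serialized chunk) is absent.
stream_chunk = ModelResponse(stream=True)
print(hasattr(stream_chunk, "usage"))  # expected: False

# Non-streaming response: usage still defaults to an empty Usage object.
full_response = ModelResponse()
print(isinstance(full_response.usage, Usage))  # expected: True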


@@ -531,9 +531,6 @@ class ModelResponse(OpenAIObject):
     backend changes have been made that might impact determinism.
     """
 
-    usage: Optional[Usage] = None
-    """Usage statistics for the completion request."""
-
     _hidden_params: dict = {}
 
     def __init__(
@@ -588,20 +585,27 @@ class ModelResponse(OpenAIObject):
         else:
             created = created
         model = model
-        if usage:
+        if usage is not None:
             usage = usage
-        else:
+        elif stream is None or stream == False:
             usage = Usage()
+
         if hidden_params:
             self._hidden_params = hidden_params
 
+        init_values = {
+            "id": id,
+            "choices": choices,
+            "created": created,
+            "model": model,
+            "object": object,
+            "system_fingerprint": system_fingerprint,
+        }
+
+        if usage is not None:
+            init_values["usage"] = usage
+
         super().__init__(
-            id=id,
-            choices=choices,
-            created=created,
-            model=model,
-            object=object,
-            system_fingerprint=system_fingerprint,
-            usage=usage,
+            **init_values,
             **params,
         )
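
The constructor now gathers its keyword arguments into init_values and forwards usage to the parent class only when it is set, instead of always passing usage=usage. The same conditional-kwargs pattern in isolation (hypothetical helper, not litellm code):

from typing import Optional

def build_payload(response_id: str, model: str, usage: Optional[dict] = None, stream: bool = False) -> dict:
    # Include "usage" only when a value exists; for streaming there is no
    # default, so the key is simply absent rather than an empty dict.
    payload = {"id": response_id, "model": model}
    if usage is not None:
        payload["usage"] = usage
    elif not stream:
        payload["usage"] = {}
    return payload

print(build_payload("chatcmpl-1", "gpt-3.5-turbo", stream=True))   # no "usage" key
print(build_payload("chatcmpl-1", "gpt-3.5-turbo", stream=False))  # includes "usage": {}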
@@ -6885,10 +6889,14 @@ async def convert_to_streaming_response_async(response_object: Optional[dict] =
     model_response_object.choices = choice_list
 
     if "usage" in response_object and response_object["usage"] is not None:
-        model_response_object.usage = Usage(
-            completion_tokens=response_object["usage"].get("completion_tokens", 0),
-            prompt_tokens=response_object["usage"].get("prompt_tokens", 0),
-            total_tokens=response_object["usage"].get("total_tokens", 0),
+        setattr(
+            model_response_object,
+            "usage",
+            Usage(
+                completion_tokens=response_object["usage"].get("completion_tokens", 0),
+                prompt_tokens=response_object["usage"].get("prompt_tokens", 0),
+                total_tokens=response_object["usage"].get("total_tokens", 0),
+            ),
         )
 
     if "id" in response_object:
@@ -6939,6 +6947,7 @@ def convert_to_streaming_response(response_object: Optional[dict] = None):
     model_response_object.choices = choice_list
 
     if "usage" in response_object and response_object["usage"] is not None:
+        setattr(model_response_object, "usage", Usage())
         model_response_object.usage.completion_tokens = response_object["usage"].get("completion_tokens", 0)  # type: ignore
         model_response_object.usage.prompt_tokens = response_object["usage"].get("prompt_tokens", 0)  # type: ignore
         model_response_object.usage.total_tokens = response_object["usage"].get("total_tokens", 0)  # type: ignore
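
With usage no longer declared on the model, both converters above attach it dynamically via setattr, and only when the provider actually reported usage. A short sketch of the pattern on a plain object (illustrative only, not litellm code):

class Chunk:
    # Stand-in for a response object with no declared usage field.
    pass

response_object = {"usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}}

chunk = Chunk()
if "usage" in response_object and response_object["usage"] is not None:
    # Attach usage only when the provider reported it, mirroring the converters above.
    setattr(chunk, "usage", response_object["usage"])

print(getattr(chunk, "usage", None))  # {'prompt_tokens': 10, ...}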
@@ -9789,6 +9798,7 @@ class CustomStreamWrapper:
                 if response_obj is None:
                     return
                 completion_obj["content"] = response_obj["text"]
+                setattr(model_response, "usage", Usage())
                 if response_obj.get("prompt_tokens", None) is not None:
                     model_response.usage.prompt_tokens = response_obj[
                         "prompt_tokens"
@@ -10082,6 +10092,7 @@ class CustomStreamWrapper:
                     "content" in completion_obj
                     and isinstance(completion_obj["content"], str)
                     and len(completion_obj["content"]) == 0
+                    and hasattr(model_response, "usage")
                     and hasattr(model_response.usage, "prompt_tokens")
                 ):
                     if self.sent_first_chunk == False:
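
Since a streamed model_response may now have no usage attribute at all, the condition above first checks hasattr(model_response, "usage") before touching model_response.usage; without the extra guard, evaluating hasattr(model_response.usage, "prompt_tokens") would raise AttributeError. A standalone version of the guard (hypothetical helper name):

def has_prompt_token_count(model_response) -> bool:
    # Safe even when the chunk was created without a usage attribute.
    return hasattr(model_response, "usage") and hasattr(
        model_response.usage, "prompt_tokens"
    )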