Merge pull request #3267 from BerriAI/litellm_openai_streaming_fix
fix(utils.py): fix streaming to not return usage dict
Commit 435a4b5ed4 · 25 changed files with 216 additions and 5301 deletions
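
In practice this change means a streamed chunk should no longer carry a default, zeroed-out usage dict, while non-streaming responses keep reporting token counts as before. A minimal sketch of the expected behavior (the model name is illustrative and an API key is assumed to be configured; this snippet is not part of the diff):

```python
import litellm

# Streaming: chunks no longer default to an empty Usage() object, so the
# OpenAI-compatible delta shape stays clean unless the provider sends counts.
for chunk in litellm.completion(
    model="gpt-3.5-turbo",  # any chat model works here
    messages=[{"role": "user", "content": "hi"}],
    stream=True,
):
    print(hasattr(chunk, "usage"))  # typically False for ordinary chunks

# Non-streaming: usage is still populated as before.
resp = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
)
print(resp.usage.total_tokens)
```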
```diff
@@ -531,9 +531,6 @@ class ModelResponse(OpenAIObject):
     backend changes have been made that might impact determinism.
     """
 
-    usage: Optional[Usage] = None
-    """Usage statistics for the completion request."""
-
     _hidden_params: dict = {}
 
     def __init__(
```
```diff
@@ -588,20 +585,27 @@ class ModelResponse(OpenAIObject):
         else:
             created = created
         model = model
-        if usage:
+        if usage is not None:
             usage = usage
-        else:
+        elif stream is None or stream == False:
             usage = Usage()
         if hidden_params:
             self._hidden_params = hidden_params
 
+        init_values = {
+            "id": id,
+            "choices": choices,
+            "created": created,
+            "model": model,
+            "object": object,
+            "system_fingerprint": system_fingerprint,
+        }
+
+        if usage is not None:
+            init_values["usage"] = usage
+
         super().__init__(
-            id=id,
-            choices=choices,
-            created=created,
-            model=model,
-            object=object,
-            system_fingerprint=system_fingerprint,
-            usage=usage,
+            **init_values,
             **params,
         )
```
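
The reworked constructor now collects its keyword arguments in an init_values dict and only includes usage when it is actually set, so streamed ModelResponse objects omit the field entirely instead of defaulting it. A standalone sketch of that conditional-kwargs pattern, with made-up class names rather than litellm's real ones:

```python
from typing import Any, Dict, Optional

from pydantic import BaseModel, ConfigDict


class Usage(BaseModel):
    prompt_tokens: int = 0
    completion_tokens: int = 0
    total_tokens: int = 0


class DemoResponse(BaseModel):
    # mirror OpenAIObject's permissive extra-field behavior
    model_config = ConfigDict(extra="allow")
    id: str


def build_response(id: str, usage: Optional[Usage] = None, stream: Optional[bool] = None) -> DemoResponse:
    # Default usage only for non-streaming calls; streamed chunks leave it unset.
    if usage is None and (stream is None or stream is False):
        usage = Usage()

    init_values: Dict[str, Any] = {"id": id}
    if usage is not None:
        init_values["usage"] = usage  # key is omitted entirely when streaming

    return DemoResponse(**init_values)


print(build_response("resp-1").model_dump())                # contains a zeroed usage
print(build_response("chunk-1", stream=True).model_dump())  # no usage key at all
```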
```diff
@@ -6885,10 +6889,14 @@ async def convert_to_streaming_response_async(response_object: Optional[dict] = None):
     model_response_object.choices = choice_list
 
     if "usage" in response_object and response_object["usage"] is not None:
-        model_response_object.usage = Usage(
-            completion_tokens=response_object["usage"].get("completion_tokens", 0),
-            prompt_tokens=response_object["usage"].get("prompt_tokens", 0),
-            total_tokens=response_object["usage"].get("total_tokens", 0),
+        setattr(
+            model_response_object,
+            "usage",
+            Usage(
+                completion_tokens=response_object["usage"].get("completion_tokens", 0),
+                prompt_tokens=response_object["usage"].get("prompt_tokens", 0),
+                total_tokens=response_object["usage"].get("total_tokens", 0),
+            ),
         )
 
     if "id" in response_object:
```
```diff
@@ -6939,6 +6947,7 @@ def convert_to_streaming_response(response_object: Optional[dict] = None):
     model_response_object.choices = choice_list
 
     if "usage" in response_object and response_object["usage"] is not None:
+        setattr(model_response_object, "usage", Usage())
         model_response_object.usage.completion_tokens = response_object["usage"].get("completion_tokens", 0)  # type: ignore
         model_response_object.usage.prompt_tokens = response_object["usage"].get("prompt_tokens", 0)  # type: ignore
         model_response_object.usage.total_tokens = response_object["usage"].get("total_tokens", 0)  # type: ignore
```
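
Both converters now attach usage only when the raw provider payload actually contains it, and they seed the attribute with setattr before filling in the token counts so nothing reads a field that was never initialized. A standalone sketch of that guard; the response_object dict below is invented for illustration:

```python
from litellm.utils import ModelResponse, Usage

# Example raw payload as a provider might return it (values are made up).
response_object = {
    "usage": {"prompt_tokens": 3, "completion_tokens": 5, "total_tokens": 8}
}

model_response_object = ModelResponse(stream=True)
if "usage" in response_object and response_object["usage"] is not None:
    # Seed the attribute first, then copy the counts over.
    setattr(model_response_object, "usage", Usage())
    model_response_object.usage.completion_tokens = response_object["usage"].get("completion_tokens", 0)
    model_response_object.usage.prompt_tokens = response_object["usage"].get("prompt_tokens", 0)
    model_response_object.usage.total_tokens = response_object["usage"].get("total_tokens", 0)

print(getattr(model_response_object, "usage", None))
```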
```diff
@@ -9789,6 +9798,7 @@ class CustomStreamWrapper:
             if response_obj is None:
                 return
             completion_obj["content"] = response_obj["text"]
+            setattr(model_response, "usage", Usage())
             if response_obj.get("prompt_tokens", None) is not None:
                 model_response.usage.prompt_tokens = response_obj[
                     "prompt_tokens"
```
```diff
@@ -10082,6 +10092,7 @@ class CustomStreamWrapper:
                 "content" in completion_obj
                 and isinstance(completion_obj["content"], str)
                 and len(completion_obj["content"]) == 0
+                and hasattr(model_response, "usage")
                 and hasattr(model_response.usage, "prompt_tokens")
             ):
                 if self.sent_first_chunk == False:
```
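
Because streamed responses may now lack a usage attribute altogether, the wrapper's zero-length-content check first verifies the attribute exists before reading prompt_tokens from it. Downstream code that inspects streamed chunks may want the same guard; a small hedged example:

```python
from litellm.utils import ModelResponse

model_response = ModelResponse(stream=True)

# usage is no longer defaulted on streamed responses, so guard before reading it.
if hasattr(model_response, "usage") and hasattr(model_response.usage, "prompt_tokens"):
    print(model_response.usage.prompt_tokens)
else:
    print("no usage reported on this chunk")
```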