fix(utils.py): fix stream options to return consistent response object

Krrish Dholakia 2024-06-04 18:17:45 -07:00
parent 5e1faf31b0
commit 9aa29854de
2 changed files with 20 additions and 10 deletions
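
For reference, the behavior this commit targets can be exercised with a short script (a minimal sketch; the model name and prompt are placeholders, and the assertions mirror the updated test below):

import litellm

# With include_usage, only the final streamed chunk should carry a
# populated usage object; every earlier chunk should have usage=None.
response = litellm.completion(
    model="gpt-3.5-turbo",  # placeholder model
    messages=[{"role": "user", "content": "Hi"}],
    stream=True,
    stream_options={"include_usage": True},
)

chunks = list(response)
last_chunk = chunks[-1]
assert last_chunk.usage is not None
assert last_chunk.usage.total_tokens > 0
assert all(getattr(chunk, "usage", None) is None for chunk in chunks[:-1])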


@@ -2018,12 +2018,24 @@ def test_openai_stream_options_call():
     """
     assert last_chunk.usage is not None
+    assert isinstance(last_chunk.usage, litellm.Usage)
     assert last_chunk.usage.total_tokens > 0
     assert last_chunk.usage.prompt_tokens > 0
     assert last_chunk.usage.completion_tokens > 0
     # assert all non last chunks have usage=None
-    assert all(chunk.usage is None for chunk in chunks[:-1])
+    # Improved assertion with detailed error message
+    non_last_chunks_with_usage = [
+        chunk
+        for chunk in chunks[:-1]
+        if hasattr(chunk, "usage") and chunk.usage is not None
+    ]
+    assert (
+        not non_last_chunks_with_usage
+    ), f"Non-last chunks with usage not None:\n" + "\n".join(
+        f"Chunk ID: {chunk.id}, Usage: {chunk.usage}, Content: {chunk.choices[0].delta.content}"
+        for chunk in non_last_chunks_with_usage
+    )


 def test_openai_stream_options_call_text_completion():
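
The new assertion style is worth noting on its own: instead of a bare assert all(...), offending chunks are collected first so a failure names each one. A standalone sketch of the pattern, using a hypothetical Chunk stand-in rather than a real response object:

from dataclasses import dataclass
from typing import Optional

@dataclass
class Chunk:  # hypothetical stand-in for a streamed response chunk
    id: str
    usage: Optional[dict] = None

chunks = [Chunk("a"), Chunk("b"), Chunk("c", usage={"total_tokens": 5})]

# Collect violations first, then assert, so the error message is specific.
offenders = [chunk for chunk in chunks[:-1] if chunk.usage is not None]
assert not offenders, "Non-last chunks with usage set:\n" + "\n".join(
    f"Chunk ID: {chunk.id}, Usage: {chunk.usage}" for chunk in offenders
)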


@@ -680,12 +680,6 @@ class ModelResponse(OpenAIObject):
             usage = usage
         elif stream is None or stream == False:
             usage = Usage()
-        elif (
-            stream == True
-            and stream_options is not None
-            and stream_options.get("include_usage") == True
-        ):
-            usage = Usage()

         if hidden_params:
             self._hidden_params = hidden_params
@@ -11107,8 +11101,7 @@ class CustomStreamWrapper:
         model_response.system_fingerprint = self.system_fingerprint
         model_response._hidden_params["custom_llm_provider"] = self.custom_llm_provider
         model_response._hidden_params["created_at"] = time.time()
-        model_response.choices = [StreamingChoices()]
-        model_response.choices[0].finish_reason = None
+        model_response.choices = [StreamingChoices(finish_reason=None)]
         return model_response

     def is_delta_empty(self, delta: Delta) -> bool:
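
Here the construct-then-mutate sequence is collapsed into a single constructor call. The same pattern in isolation, with a hypothetical dataclass rather than litellm's StreamingChoices:

from dataclasses import dataclass
from typing import Optional

@dataclass
class Choice:  # hypothetical; not litellm's StreamingChoices
    finish_reason: Optional[str] = None

before = Choice()
before.finish_reason = None         # construct, then mutate
after = Choice(finish_reason=None)  # one expression, no mutation
assert before == after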
@@ -11463,8 +11456,13 @@ class CustomStreamWrapper:
            if (
                self.stream_options is not None
                and self.stream_options["include_usage"] == True
+               and response_obj["usage"] is not None
            ):
-               model_response.usage = response_obj["usage"]
+               model_response.usage = litellm.Usage(
+                   prompt_tokens=response_obj["usage"].prompt_tokens,
+                   completion_tokens=response_obj["usage"].completion_tokens,
+                   total_tokens=response_obj["usage"].total_tokens,
+               )
            model_response.model = self.model
            print_verbose(
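
Copying the token counts into litellm.Usage, rather than forwarding the provider's own usage object, is what makes the streamed response shape consistent across providers. The normalization step in isolation (the helper name is hypothetical):

import litellm

def normalize_usage(provider_usage) -> litellm.Usage:
    # Copy token counts off a provider-specific usage object so callers
    # always receive the same litellm.Usage type.
    return litellm.Usage(
        prompt_tokens=provider_usage.prompt_tokens,
        completion_tokens=provider_usage.completion_tokens,
        total_tokens=provider_usage.total_tokens,
    )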