forked from phoenix/litellm-mirror
fix(utils.py): fix stream options to return consistent response object
parent: 5e1faf31b0
commit: 9aa29854de
2 changed files with 20 additions and 10 deletions
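For context, a minimal sketch of the behaviour this commit targets, assuming an OpenAI API key is configured; the model name below is only an illustrative choice and is not taken from this commit. With stream_options={"include_usage": True}, only the final streamed chunk should carry token usage, and after this change that usage is always a litellm.Usage object:

import litellm

# Stream a completion and ask the provider to attach usage to the final chunk.
response = litellm.completion(
    model="gpt-3.5-turbo",  # example model, not prescribed by this commit
    messages=[{"role": "user", "content": "Say hi"}],
    stream=True,
    stream_options={"include_usage": True},
)
chunks = list(response)

# Only the last chunk should expose usage, and it should be a litellm.Usage.
assert isinstance(chunks[-1].usage, litellm.Usage)
assert all(getattr(chunk, "usage", None) is None for chunk in chunks[:-1])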
@@ -2018,12 +2018,24 @@ def test_openai_stream_options_call():
     assert last_chunk.usage is not None
+    assert isinstance(last_chunk.usage, litellm.Usage)
     assert last_chunk.usage.total_tokens > 0
     assert last_chunk.usage.prompt_tokens > 0
     assert last_chunk.usage.completion_tokens > 0

     # assert all non last chunks have usage=None
-    assert all(chunk.usage is None for chunk in chunks[:-1])
+    # Improved assertion with detailed error message
+    non_last_chunks_with_usage = [
+        chunk
+        for chunk in chunks[:-1]
+        if hasattr(chunk, "usage") and chunk.usage is not None
+    ]
+    assert (
+        not non_last_chunks_with_usage
+    ), f"Non-last chunks with usage not None:\n" + "\n".join(
+        f"Chunk ID: {chunk.id}, Usage: {chunk.usage}, Content: {chunk.choices[0].delta.content}"
+        for chunk in non_last_chunks_with_usage
+    )


 def test_openai_stream_options_call_text_completion():
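The rewritten assertion above first collects the offending chunks, so a failure reports exactly which non-final chunks carried usage rather than a bare False from all(...). A hypothetical standalone helper (not part of the repository) using the same collect-then-assert pattern could look like:

def assert_only_last_chunk_has_usage(chunks):
    # Gather non-final chunks that unexpectedly carry usage so the failure
    # message can list them instead of just reporting that the check failed.
    offenders = [
        chunk for chunk in chunks[:-1]
        if getattr(chunk, "usage", None) is not None
    ]
    assert not offenders, "Non-last chunks with usage set: " + ", ".join(
        str(getattr(chunk, "id", "<no id>")) for chunk in offenders
    )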
@@ -680,12 +680,6 @@ class ModelResponse(OpenAIObject):
             usage = usage
         elif stream is None or stream == False:
             usage = Usage()
-        elif (
-            stream == True
-            and stream_options is not None
-            and stream_options.get("include_usage") == True
-        ):
-            usage = Usage()
         if hidden_params:
             self._hidden_params = hidden_params
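Dropping the elif branch above means ModelResponse.__init__ no longer attaches a default Usage() to streaming responses when include_usage is requested; for streams, usage is now set only by CustomStreamWrapper on the final chunk (see the last hunk below). Non-streaming calls keep their populated usage object, as in this sketch (again assuming an API key is configured and using an example model name):

import litellm

resp = litellm.completion(
    model="gpt-3.5-turbo",  # example model, not prescribed by this commit
    messages=[{"role": "user", "content": "Say hi"}],
)
# Non-streaming responses still expose token counts directly on the response.
print(resp.usage.total_tokens)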
@@ -11107,8 +11101,7 @@ class CustomStreamWrapper:
         model_response.system_fingerprint = self.system_fingerprint
         model_response._hidden_params["custom_llm_provider"] = self.custom_llm_provider
         model_response._hidden_params["created_at"] = time.time()
-        model_response.choices = [StreamingChoices()]
-        model_response.choices[0].finish_reason = None
+        model_response.choices = [StreamingChoices(finish_reason=None)]
         return model_response

     def is_delta_empty(self, delta: Delta) -> bool:
@@ -11463,8 +11456,13 @@ class CustomStreamWrapper:
             if (
                 self.stream_options is not None
                 and self.stream_options["include_usage"] == True
+                and response_obj["usage"] is not None
             ):
-                model_response.usage = response_obj["usage"]
+                model_response.usage = litellm.Usage(
+                    prompt_tokens=response_obj["usage"].prompt_tokens,
+                    completion_tokens=response_obj["usage"].completion_tokens,
+                    total_tokens=response_obj["usage"].total_tokens,
+                )

             model_response.model = self.model
             print_verbose(
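Copying the token counts into a fresh litellm.Usage (and only when response_obj["usage"] is not None) keeps the final streamed chunk's usage the same type as non-streaming responses, which is what the new isinstance check in the test verifies. An illustrative, self-contained sketch of that conversion, with a stand-in for the provider's usage object:

from types import SimpleNamespace
import litellm

# Hypothetical provider usage payload (values invented for illustration).
provider_usage = SimpleNamespace(prompt_tokens=9, completion_tokens=3, total_tokens=12)

usage = litellm.Usage(
    prompt_tokens=provider_usage.prompt_tokens,
    completion_tokens=provider_usage.completion_tokens,
    total_tokens=provider_usage.total_tokens,
)
assert isinstance(usage, litellm.Usage)
assert usage.total_tokens == usage.prompt_tokens + usage.completion_tokens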