diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py
index 271a53dd4..7d639d7a3 100644
--- a/litellm/tests/test_streaming.py
+++ b/litellm/tests/test_streaming.py
@@ -1501,6 +1501,37 @@ def test_openai_chat_completion_complete_response_call():
 # test_openai_chat_completion_complete_response_call()
 
 
+def test_openai_stream_options_call():
+    litellm.set_verbose = False
+    response = litellm.completion(
+        model="gpt-3.5-turbo",
+        messages=[{"role": "system", "content": "say GM - we're going to make it "}],
+        stream=True,
+        stream_options={"include_usage": True},
+        max_tokens=10,
+    )
+    usage = None
+    chunks = []
+    for chunk in response:
+        print("chunk: ", chunk)
+        chunks.append(chunk)
+
+    last_chunk = chunks[-1]
+    print("last chunk: ", last_chunk)
+
+    """
+    Assert that:
+    - Last Chunk includes Usage
+    - All chunks prior to last chunk have usage=None
+    """
+
+    assert last_chunk.usage is not None
+    assert last_chunk.usage.total_tokens > 0
+    assert last_chunk.usage.prompt_tokens > 0
+    assert last_chunk.usage.completion_tokens > 0
+
+    # assert all non last chunks have usage=None
+    assert all(chunk.usage is None for chunk in chunks[:-1])
 
 
 def test_openai_text_completion_call():
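
For context, here is a minimal sketch (not part of the diff) of how a caller might consume the usage-bearing final chunk that `stream_options={"include_usage": True}` adds to an OpenAI stream. It reuses only names exercised by the test above (`litellm.completion`, `chunk.usage`, and the token-count fields); the model name, the prompt, and the assumption that `OPENAI_API_KEY` is set in the environment are illustrative.

```python
# Illustrative sketch, not part of the diff: reading the trailing usage chunk.
# Assumes OPENAI_API_KEY is set in the environment; model and prompt are
# placeholders.
import litellm

response = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Say hello."}],
    stream=True,
    stream_options={"include_usage": True},  # request a trailing usage chunk
)

usage = None
for chunk in response:
    # Per the test above, only the last chunk carries usage; all earlier
    # chunks have usage=None, so the loop ends holding the final value.
    if chunk.usage is not None:
        usage = chunk.usage

if usage is not None:
    print(
        f"prompt={usage.prompt_tokens} "
        f"completion={usage.completion_tokens} "
        f"total={usage.total_tokens}"
    )
```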