Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-26 03:04:13 +00:00
(Bug fix) - Using include_usage for /completions requests + unit testing (#8484)

* pass stream options (#8419)
* test_completion_streaming_usage_metrics
* test_text_completion_include_usage

Co-authored-by: Kaushik Deka <55996465+Kaushikdkrikhanu@users.noreply.github.com>
This commit is contained in:
parent 2a79c95af7
commit 152b44075b
3 changed files with 66 additions and 0 deletions
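
In caller terms, the fix means a streamed /completions request made through litellm now forwards stream_options to the underlying handler, so the provider's final usage chunk actually reaches the caller. A minimal sketch of that flow, mirroring the new tests below (the model name and prompt are just the values the tests use):

import asyncio

import litellm


async def main():
    # Stream a text completion and ask the provider to append a usage chunk.
    # Before this fix, stream_options was not forwarded for /completions.
    response = await litellm.atext_completion(
        model="gpt-3.5-turbo",
        prompt="Hello, world!",
        stream=True,
        stream_options={"include_usage": True},
    )

    last_chunk = None
    async for chunk in response:
        last_chunk = chunk

    # With include_usage honored, the last chunk carries prompt/completion/total token counts.
    print(last_chunk.usage)


asyncio.run(main())
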
@@ -3947,6 +3947,7 @@ async def atext_completion
                 ),
                 model=model,
                 custom_llm_provider=custom_llm_provider,
+                stream_options=kwargs.get('stream_options'),
             )
         else:
             ## OpenAI / Azure Text Completion Returns here

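Because include_usage adds a trailing chunk whose usage field is populated (and whose choices list is typically empty under OpenAI-style streaming), consumers usually want to separate generated text from that final accounting chunk. A hedged sketch of such a helper; collect_text_and_usage is a hypothetical name, not part of this diff:

from typing import Any, Tuple


async def collect_text_and_usage(response: Any) -> Tuple[str, Any]:
    # Drain an OpenAI-style text-completion stream, keeping generated text
    # separate from the usage block that arrives on the final chunk when
    # stream_options={"include_usage": True} is requested.
    text_parts = []
    usage = None
    async for chunk in response:
        if chunk.choices:  # ordinary content chunks
            text_parts.append(chunk.choices[0].text or "")
        if getattr(chunk, "usage", None) is not None:  # trailing usage chunk
            usage = chunk.usage
    return "".join(text_parts), usage
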
@@ -139,3 +139,38 @@ def test_convert_chat_to_text_completion_multiple_choices():
         completion_tokens_details=None,
         prompt_tokens_details=None,
     )
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("sync_mode", [True, False])
+async def test_text_completion_include_usage(sync_mode):
+    """Test text completion with include_usage"""
+    last_chunk = None
+    if sync_mode:
+        response = await litellm.atext_completion(
+            model="gpt-3.5-turbo",
+            prompt="Hello, world!",
+            stream=True,
+            stream_options={"include_usage": True},
+        )
+
+        async for chunk in response:
+            print(chunk)
+            last_chunk = chunk
+    else:
+        response = litellm.text_completion(
+            model="gpt-3.5-turbo",
+            prompt="Hello, world!",
+            stream=True,
+            stream_options={"include_usage": True},
+        )
+
+        for chunk in response:
+            print(chunk)
+            last_chunk = chunk
+
+    assert last_chunk is not None
+    assert last_chunk.usage is not None
+    assert last_chunk.usage.prompt_tokens > 0
+    assert last_chunk.usage.completion_tokens > 0
+    assert last_chunk.usage.total_tokens > 0

@@ -378,6 +378,36 @@ async def test_chat_completion_streaming():
     print(f"response_str: {response_str}")
 
 
+@pytest.mark.asyncio
+async def test_completion_streaming_usage_metrics():
+    """
+    [PROD Test] Ensures usage metrics are returned correctly when `include_usage` is set to `True`
+    """
+    client = AsyncOpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
+
+    response = await client.completions.create(
+        model="gpt-instruct",
+        prompt="hey",
+        stream=True,
+        stream_options={"include_usage": True},
+        max_tokens=4,
+        temperature=0.00000001,
+    )
+
+    last_chunk = None
+    async for chunk in response:
+        print("chunk", chunk)
+        last_chunk = chunk
+
+    assert last_chunk is not None, "No chunks were received"
+    assert last_chunk.usage is not None, "Usage information was not received"
+    assert last_chunk.usage.prompt_tokens > 0, "Prompt tokens should be greater than 0"
+    assert (
+        last_chunk.usage.completion_tokens > 0
+    ), "Completion tokens should be greater than 0"
+    assert last_chunk.usage.total_tokens > 0, "Total tokens should be greater than 0"
+
+
 @pytest.mark.asyncio
 async def test_chat_completion_anthropic_structured_output():
     """

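Outside pytest, the same proxy route can be exercised directly with the OpenAI SDK, here with the synchronous client. The endpoint, key, and model alias are the values used by the test above; a running litellm proxy that serves "gpt-instruct" on port 4000 is assumed and is not part of this diff:

from openai import OpenAI

# Values taken from test_completion_streaming_usage_metrics; the proxy and
# its "gpt-instruct" model alias must already exist (assumption).
client = OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.completions.create(
    model="gpt-instruct",
    prompt="hey",
    stream=True,
    stream_options={"include_usage": True},
    max_tokens=4,
)

last_chunk = None
for chunk in response:
    last_chunk = chunk

# With the fix deployed on the proxy, the final chunk carries the usage block.
print(last_chunk.usage)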