# litellm/tests/local_testing/test_stream_chunk_builder.py
import asyncio
import os
import sys
import time
import traceback

import pytest
from typing import List

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import dotenv
from openai import OpenAI

import litellm
import stream_chunk_testdata
from litellm import completion, stream_chunk_builder
from litellm.types.utils import StreamingChoices, ChatCompletionAudioResponse


def check_non_streaming_response(completion):
    assert completion.choices[0].message.audio is not None, "Audio response is missing"
    print("audio", completion.choices[0].message.audio)
    assert isinstance(
        completion.choices[0].message.audio, ChatCompletionAudioResponse
    ), "Invalid audio response type"
    assert len(completion.choices[0].message.audio.data) > 0, "Audio data is empty"


dotenv.load_dotenv()

user_message = "What is the current weather in Boston?"
messages = [{"content": user_message, "role": "user"}]
function_schema = {
"name": "get_weather",
"description": "gets the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
},
"required": ["location"],
},
}
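# OpenAI `tools`-format schema used by the streaming tool-call tests below.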
tools_schema = [
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "Get the current weather in a given location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state, e.g. San Francisco, CA",
},
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
},
"required": ["location"],
},
},
}
]
# def test_stream_chunk_builder_tools():
# try:
# litellm.set_verbose = False
# response = client.chat.completions.create(
# model="gpt-3.5-turbo",
# messages=messages,
# tools=tools_schema,
# # stream=True,
# # complete_response=True # runs stream_chunk_builder under-the-hood
# )
# print(f"response: {response}")
# print(f"response usage: {response.usage}")
# except Exception as e:
# pytest.fail(f"An exception occurred - {str(e)}")
# test_stream_chunk_builder_tools()
def test_stream_chunk_builder_litellm_function_call():
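    """
    Non-streaming completion with the legacy `functions` param; just checks the call
    does not raise.
    """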
try:
litellm.set_verbose = False
response = litellm.completion(
model="gpt-3.5-turbo",
messages=messages,
functions=[function_schema],
# stream=True,
# complete_response=True # runs stream_chunk_builder under-the-hood
)
print(f"response: {response}")
except Exception as e:
pytest.fail(f"An exception occurred - {str(e)}")
# test_stream_chunk_builder_litellm_function_call()
def test_stream_chunk_builder_litellm_tool_call():
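    """
    Streaming completion with `tools`, rebuilt via complete_response=True; checks the
    rebuilt usage has non-zero prompt/completion tokens and a consistent total.
    """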
try:
litellm.set_verbose = True
response = litellm.completion(
model="gpt-3.5-turbo",
messages=messages,
tools=tools_schema,
stream=True,
complete_response=True,
)
print(f"complete response: {response}")
print(f"complete response usage: {response.usage}")
assert response.usage.completion_tokens > 0
assert response.usage.prompt_tokens > 0
assert (
response.usage.total_tokens
== response.usage.completion_tokens + response.usage.prompt_tokens
)
except Exception as e:
pytest.fail(f"An exception occurred - {str(e)}")
# test_stream_chunk_builder_litellm_tool_call()
def test_stream_chunk_builder_litellm_tool_call_regular_message():
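    """
    Streaming completion where the prompt is a regular message (no tool call expected);
    checks the rebuilt usage and that the provider is recorded in _hidden_params.
    """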
try:
messages = [{"role": "user", "content": "Hey, how's it going?"}]
# litellm.set_verbose = True
response = litellm.completion(
model="gpt-3.5-turbo",
messages=messages,
tools=tools_schema,
stream=True,
complete_response=True,
)
print(f"complete response: {response}")
print(f"complete response usage: {response.usage}")
assert response.usage.completion_tokens > 0
assert response.usage.prompt_tokens > 0
assert (
response.usage.total_tokens
== response.usage.completion_tokens + response.usage.prompt_tokens
)
# check provider is in hidden params
print("hidden params", response._hidden_params)
assert response._hidden_params["custom_llm_provider"] == "openai"
except Exception as e:
pytest.fail(f"An exception occurred - {str(e)}")
# test_stream_chunk_builder_litellm_tool_call_regular_message()
def test_stream_chunk_builder_litellm_usage_chunks():
"""
Checks if stream_chunk_builder is able to correctly rebuild with given metadata from streaming chunks
"""
from litellm.types.utils import Usage
messages = [
{"role": "user", "content": "Tell me the funniest joke you know."},
{
"role": "assistant",
"content": "Why did the chicken cross the road?\nYou will not guess this one I bet\n",
},
{"role": "user", "content": "I do not know, why?"},
{"role": "assistant", "content": "uhhhh\n\n\nhmmmm.....\nthinking....\n"},
{"role": "user", "content": "\nI am waiting...\n\n...\n"},
]
usage: litellm.Usage = Usage(
completion_tokens=27,
prompt_tokens=55,
total_tokens=82,
completion_tokens_details=None,
prompt_tokens_details=None,
)
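    # Hard-coded reference usage for the messages above; the prompt token count is
    # compared against the value rebuilt from the streamed usage chunks below.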
gemini_pt = usage.prompt_tokens
# make a streaming gemini call
try:
response = completion(
model="gemini/gemini-1.5-flash",
messages=messages,
stream=True,
complete_response=True,
stream_options={"include_usage": True},
)
except litellm.InternalServerError as e:
pytest.skip(f"Skipping test due to internal server error - {str(e)}")
usage: litellm.Usage = response.usage
stream_rebuilt_pt = usage.prompt_tokens
# assert prompt tokens are the same
assert gemini_pt == stream_rebuilt_pt
def test_stream_chunk_builder_litellm_mixed_calls():
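    """
    Rebuilds a canned chunk stream (see stream_chunk_testdata) that mixes text content
    and a tool call; checks both are preserved in the rebuilt response.
    """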
response = stream_chunk_builder(stream_chunk_testdata.chunks)
assert (
response.choices[0].message.content
== "To answer your question about how many rows are in the 'users' table, I'll need to run a SQL query. Let me do that for you."
)
print(response.choices[0].message.tool_calls[0].to_dict())
assert len(response.choices[0].message.tool_calls) == 1
assert response.choices[0].message.tool_calls[0].to_dict() == {
"function": {
"arguments": '{"query": "SELECT COUNT(*) FROM users;"}',
"name": "sql_query",
},
"id": "toolu_01H3AjkLpRtGQrof13CBnWfK",
"type": "function",
}
def test_stream_chunk_builder_litellm_empty_chunks():
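    """
    chunks=None should raise litellm.APIError; an empty chunk list should return None.
    """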
with pytest.raises(litellm.APIError):
response = stream_chunk_builder(chunks=None)
response = stream_chunk_builder(chunks=[])
assert response is None
def test_stream_chunk_builder_multiple_tool_calls():
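    """
    Rebuilds a hand-written stream containing two parallel tool calls (indices 0 and 1)
    and checks that each call's id, name, and arguments are correctly reassembled.
    """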
init_chunks = [
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"id": "call_X9P9B6STj7ze8OsJCGkfoN94",
"function": {"arguments": "", "name": "exponentiate"},
"type": "function",
"index": 0,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": '{"ba'},
"type": "function",
"index": 0,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": 'se": '},
"type": "function",
"index": 0,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": '3, "ex'},
"type": "function",
"index": 0,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": "pone"},
"type": "function",
"index": 0,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": 'nt": '},
"type": "function",
"index": 0,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": "5}"},
"type": "function",
"index": 0,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"id": "call_Qq8yDeRx7v276abRcLrYORdW",
"function": {"arguments": "", "name": "add"},
"type": "function",
"index": 1,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": '{"fi'},
"type": "function",
"index": 1,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": "rst_i"},
"type": "function",
"index": 1,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": 'nt": 1'},
"type": "function",
"index": 1,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": '2, "'},
"type": "function",
"index": 1,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": "secon"},
"type": "function",
"index": 1,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": 'd_int"'},
"type": "function",
"index": 1,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [
{
"index": 0,
"delta": {
"role": "assistant",
"tool_calls": [
{
"function": {"arguments": ": 3}"},
"type": "function",
"index": 1,
}
],
},
}
],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
{
"id": "chatcmpl-A5kCnzaxRsknd6008552ZhDi71yPt",
"choices": [{"finish_reason": "tool_calls", "index": 0, "delta": {}}],
"created": 1725932618,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"system_fingerprint": "fp_b2ffeb16ee",
},
]
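    # Convert the raw chunk dicts into litellm ModelResponse stream objects before rebuilding.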
chunks = []
for chunk in init_chunks:
chunks.append(litellm.ModelResponse(**chunk, stream=True))
response = stream_chunk_builder(chunks=chunks)
print(f"Returned response: {response}")
completed_response = {
"id": "chatcmpl-A61mXjvcRX0Xr2IiojN9TPiy1P3Fm",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"message": {
"content": None,
"role": "assistant",
"tool_calls": [
{
"function": {
"arguments": '{"base": 3, "exponent": 5}',
"name": "exponentiate",
},
"id": "call_X9P9B6STj7ze8OsJCGkfoN94",
"type": "function",
},
{
"function": {
"arguments": '{"first_int": 12, "second_int": 3}',
"name": "add",
},
"id": "call_Qq8yDeRx7v276abRcLrYORdW",
"type": "function",
},
],
"function_call": None,
},
}
],
"created": 1726000181,
"model": "gpt-4o-2024-05-13",
"object": "chat.completion",
"system_fingerprint": "fp_25624ae3a5",
"usage": {"completion_tokens": 55, "prompt_tokens": 127, "total_tokens": 182},
"service_tier": None,
}
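    # Only `choices` is compared below, so the id/model/usage fields here don't need to
    # match the synthetic chunks above.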
expected_response = litellm.ModelResponse(**completed_response)
print(f"\n\nexpected_response:\n{expected_response}\n\n")
assert (
expected_response.choices == response.choices
), "\nGot={}\n, Expected={}\n".format(response.choices, expected_response.choices)
def test_stream_chunk_builder_openai_prompt_caching():
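    """
    Streams directly through the OpenAI client with include_usage, then checks that the
    usage object rebuilt by stream_chunk_builder matches the final usage chunk
    field-for-field, including nested detail objects such as prompt token details.
    """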
from openai import OpenAI
from pydantic import BaseModel
client = OpenAI(
# This is the default and can be omitted
api_key=os.getenv("OPENAI_API_KEY"),
)
chat_completion = client.chat.completions.create(
messages=[
{
"role": "user",
"content": "Say this is a test",
}
],
model="gpt-3.5-turbo",
stream=True,
stream_options={"include_usage": True},
)
chunks: List[litellm.ModelResponse] = []
usage_obj = None
for chunk in chat_completion:
chunks.append(litellm.ModelResponse(**chunk.model_dump(), stream=True))
print(f"chunks: {chunks}")
usage_obj: litellm.Usage = chunks[-1].usage # type: ignore
response = stream_chunk_builder(chunks=chunks)
print(f"response: {response}")
print(f"response usage: {response.usage}")
for k, v in usage_obj.model_dump(exclude_none=True).items():
print(k, v)
response_usage_value = getattr(response.usage, k) # type: ignore
print(f"response_usage_value: {response_usage_value}")
print(f"type: {type(response_usage_value)}")
if isinstance(response_usage_value, BaseModel):
assert response_usage_value.model_dump(exclude_none=True) == v
else:
assert response_usage_value == v
def test_stream_chunk_builder_openai_audio_output_usage():
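    """
    Same usage comparison for gpt-4o-audio-preview; also checks that the rebuilt
    response contains a valid audio block via check_non_streaming_response.
    """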
from pydantic import BaseModel
from openai import OpenAI
from typing import Optional
client = OpenAI(
# This is the default and can be omitted
api_key=os.getenv("OPENAI_API_KEY"),
)
completion = client.chat.completions.create(
model="gpt-4o-audio-preview",
modalities=["text", "audio"],
audio={"voice": "alloy", "format": "pcm16"},
messages=[{"role": "user", "content": "response in 1 word - yes or no"}],
stream=True,
stream_options={"include_usage": True},
)
chunks = []
for chunk in completion:
chunks.append(litellm.ModelResponse(**chunk.model_dump(), stream=True))
usage_obj: Optional[litellm.Usage] = None
for index, chunk in enumerate(chunks):
if hasattr(chunk, "usage"):
usage_obj = chunk.usage
print(f"chunk usage: {chunk.usage}")
print(f"index: {index}")
print(f"len chunks: {len(chunks)}")
print(f"usage_obj: {usage_obj}")
response = stream_chunk_builder(chunks=chunks)
print(f"response usage: {response.usage}")
check_non_streaming_response(response)
print(f"response: {response}")
for k, v in usage_obj.model_dump(exclude_none=True).items():
print(k, v)
response_usage_value = getattr(response.usage, k) # type: ignore
print(f"response_usage_value: {response_usage_value}")
print(f"type: {type(response_usage_value)}")
if isinstance(response_usage_value, BaseModel):
assert response_usage_value.model_dump(exclude_none=True) == v
else:
assert response_usage_value == v