litellm-mirror/tests/litellm/test_main.py

import json
import os
import sys
import httpx
import pytest
import respx

from fastapi.testclient import TestClient

# Put the directory two levels above the current working directory at the
# front of sys.path ("../.." is resolved against the CWD by abspath).
sys.path.insert(
    0, os.path.abspath("../..")
)

from unittest.mock import MagicMock, call, patch

import litellm


@pytest.fixture(autouse=True)
def add_api_keys_to_env(monkeypatch):
    """Seed placeholder provider credentials into the environment for each test.

    The values are applied through the ``monkeypatch`` fixture rather than by
    writing to ``os.environ`` directly.
    """
    placeholder_credentials = (
        ("ANTHROPIC_API_KEY", "sk-ant-api03-1234567890"),
        ("OPENAI_API_KEY", "sk-openai-api03-1234567890"),
        ("AWS_ACCESS_KEY_ID", "my-fake-aws-access-key-id"),
        ("AWS_SECRET_ACCESS_KEY", "my-fake-aws-secret-access-key"),
        ("AWS_REGION", "us-east-1"),
    )
    for env_name, env_value in placeholder_credentials:
        monkeypatch.setenv(env_name, env_value)


@pytest.fixture
def openai_api_response():
    """Return a canned OpenAI ``chat.completion`` payload as a plain dict.

    The payload holds a single choice whose assistant message has empty
    content, plus populated usage counters.
    """
    assistant_message = {
        "content": "",
        "refusal": None,
        "role": "assistant",
        "audio": None,
        "function_call": None,
        "tool_calls": None,
    }
    only_choice = {
        "finish_reason": "stop",
        "index": 0,
        "logprobs": None,
        "message": assistant_message,
    }
    usage = {
        "completion_tokens": 1,
        "prompt_tokens": 121,
        "total_tokens": 122,
        "completion_tokens_details": {
            "accepted_prediction_tokens": 0,
            "audio_tokens": 0,
            "reasoning_tokens": 0,
            "rejected_prediction_tokens": 0,
        },
        "prompt_tokens_details": {"audio_tokens": 0, "cached_tokens": 0},
    }

    return {
        "id": "chatcmpl-B0W3vmiM78Xkgx7kI7dr7PC949DMS",
        "choices": [only_choice],
        "created": 1739462947,
        "model": "gpt-4o-mini-2024-07-18",
        "object": "chat.completion",
        "service_tier": "default",
        "system_fingerprint": "fp_bd83329f63",
        "usage": usage,
    }


def test_completion_missing_role(openai_api_response):
    """Call ``litellm.completion`` with a message that has no ``role`` key.

    The second message carries only ``content`` and ``tool_calls``. The OpenAI
    client's raw-response ``create`` attribute is swapped for a mock, and the
    test asserts that the mock was invoked exactly once.
    """
    from openai import OpenAI

    from litellm.types.utils import ModelResponse

    def _tool_call(call_id, arguments, index):
        # Every tool call in this test targets the same function name.
        return {
            "id": call_id,
            "function": {
                "arguments": arguments,
                "name": "tool_name",
            },
            "type": "function",
            "index": index,
        }

    roleless_message = {
        "content": "",
        "tool_calls": [
            _tool_call(
                "call_m0vFJjQmTH1McvaHBPR2YFwY",
                '{"input": "dksjsdkjdhskdjshdskhjkhlk"}',
                0,
            ),
            _tool_call(
                "call_Vw6RaqV2n5aaANXEdp5pYxo2",
                '{"input": "jkljlkjlkjlkjlk"}',
                1,
            ),
            _tool_call(
                "call_hBIKwldUEGlNh6NlSXil62K4",
                '{"input": "jkjlkjlkjlkj;lj"}',
                2,
            ),
        ],
    }
    conversation = [{"role": "user", "content": "Hey"}, roleless_message]

    openai_client = OpenAI(api_key="test_api_key")

    raw_response_mock = MagicMock()
    raw_response_mock.headers = {
        "x-request-id": "123",
        "openai-organization": "org-123",
        "x-ratelimit-limit-requests": "100",
        "x-ratelimit-remaining-requests": "99",
    }
    raw_response_mock.parse.return_value = ModelResponse(**openai_api_response)

    print(f"openai_api_response: {openai_api_response}")

    raw_api = openai_client.chat.completions.with_raw_response
    # NOTE(review): the mock is installed as `create` itself, so calling it
    # yields `raw_response_mock.return_value`; the `headers` / `parse` set up
    # above live on the callable, not on what it returns. Confirm whether
    # `return_value=raw_response_mock` was intended.
    with patch.object(raw_api, "create", raw_response_mock) as mock_create:
        litellm.completion(
            model="gpt-4o-mini",
            messages=conversation,
            client=openai_client,
        )

        mock_create.assert_called_once()


@pytest.mark.parametrize(
    "model",
    [
        "gemini/gemini-1.5-flash",
        "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
        "bedrock/invoke/anthropic.claude-3-5-sonnet-20240620-v1:0",
        "anthropic/claude-3-5-sonnet",
    ],
)
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_url_with_format_param(model, sync_mode, monkeypatch):
    """Check that an explicit image ``format`` shows up in the outgoing request.

    An image URL is sent together with ``"format": "image/png"``. The body
    handed to the patched HTTP handler's ``post`` must contain ``png`` and
    must not contain ``jpeg``.
    """
    from litellm import acompletion, completion
    from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler

    http_client = HTTPHandler() if sync_mode else AsyncHTTPHandler()

    image_part = {
        "type": "image_url",
        "image_url": {
            "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
            "format": "image/png",
        },
    }
    text_part = {"type": "text", "text": "Describe this image"}
    request_kwargs = {
        "model": model,
        "messages": [{"role": "user", "content": [image_part, text_part]}],
    }

    with patch.object(http_client, "post", new=MagicMock()) as mock_post:
        try:
            if sync_mode:
                result = completion(**request_kwargs, client=http_client)
            else:
                result = await acompletion(**request_kwargs, client=http_client)
            print(result)
        except Exception as err:
            # `post` is a bare MagicMock, so the call may fail after the
            # request is issued; only the outgoing payload is checked below.
            print(err)

        mock_post.assert_called()

        sent_kwargs = mock_post.call_args.kwargs
        print(sent_kwargs)

        # Use the pre-serialized "data" body when present, else serialize "json".
        payload = (
            sent_kwargs["data"]
            if "data" in sent_kwargs
            else json.dumps(sent_kwargs["json"])
        )
        assert "png" in payload
        assert "jpeg" not in payload


@pytest.mark.parametrize("model", ["gpt-4o-mini"])
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_url_with_format_param_openai(model, sync_mode):
    """Check that the image ``format`` key is absent from the OpenAI request.

    The same image message used for other providers is sent through an OpenAI
    client whose raw-response ``create`` is patched. The keyword arguments
    captured by the mock, serialized to JSON, must not contain ``format``.
    """
    from openai import AsyncOpenAI, OpenAI

    from litellm import acompletion, completion

    openai_client = OpenAI() if sync_mode else AsyncOpenAI()

    image_part = {
        "type": "image_url",
        "image_url": {
            "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
            "format": "image/png",
        },
    }
    text_part = {"type": "text", "text": "Describe this image"}
    request_kwargs = {
        "model": model,
        "messages": [{"role": "user", "content": [image_part, text_part]}],
    }

    raw_api = openai_client.chat.completions.with_raw_response
    with patch.object(raw_api, "create") as mock_create:
        try:
            if sync_mode:
                result = completion(**request_kwargs, client=openai_client)
            else:
                result = await acompletion(**request_kwargs, client=openai_client)
            print(result)
        except Exception as err:
            # Failures after the mocked call are tolerated; only the captured
            # request kwargs are checked below.
            print(err)

        mock_create.assert_called()

        sent_kwargs = mock_create.call_args.kwargs
        print(sent_kwargs)

        serialized_kwargs = json.dumps(sent_kwargs)

        assert "format" not in serialized_kwargs


def test_bedrock_latency_optimized_inference():
    """Check that ``performanceConfig`` reaches the Bedrock request body.

    ``litellm.completion`` is called with ``performanceConfig={"latency":
    "optimized"}`` and a patched HTTP handler; the JSON body passed to
    ``post`` must carry the same latency setting.
    """
    from litellm.llms.custom_httpx.http_handler import HTTPHandler

    http_client = HTTPHandler()
    with patch.object(http_client, "post") as mock_post:
        try:
            litellm.completion(
                model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
                messages=[{"role": "user", "content": "Hello, how are you?"}],
                performanceConfig={"latency": "optimized"},
                client=http_client,
            )
        except Exception as err:
            # Errors raised after the mocked post are tolerated; the request
            # payload is what gets inspected.
            print(err)

        mock_post.assert_called_once()
        sent_body = json.loads(mock_post.call_args.kwargs["data"])
        assert sent_body["performanceConfig"]["latency"] == "optimized"

@pytest.fixture(autouse=True)
def set_openrouter_api_key():
    """Set a placeholder ``OPENROUTER_API_KEY`` for the duration of each test.

    The value present before the test (if any) is remembered and put back
    afterwards; if the variable was not set beforehand it is removed again.

    Yields:
        None. The fixture exists only for its environment side effect.
    """
    original_api_key = os.environ.get("OPENROUTER_API_KEY")
    os.environ["OPENROUTER_API_KEY"] = "fake-key-for-testing"
    try:
        yield
    finally:
        # Restore in `finally` so cleanup also runs if the generator is closed
        # instead of being resumed normally.
        if original_api_key is None:
            # pop() with a default tolerates the variable having been removed
            # during the test, where `del` would raise KeyError.
            os.environ.pop("OPENROUTER_API_KEY", None)
        else:
            os.environ["OPENROUTER_API_KEY"] = original_api_key


@pytest.mark.asyncio
async def test_extra_body_with_fallback(respx_mock: respx.MockRouter, set_openrouter_api_key):
    """
    Regression test for https://github.com/BerriAI/litellm/issues/8425.

    ``acompletion`` is called with both ``extra_body`` and ``fallbacks``; the
    request captured by the respx mock must still carry the ``extra_body``
    contents under the top-level ``provider`` key.
    """
    # Inputs for the call under test
    model = "openrouter/deepseek/deepseek-chat"
    messages = [{"role": "user", "content": "Hello, world!"}]
    provider_options = {
        "order": ["DeepSeek"],
        "allow_fallbacks": False,
        "require_parameters": True,
    }
    extra_body = {"provider": provider_options}
    fallbacks = [{"model": "openrouter/google/gemini-flash-1.5-8b"}]

    # Canned reply served by the mocked OpenRouter endpoint
    mocked_reply = {
        "id": "chatcmpl-123",
        "object": "chat.completion",
        "created": 1677652288,
        "model": model,
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": "Hello from mocked response!",
                },
                "finish_reason": "stop",
            }
        ],
        "usage": {"prompt_tokens": 9, "completion_tokens": 12, "total_tokens": 21},
    }
    route = respx_mock.post("https://openrouter.ai/api/v1/chat/completions")
    route.respond(json=mocked_reply)

    response = await litellm.acompletion(
        model=model,
        messages=messages,
        extra_body=extra_body,
        fallbacks=fallbacks,
        api_key="fake-openrouter-api-key",
    )

    # Decode the first request the mock recorded
    request: httpx.Request = respx_mock.calls[0].request
    sent_body = json.loads(request.read())

    # Basic parameters: the model is sent without its "openrouter/" prefix
    assert sent_body["model"] == "deepseek/deepseek-chat"
    assert sent_body["messages"] == messages

    # The extra_body parameters remain under the provider key
    sent_provider = sent_body["provider"]
    assert sent_provider["order"] == ["DeepSeek"]
    assert sent_provider["allow_fallbacks"] is False
    assert sent_provider["require_parameters"] is True

    # The mocked reply is surfaced to the caller
    assert response is not None
    assert response.choices[0].message.content == "Hello from mocked response!"


class Test_Chat:
    """Tests for ``litellm.main.Chat`` and its ``completions.create`` call."""

    @pytest.fixture
    def mock_completion(self, mocker) -> MagicMock:
        """Patch ``litellm.main.completion`` and return the replacement mock."""
        patched = mocker.patch.object(litellm.main, "completion")
        return patched

    @pytest.fixture
    def mock_acompletion(self, mocker) -> MagicMock:
        """Patch ``litellm.main.acompletion`` and return the replacement mock."""
        patched = mocker.patch.object(litellm.main, "acompletion")
        return patched

    def test_calls_completion_without_side_effect_to_params(self, mock_completion):
        """Two ``create`` calls forward only their own kwargs and leave ``params`` empty."""
        params = {}
        chat = litellm.main.Chat(params, router_obj=None)

        # Each call passes a different extra keyword argument.
        for extra_kwargs in ({"foo": "bar"}, {"bar": "foo"}):
            chat.completions.create(
                messages=[{"role": "user", "content": "hello"}],
                model="gemini/gemini-1.5-flash",
                **extra_kwargs,
            )

        expected_calls = [
            call(
                model="gemini/gemini-1.5-flash",
                messages=[{"role": "user", "content": "hello"}],
                foo="bar",
            ),
            call(
                model="gemini/gemini-1.5-flash",
                messages=[{"role": "user", "content": "hello"}],
                bar="foo",
            ),
        ]
        assert mock_completion.call_args_list == expected_calls
        assert params == {}

    @pytest.mark.asyncio
    async def test_calls_acompletion_without_side_effect_to_params(self, mock_acompletion):
        """Async variant: ``params`` keeps only its ``acompletion`` flag after two calls."""
        params = {"acompletion": True}
        chat = litellm.main.Chat(params, router_obj=None)

        # Each call passes a different extra keyword argument.
        for extra_kwargs in ({"foo": "bar"}, {"bar": "foo"}):
            await chat.completions.create(  # type: ignore
                messages=[{"role": "user", "content": "hello"}],
                model="gemini/gemini-1.5-flash",
                **extra_kwargs,
            )

        expected_calls = [
            call(
                model="gemini/gemini-1.5-flash",
                messages=[{"role": "user", "content": "hello"}],
                foo="bar",
            ),
            call(
                model="gemini/gemini-1.5-flash",
                messages=[{"role": "user", "content": "hello"}],
                bar="foo",
            ),
        ]
        assert mock_acompletion.call_args_list == expected_calls
        assert params == {"acompletion": True}

    def test_calls_completion_with_router_obj(self, mocker):
        """With a router object supplied, ``create`` calls ``router_obj.completion``."""
        router = mocker.MagicMock()
        chat = litellm.main.Chat({}, router_obj=router)

        chat.completions.create(
            messages=[{"role": "user", "content": "hello"}],
            model="gemini/gemini-1.5-flash",
            foo="bar",
        )

        expected_kwargs = {
            "model": "gemini/gemini-1.5-flash",
            "messages": [{"role": "user", "content": "hello"}],
            "foo": "bar",
        }
        router.completion.assert_called_once_with(**expected_kwargs)

    @pytest.mark.asyncio
    async def test_calls_acompletion_with_router_obj(self, mocker):
        """Async variant: ``create`` awaits ``router_obj.acompletion``."""
        router = mocker.AsyncMock()
        chat = litellm.main.Chat({"acompletion": True}, router_obj=router)

        await chat.completions.create(  # type: ignore
            messages=[{"role": "user", "content": "hello"}],
            model="gemini/gemini-1.5-flash",
            foo="bar",
        )

        expected_kwargs = {
            "model": "gemini/gemini-1.5-flash",
            "messages": [{"role": "user", "content": "hello"}],
            "foo": "bar",
        }
        router.acompletion.assert_called_once_with(**expected_kwargs)