import json
import os
import sys
from unittest.mock import MagicMock, call, patch

import httpx
import pytest
import respx
from fastapi.testclient import TestClient

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import litellm

# Image used by the "format" parameter tests; only the URL string is sent,
# the tests never download it themselves.
_IMAGE_URL = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"


def _image_url_request_args(model: str) -> dict:
    """Build completion kwargs with one user message holding an image and text.

    The ``image_url`` part carries an explicit ``"format": "image/png"`` even
    though the URL ends in ``.jpg``; the tests check how that field is handled.

    Args:
        model: Model name placed under the ``"model"`` key.

    Returns:
        A dict with ``"model"`` and ``"messages"`` keys, ready to be splatted
        into ``completion`` / ``acompletion``.
    """
    return {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": _IMAGE_URL,
                            "format": "image/png",
                        },
                    },
                    {"type": "text", "text": "Describe this image"},
                ],
            }
        ],
    }


@pytest.fixture(autouse=True)
def add_api_keys_to_env(monkeypatch):
    """Set fake provider credentials in the environment for every test."""
    monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-ant-api03-1234567890")
    monkeypatch.setenv("OPENAI_API_KEY", "sk-openai-api03-1234567890")
    monkeypatch.setenv("AWS_ACCESS_KEY_ID", "my-fake-aws-access-key-id")
    monkeypatch.setenv("AWS_SECRET_ACCESS_KEY", "my-fake-aws-secret-access-key")
    monkeypatch.setenv("AWS_REGION", "us-east-1")


@pytest.fixture
def openai_api_response():
    """Return a dict shaped like an OpenAI chat completion with empty content."""
    return {
        "id": "chatcmpl-B0W3vmiM78Xkgx7kI7dr7PC949DMS",
        "choices": [
            {
                "finish_reason": "stop",
                "index": 0,
                "logprobs": None,
                "message": {
                    "content": "",
                    "refusal": None,
                    "role": "assistant",
                    "audio": None,
                    "function_call": None,
                    "tool_calls": None,
                },
            }
        ],
        "created": 1739462947,
        "model": "gpt-4o-mini-2024-07-18",
        "object": "chat.completion",
        "service_tier": "default",
        "system_fingerprint": "fp_bd83329f63",
        "usage": {
            "completion_tokens": 1,
            "prompt_tokens": 121,
            "total_tokens": 122,
            "completion_tokens_details": {
                "accepted_prediction_tokens": 0,
                "audio_tokens": 0,
                "reasoning_tokens": 0,
                "rejected_prediction_tokens": 0,
            },
            "prompt_tokens_details": {"audio_tokens": 0, "cached_tokens": 0},
        },
    }


def test_completion_missing_role(openai_api_response):
    """Call ``litellm.completion`` with a message that has no ``"role"`` key.

    The second message carries ``tool_calls`` but omits ``"role"``. The test
    passes when the patched OpenAI ``create`` is invoked exactly once.
    """
    from openai import OpenAI

    from litellm.types.utils import ModelResponse

    client = OpenAI(api_key="test_api_key")

    mock_raw_response = MagicMock()
    mock_raw_response.headers = {
        "x-request-id": "123",
        "openai-organization": "org-123",
        "x-ratelimit-limit-requests": "100",
        "x-ratelimit-remaining-requests": "99",
    }
    mock_raw_response.parse.return_value = ModelResponse(**openai_api_response)

    print(f"openai_api_response: {openai_api_response}")

    # NOTE(review): mock_raw_response is passed as ``new``, so it replaces
    # ``create`` itself. The value returned by calling it is therefore
    # ``mock_raw_response.return_value``, not the mock configured above with
    # ``headers``/``parse`` -- confirm this is intended.
    with patch.object(
        client.chat.completions.with_raw_response, "create", mock_raw_response
    ) as mock_create:
        litellm.completion(
            model="gpt-4o-mini",
            messages=[
                {"role": "user", "content": "Hey"},
                {
                    "content": "",
                    "tool_calls": [
                        {
                            "id": "call_m0vFJjQmTH1McvaHBPR2YFwY",
                            "function": {
                                "arguments": '{"input": "dksjsdkjdhskdjshdskhjkhlk"}',
                                "name": "tool_name",
                            },
                            "type": "function",
                            "index": 0,
                        },
                        {
                            "id": "call_Vw6RaqV2n5aaANXEdp5pYxo2",
                            "function": {
                                "arguments": '{"input": "jkljlkjlkjlkjlk"}',
                                "name": "tool_name",
                            },
                            "type": "function",
                            "index": 1,
                        },
                        {
                            "id": "call_hBIKwldUEGlNh6NlSXil62K4",
                            "function": {
                                "arguments": '{"input": "jkjlkjlkjlkj;lj"}',
                                "name": "tool_name",
                            },
                            "type": "function",
                            "index": 2,
                        },
                    ],
                },
            ],
            client=client,
        )

        mock_create.assert_called_once()


@pytest.mark.parametrize(
    "model",
    [
        "gemini/gemini-1.5-flash",
        "bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
        "bedrock/invoke/anthropic.claude-3-5-sonnet-20240620-v1:0",
        "anthropic/claude-3-5-sonnet",
    ],
)
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_url_with_format_param(model, sync_mode, monkeypatch):
    """Check that an explicit image ``format`` reaches the outgoing request.

    The HTTP handler's ``post`` is replaced with a mock; the captured request
    body must mention ``png`` and must not mention ``jpeg``.
    """
    from litellm import acompletion, completion
    from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler

    client = HTTPHandler() if sync_mode else AsyncHTTPHandler()
    args = _image_url_request_args(model)

    with patch.object(client, "post", new=MagicMock()) as mock_client:
        # Errors raised by the call are printed, not propagated: the test only
        # inspects the request captured by the mock (presumably the mocked
        # response cannot be parsed into a real completion).
        try:
            if sync_mode:
                response = completion(**args, client=client)
            else:
                response = await acompletion(**args, client=client)
            print(response)
        except Exception as e:
            print(e)

        mock_client.assert_called()

        sent_kwargs = mock_client.call_args.kwargs
        print(sent_kwargs)

        # The body is read from ``data`` when present, otherwise from ``json``.
        if "data" in sent_kwargs:
            json_str = sent_kwargs["data"]
        else:
            json_str = json.dumps(sent_kwargs["json"])
        assert "png" in json_str
        assert "jpeg" not in json_str


@pytest.mark.parametrize("model", ["gpt-4o-mini"])
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_url_with_format_param_openai(model, sync_mode):
    """Check that ``format`` is absent from the kwargs passed to OpenAI's create."""
    from openai import AsyncOpenAI, OpenAI

    from litellm import acompletion, completion

    client = OpenAI() if sync_mode else AsyncOpenAI()
    args = _image_url_request_args(model)

    with patch.object(
        client.chat.completions.with_raw_response, "create"
    ) as mock_client:
        # Errors raised by the call are printed, not propagated; only the
        # kwargs captured by the mock are asserted on.
        try:
            if sync_mode:
                response = completion(**args, client=client)
            else:
                response = await acompletion(**args, client=client)
            print(response)
        except Exception as e:
            print(e)

        mock_client.assert_called()

        print(mock_client.call_args.kwargs)

        json_str = json.dumps(mock_client.call_args.kwargs)
        assert "format" not in json_str


def test_bedrock_latency_optimized_inference():
    """Check that ``performanceConfig`` is present in the body posted for Bedrock."""
    from litellm.llms.custom_httpx.http_handler import HTTPHandler

    client = HTTPHandler()

    with patch.object(client, "post") as mock_post:
        # Errors raised by the call are printed, not propagated; the assertions
        # below only look at the request captured by the mock.
        try:
            litellm.completion(
                model="bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0",
                messages=[{"role": "user", "content": "Hello, how are you?"}],
                performanceConfig={"latency": "optimized"},
                client=client,
            )
        except Exception as e:
            print(e)

        mock_post.assert_called_once()
        json_data = json.loads(mock_post.call_args.kwargs["data"])
        assert json_data["performanceConfig"]["latency"] == "optimized"


@pytest.fixture(autouse=True)
def set_openrouter_api_key(monkeypatch):
    """Set a fake ``OPENROUTER_API_KEY`` for every test.

    ``monkeypatch`` restores the previous value (or absence) on teardown, so
    cleanup cannot fail if a test removes the variable itself.
    """
    monkeypatch.setenv("OPENROUTER_API_KEY", "fake-key-for-testing")


@pytest.mark.asyncio
async def test_extra_body_with_fallback(
    respx_mock: respx.MockRouter, set_openrouter_api_key
):
    """
    Regression test for https://github.com/BerriAI/litellm/issues/8425.

    This was perhaps a wider issue with the acompletion function not passing
    kwargs such as extra_body correctly when fallbacks are specified.
    """
    # Set up test parameters
    model = "openrouter/deepseek/deepseek-chat"
    messages = [{"role": "user", "content": "Hello, world!"}]
    extra_body = {
        "provider": {
            "order": ["DeepSeek"],
            "allow_fallbacks": False,
            "require_parameters": True,
        }
    }
    fallbacks = [{"model": "openrouter/google/gemini-flash-1.5-8b"}]

    respx_mock.post("https://openrouter.ai/api/v1/chat/completions").respond(
        json={
            "id": "chatcmpl-123",
            "object": "chat.completion",
            "created": 1677652288,
            "model": model,
            "choices": [
                {
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": "Hello from mocked response!",
                    },
                    "finish_reason": "stop",
                }
            ],
            "usage": {"prompt_tokens": 9, "completion_tokens": 12, "total_tokens": 21},
        }
    )

    response = await litellm.acompletion(
        model=model,
        messages=messages,
        extra_body=extra_body,
        fallbacks=fallbacks,
        api_key="fake-openrouter-api-key",
    )

    # Get the first request recorded by the mock and decode its JSON body
    request: httpx.Request = respx_mock.calls[0].request
    request_body = json.loads(request.read())

    # Verify basic parameters
    assert request_body["model"] == "deepseek/deepseek-chat"
    assert request_body["messages"] == messages

    # Verify the extra_body parameters remain under the provider key
    assert request_body["provider"]["order"] == ["DeepSeek"]
    assert request_body["provider"]["allow_fallbacks"] is False
    assert request_body["provider"]["require_parameters"] is True

    # Verify the response
    assert response is not None
    assert response.choices[0].message.content == "Hello from mocked response!"


class Test_Chat:
    """Tests for ``litellm.main.Chat`` and its ``completions.create`` entry point."""

    @pytest.fixture
    def mock_completion(self, mocker) -> MagicMock:
        """Patch ``litellm.main.completion`` and return the mock."""
        return mocker.patch.object(litellm.main, "completion")

    def test_calls_completion_without_side_effect_to_params(self, mock_completion):
        """Two ``create`` calls must not leak kwargs into each other or ``params``."""
        params = {}
        chatobj = litellm.main.Chat(params, router_obj=None)
        chatobj.completions.create(
            messages=[{"role": "user", "content": "hello"}],
            model="gemini/gemini-1.5-flash",
            foo="bar",
        )
        chatobj.completions.create(
            messages=[{"role": "user", "content": "hello"}],
            model="gemini/gemini-1.5-flash",
            bar="foo",
        )

        assert mock_completion.call_args_list == [
            call(
                model="gemini/gemini-1.5-flash",
                messages=[{"role": "user", "content": "hello"}],
                foo="bar",
            ),
            call(
                model="gemini/gemini-1.5-flash",
                messages=[{"role": "user", "content": "hello"}],
                bar="foo",
            ),
        ]
        # The dict handed to Chat must be left untouched.
        assert params == {}

    @pytest.fixture
    def mock_acompletion(self, mocker) -> MagicMock:
        """Patch ``litellm.main.acompletion`` and return the mock."""
        return mocker.patch.object(litellm.main, "acompletion")

    @pytest.mark.asyncio
    async def test_calls_acompletion_without_side_effect_to_params(
        self, mock_acompletion
    ):
        """Async variant: with ``acompletion`` set, ``acompletion`` is called."""
        params = {"acompletion": True}
        chatobj = litellm.main.Chat(params, router_obj=None)
        await chatobj.completions.create(  # type: ignore
            messages=[{"role": "user", "content": "hello"}],
            model="gemini/gemini-1.5-flash",
            foo="bar",
        )
        await chatobj.completions.create(  # type: ignore
            messages=[{"role": "user", "content": "hello"}],
            model="gemini/gemini-1.5-flash",
            bar="foo",
        )

        assert mock_acompletion.call_args_list == [
            call(
                model="gemini/gemini-1.5-flash",
                messages=[{"role": "user", "content": "hello"}],
                foo="bar",
            ),
            call(
                model="gemini/gemini-1.5-flash",
                messages=[{"role": "user", "content": "hello"}],
                bar="foo",
            ),
        ]
        # The dict handed to Chat must be left untouched.
        assert params == {"acompletion": True}

    def test_calls_completion_with_router_obj(self, mocker):
        """When a router is supplied, ``create`` goes to ``router_obj.completion``."""
        router_obj = mocker.MagicMock()
        chatobj = litellm.main.Chat({}, router_obj=router_obj)
        chatobj.completions.create(
            messages=[{"role": "user", "content": "hello"}],
            model="gemini/gemini-1.5-flash",
            foo="bar",
        )

        router_obj.completion.assert_called_once_with(
            model="gemini/gemini-1.5-flash",
            messages=[{"role": "user", "content": "hello"}],
            foo="bar",
        )

    @pytest.mark.asyncio
    async def test_calls_acompletion_with_router_obj(self, mocker):
        """Async variant: ``create`` goes to ``router_obj.acompletion``."""
        router_obj = mocker.AsyncMock()
        chatobj = litellm.main.Chat({"acompletion": True}, router_obj=router_obj)
        await chatobj.completions.create(  # type: ignore
            messages=[{"role": "user", "content": "hello"}],
            model="gemini/gemini-1.5-flash",
            foo="bar",
        )

        router_obj.acompletion.assert_called_once_with(
            model="gemini/gemini-1.5-flash",
            messages=[{"role": "user", "content": "hello"}],
            foo="bar",
        )