# litellm/tests/local_testing/test_proxy_exception_mapping.py

# test that the proxy actually does exception mapping to the OpenAI format

import json
import os
import sys
from unittest import mock

from dotenv import load_dotenv

load_dotenv()
import asyncio
import io
import os

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import openai
import pytest
from fastapi import Response
from fastapi.testclient import TestClient

import litellm
from litellm.proxy.proxy_server import (  # Replace with the actual module where your FastAPI router is defined
    initialize,
    router,
    save_worker_config,
)

# Canned 401 response used as the return value of the patched router calls
# in the Azure authentication tests below.
invalid_authentication_error_response = Response(
    content=json.dumps({"error": "Invalid Authentication"}),
    status_code=401,
)

# Canned Azure "context length exceeded" error body. The context-window test
# compares the proxy's JSON response against this dict verbatim.
_context_length_exceeded_message = (
    "AzureException - Error code: 400 - {'error': {'message': "
    "\"This model's maximum context length is 4096 tokens. "
    "However, your messages resulted in 10007 tokens. "
    "Please reduce the length of the messages.\", "
    "'type': 'invalid_request_error', 'param': 'messages', "
    "'code': 'context_length_exceeded'}}"
)
context_length_exceeded_error_response_dict = {
    "error": {
        "message": _context_length_exceeded_message,
        "type": None,
        "param": None,
        "code": 400,
    },
}

# The same body wrapped in a 400 response, used as the mocked router result.
context_length_exceeded_error_response = Response(
    content=json.dumps(context_length_exceeded_error_response_dict),
    status_code=400,
)


@pytest.fixture
def client():
    """Return a ``TestClient`` for the proxy app.

    The proxy is first initialized from ``test_configs/test_bad_config.yaml``,
    resolved relative to this test file. ``app`` is imported only after that
    initialization has run.
    """
    tests_dir = os.path.dirname(os.path.abspath(__file__))
    asyncio.run(
        initialize(config=tests_dir + "/test_configs/test_bad_config.yaml")
    )
    from litellm.proxy.proxy_server import app as proxy_app

    return TestClient(proxy_app)


# raise openai.AuthenticationError
def test_chat_completion_exception(client):
    """Check that a /chat/completions error comes back in OpenAI error format.

    Posts a minimal chat request for ``gpt-3.5-turbo`` through the proxy test
    client and verifies that:

    * the JSON body has ``error`` as its only top-level key,
    * the error message is a string containing the litellm
      ``AuthenticationError`` marker,
    * the error ``code`` is a string, and
    * the OpenAI SDK maps the response to ``openai.AuthenticationError``.

    Any exception raised along the way, including a failed assertion, is
    reported through ``pytest.fail``.
    """
    try:
        # Your test data
        test_data = {
            "model": "gpt-3.5-turbo",
            "messages": [
                {"role": "user", "content": "hi"},
            ],
            "max_tokens": 10,
        }

        response = client.post("/chat/completions", json=test_data)

        json_response = response.json()
        print("keys in json response", json_response.keys())
        assert json_response.keys() == {"error"}
        print("ERROR=", json_response["error"])
        assert isinstance(json_response["error"]["message"], str)
        assert (
            "litellm.AuthenticationError: AuthenticationError"
            in json_response["error"]["message"]
        )

        code_in_error = json_response["error"]["code"]
        # OpenAI SDK required code to be STR, https://github.com/BerriAI/litellm/issues/4970
        # If we look on official python OpenAI lib, the code should be a string:
        # https://github.com/openai/openai-python/blob/195c05a64d39c87b2dfdf1eca2d339597f1fce03/src/openai/types/shared/error_object.py#L11
        # Related LiteLLM issue: https://github.com/BerriAI/litellm/discussions/4834
        assert isinstance(code_in_error, str)

        # make an openai client to call _make_status_error_from_response
        openai_client = openai.OpenAI(api_key="anything")
        openai_exception = openai_client._make_status_error_from_response(
            response=response
        )
        assert isinstance(openai_exception, openai.AuthenticationError)

    except Exception as e:
        pytest.fail(f"LiteLLM Proxy test failed. Exception {str(e)}")


# raise openai.AuthenticationError
@mock.patch(
    "litellm.proxy.proxy_server.llm_router.acompletion",
    return_value=invalid_authentication_error_response,
)
def test_chat_completion_exception_azure(mock_acompletion, client):
    """Check that a canned 401 from the router maps to ``AuthenticationError``.

    ``llm_router.acompletion`` is patched to return the canned
    invalid-authentication response. The test posts a chat request for
    ``azure-gpt-3.5-turbo``, checks the patched call received the request
    fields plus the extra keyword arguments listed below (values unchecked),
    and verifies the OpenAI SDK maps the proxy response to
    ``openai.AuthenticationError``.
    """
    payload = {
        "model": "azure-gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "hi"}],
        "max_tokens": 10,
    }
    # Extra keyword arguments expected alongside the request body; only their
    # presence is asserted, not their values.
    extra_kwargs = dict.fromkeys(
        (
            "litellm_call_id",
            "litellm_logging_obj",
            "request_timeout",
            "metadata",
            "proxy_server_request",
        ),
        mock.ANY,
    )

    try:
        response = client.post("/chat/completions", json=payload)
        mock_acompletion.assert_called_once_with(**payload, **extra_kwargs)

        body = response.json()
        print("keys in json response", body.keys())
        assert body.keys() == {"error"}

        # Let the OpenAI SDK classify the HTTP response for us.
        mapped_error = openai.OpenAI(
            api_key="anything"
        )._make_status_error_from_response(response=response)
        print(mapped_error)
        assert isinstance(mapped_error, openai.AuthenticationError)
    except Exception as exc:
        pytest.fail(f"LiteLLM Proxy test failed. Exception {exc!s}")


# raise openai.AuthenticationError
@mock.patch(
    "litellm.proxy.proxy_server.llm_router.aembedding",
    return_value=invalid_authentication_error_response,
)
def test_embedding_auth_exception_azure(mock_aembedding, client):
    """Check that a canned 401 on /embeddings maps to ``AuthenticationError``.

    ``llm_router.aembedding`` is patched to return the canned
    invalid-authentication response. The test posts an embedding request for
    ``azure-embedding``, checks the patched call received the request fields
    plus ``metadata`` and ``proxy_server_request`` (values unchecked), and
    verifies the OpenAI SDK maps the proxy response to
    ``openai.AuthenticationError``.
    """
    payload = {"model": "azure-embedding", "input": ["hi"]}

    try:
        response = client.post("/embeddings", json=payload)
        mock_aembedding.assert_called_once_with(
            metadata=mock.ANY,
            proxy_server_request=mock.ANY,
            **payload,
        )
        print("Response from proxy=", response)

        body = response.json()
        print("keys in json response", body.keys())
        assert body.keys() == {"error"}

        # Let the OpenAI SDK classify the HTTP response for us.
        mapped_error = openai.OpenAI(
            api_key="anything"
        )._make_status_error_from_response(response=response)
        print("Exception raised=", mapped_error)
        assert isinstance(mapped_error, openai.AuthenticationError)
    except Exception as exc:
        pytest.fail(f"LiteLLM Proxy test failed. Exception {exc!s}")


# raise openai.BadRequestError
# chat/completions openai
def test_exception_openai_bad_model(client):
    """Check that a chat request for ``azure/GPT-12`` maps to ``BadRequestError``.

    Posts the request through the proxy test client, requires ``error`` to be
    the only top-level key of the JSON body, and verifies the OpenAI SDK maps
    the response to ``openai.BadRequestError``.
    """
    payload = {
        "model": "azure/GPT-12",
        "messages": [{"role": "user", "content": "hi"}],
        "max_tokens": 10,
    }

    try:
        response = client.post("/chat/completions", json=payload)

        body = response.json()
        print("keys in json response", body.keys())
        assert body.keys() == {"error"}

        # Let the OpenAI SDK classify the HTTP response for us.
        mapped_error = openai.OpenAI(
            api_key="anything"
        )._make_status_error_from_response(response=response)
        print("Type of exception=", type(mapped_error))
        assert isinstance(mapped_error, openai.BadRequestError)
    except Exception as exc:
        pytest.fail(f"LiteLLM Proxy test failed. Exception {exc!s}")


# chat/completions any model
def test_chat_completion_exception_any_model(client):
    """Check the error returned by /chat/completions for model ``Lite-GPT-12``.

    Posts a chat request for ``Lite-GPT-12``, requires ``error`` to be the only
    top-level key of the JSON body, and verifies the OpenAI SDK maps the
    response to ``openai.BadRequestError`` whose message names the invalid
    model.
    """
    payload = {
        "model": "Lite-GPT-12",
        "messages": [{"role": "user", "content": "hi"}],
        "max_tokens": 10,
    }

    try:
        response = client.post("/chat/completions", json=payload)
        assert response.json().keys() == {"error"}

        # Let the OpenAI SDK classify the HTTP response for us.
        mapped_error = openai.OpenAI(
            api_key="anything"
        )._make_status_error_from_response(response=response)
        assert isinstance(mapped_error, openai.BadRequestError)

        expected_fragment = (
            "/chat/completions: Invalid model name passed in model=Lite-GPT-12"
        )
        assert expected_fragment in str(mapped_error.message)
    except Exception as exc:
        pytest.fail(f"LiteLLM Proxy test failed. Exception {exc!s}")


# embeddings any model
def test_embedding_exception_any_model(client):
    """Check the error returned by /embeddings for model ``Lite-GPT-12``.

    Posts an embedding request for ``Lite-GPT-12``, requires ``error`` to be
    the only top-level key of the JSON body, and verifies the OpenAI SDK maps
    the response to ``openai.BadRequestError`` whose message names the invalid
    model.
    """
    payload = {"model": "Lite-GPT-12", "input": ["hi"]}

    try:
        response = client.post("/embeddings", json=payload)
        print("Response from proxy=", response)

        # Parse the body once and reuse it for both the debug print and checks.
        body = response.json()
        print(body)
        print("keys in json response", body.keys())
        assert body.keys() == {"error"}

        # Let the OpenAI SDK classify the HTTP response for us.
        mapped_error = openai.OpenAI(
            api_key="anything"
        )._make_status_error_from_response(response=response)
        print("Exception raised=", mapped_error)
        assert isinstance(mapped_error, openai.BadRequestError)

        expected_fragment = (
            "/embeddings: Invalid model name passed in model=Lite-GPT-12"
        )
        assert expected_fragment in str(mapped_error.message)
    except Exception as exc:
        pytest.fail(f"LiteLLM Proxy test failed. Exception {exc!s}")


# raise openai.BadRequestError
@mock.patch(
    "litellm.proxy.proxy_server.llm_router.acompletion",
    return_value=context_length_exceeded_error_response,
)
def test_chat_completion_exception_azure_context_window(mock_acompletion, client):
    """Check that a canned context-length 400 is passed through as ``BadRequestError``.

    ``llm_router.acompletion`` is patched to return the canned
    context-length-exceeded response. The test posts a chat request for
    ``working-azure-gpt-3.5-turbo`` whose message is ``"hi"`` repeated 10000
    times, then verifies that:

    * the patched call received the request fields plus the extra keyword
      arguments listed below (values unchecked),
    * the JSON body equals ``context_length_exceeded_error_response_dict``, and
    * the OpenAI SDK maps the response to ``openai.BadRequestError``.

    Any exception raised along the way, including a failed assertion, is
    reported through ``pytest.fail``.
    """
    try:
        # Your test data
        test_data = {
            "model": "working-azure-gpt-3.5-turbo",
            "messages": [
                {"role": "user", "content": "hi" * 10000},
            ],
            "max_tokens": 10,
        }

        response = client.post("/chat/completions", json=test_data)
        print("got response from server", response)

        mock_acompletion.assert_called_once_with(
            **test_data,
            litellm_call_id=mock.ANY,
            litellm_logging_obj=mock.ANY,
            request_timeout=mock.ANY,
            metadata=mock.ANY,
            proxy_server_request=mock.ANY,
        )

        json_response = response.json()

        print("keys in json response", json_response.keys())

        assert json_response.keys() == {"error"}

        # The proxy's body must match the canned error payload exactly.
        assert json_response == context_length_exceeded_error_response_dict

        # make an openai client to call _make_status_error_from_response
        openai_client = openai.OpenAI(api_key="anything")
        openai_exception = openai_client._make_status_error_from_response(
            response=response
        )
        print("exception from proxy", openai_exception)
        assert isinstance(openai_exception, openai.BadRequestError)
        print("passed exception is of type BadRequestError")

    except Exception as e:
        pytest.fail(f"LiteLLM Proxy test failed. Exception {str(e)}")