diff --git a/litellm/tests/test_proxy_exception_mapping.py b/litellm/tests/test_proxy_exception_mapping.py
index 0cc7b0d30..ccd071d01 100644
--- a/litellm/tests/test_proxy_exception_mapping.py
+++ b/litellm/tests/test_proxy_exception_mapping.py
@@ -1,6 +1,8 @@
 # test that the proxy actually does exception mapping to the OpenAI format
 import sys, os
+from unittest import mock
+import json
 
 from dotenv import load_dotenv
 
 load_dotenv()
@@ -12,13 +14,30 @@ sys.path.insert(
 import pytest
 import litellm, openai
 from fastapi.testclient import TestClient
-from fastapi import FastAPI
+from fastapi import Response
 from litellm.proxy.proxy_server import (
     router,
     save_worker_config,
     initialize,
 )  # Replace with the actual module where your FastAPI router is defined
 
+invalid_authentication_error_response = Response(
+    status_code=401,
+    content=json.dumps({"error": "Invalid Authentication"}),
+)
+context_length_exceeded_error_response_dict = {
+    "error": {
+        "message": "AzureException - Error code: 400 - {'error': {'message': \"This model's maximum context length is 4096 tokens. However, your messages resulted in 10007 tokens. Please reduce the length of the messages.\", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}",
+        "type": None,
+        "param": None,
+        "code": 400,
+    },
+}
+context_length_exceeded_error_response = Response(
+    status_code=400,
+    content=json.dumps(context_length_exceeded_error_response_dict),
+)
+
 
 @pytest.fixture
 def client():
@@ -60,7 +79,11 @@ def test_chat_completion_exception(client):
 
 
 # raise openai.AuthenticationError
-def test_chat_completion_exception_azure(client):
+@mock.patch(
+    "litellm.proxy.proxy_server.llm_router.acompletion",
+    return_value=invalid_authentication_error_response,
+)
+def test_chat_completion_exception_azure(mock_acompletion, client):
     try:
         # Your test data
         test_data = {
@@ -73,6 +96,15 @@ def test_chat_completion_exception_azure(client):
 
         response = client.post("/chat/completions", json=test_data)
 
+        mock_acompletion.assert_called_once_with(
+            **test_data,
+            litellm_call_id=mock.ANY,
+            litellm_logging_obj=mock.ANY,
+            request_timeout=mock.ANY,
+            metadata=mock.ANY,
+            proxy_server_request=mock.ANY,
+        )
+
         json_response = response.json()
         print("keys in json response", json_response.keys())
         assert json_response.keys() == {"error"}
@@ -90,12 +122,21 @@ def test_chat_completion_exception_azure(client):
 
 
 # raise openai.AuthenticationError
-def test_embedding_auth_exception_azure(client):
+@mock.patch(
+    "litellm.proxy.proxy_server.llm_router.aembedding",
+    return_value=invalid_authentication_error_response,
+)
+def test_embedding_auth_exception_azure(mock_aembedding, client):
     try:
         # Your test data
         test_data = {"model": "azure-embedding", "input": ["hi"]}
 
         response = client.post("/embeddings", json=test_data)
+        mock_aembedding.assert_called_once_with(
+            **test_data,
+            metadata=mock.ANY,
+            proxy_server_request=mock.ANY,
+        )
         print("Response from proxy=", response)
 
         json_response = response.json()
@@ -204,7 +245,11 @@ def test_embedding_exception_any_model(client):
 
 
 # raise openai.BadRequestError
-def test_chat_completion_exception_azure_context_window(client):
+@mock.patch(
+    "litellm.proxy.proxy_server.llm_router.acompletion",
+    return_value=context_length_exceeded_error_response,
+)
+def test_chat_completion_exception_azure_context_window(mock_acompletion, client):
     try:
         # Your test data
         test_data = {
@@ -219,20 +264,22 @@ def test_chat_completion_exception_azure_context_window(client):
 
         response = client.post("/chat/completions", json=test_data)
 
         print("got response from server", response)
+        mock_acompletion.assert_called_once_with(
+            **test_data,
+            litellm_call_id=mock.ANY,
+            litellm_logging_obj=mock.ANY,
+            request_timeout=mock.ANY,
+            metadata=mock.ANY,
+            proxy_server_request=mock.ANY,
+        )
+
         json_response = response.json()
 
         print("keys in json response", json_response.keys())
 
         assert json_response.keys() == {"error"}
 
-        assert json_response == {
-            "error": {
-                "message": "AzureException - Error code: 400 - {'error': {'message': \"This model's maximum context length is 4096 tokens. However, your messages resulted in 10007 tokens. Please reduce the length of the messages.\", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}",
-                "type": None,
-                "param": None,
-                "code": 400,
-            }
-        }
+        assert json_response == context_length_exceeded_error_response_dict
 
         # make an openai client to call _make_status_error_from_response
         openai_client = openai.OpenAI(api_key="anything")