Improve mocking in test_proxy_exception_mapping

Mock the calls to the backend and assert that the proxy forwards the correct
parameters to it.
Marc Abramowitz 2024-05-02 14:42:20 -07:00
parent f893f8bc55
commit cdb39e90ce

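In short, each test patches the router coroutine the proxy would call and then asserts on the forwarded parameters. A minimal sketch of the pattern the diff applies — the payload values are illustrative, `auth_error` is a stand-in for the canned responses defined below, and `client` is assumed to be the TestClient fixture from this module:

import json
from unittest import mock

from fastapi import Response

# Canned backend failure, mirroring the fixtures in the module below
auth_error = Response(status_code=401, content=json.dumps({"error": "Invalid Authentication"}))

@mock.patch(
    "litellm.proxy.proxy_server.llm_router.acompletion",
    return_value=auth_error,
)
def test_params_forwarded(mock_acompletion, client):
    # Payload values are illustrative; the real tests define their own test_data.
    test_data = {"model": "azure/gpt-4", "messages": [{"role": "user", "content": "hi"}]}
    client.post("/chat/completions", json=test_data)
    # mock.ANY absorbs per-request values the proxy injects itself.
    mock_acompletion.assert_called_once_with(
        **test_data,
        litellm_call_id=mock.ANY,
        litellm_logging_obj=mock.ANY,
        request_timeout=mock.ANY,
        metadata=mock.ANY,
        proxy_server_request=mock.ANY,
    )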

@@ -1,6 +1,8 @@
 # test that the proxy actually does exception mapping to the OpenAI format
 import sys, os
+from unittest import mock
+import json
 from dotenv import load_dotenv
 load_dotenv()
@@ -12,13 +14,30 @@ sys.path.insert(
 import pytest
 import litellm, openai
 from fastapi.testclient import TestClient
 from fastapi import FastAPI
+from fastapi import Response
 from litellm.proxy.proxy_server import (
     router,
     save_worker_config,
     initialize,
 )  # Replace with the actual module where your FastAPI router is defined
 
+invalid_authentication_error_response = Response(
+    status_code=401,
+    content=json.dumps({"error": "Invalid Authentication"}),
+)
+context_length_exceeded_error_response_dict = {
+    "error": {
+        "message": "AzureException - Error code: 400 - {'error': {'message': \"This model's maximum context length is 4096 tokens. However, your messages resulted in 10007 tokens. Please reduce the length of the messages.\", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}",
+        "type": None,
+        "param": None,
+        "code": 400,
+    },
+}
+context_length_exceeded_error_response = Response(
+    status_code=400,
+    content=json.dumps(context_length_exceeded_error_response_dict),
+)
 
 @pytest.fixture
 def client():
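The two canned fastapi.Response objects above are what the patched router methods will return; the proxy is expected to surface their JSON bodies as OpenAI-style errors. A quick sanity check of that round-trip (a sketch for illustration, not part of the commit):

import json
from fastapi import Response

resp = Response(status_code=401, content=json.dumps({"error": "Invalid Authentication"}))
# Starlette renders the content to bytes and stores it on .body
assert resp.status_code == 401
assert json.loads(resp.body) == {"error": "Invalid Authentication"}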
@@ -60,7 +79,11 @@ def test_chat_completion_exception(client):
 # raise openai.AuthenticationError
-def test_chat_completion_exception_azure(client):
+@mock.patch(
+    "litellm.proxy.proxy_server.llm_router.acompletion",
+    return_value=invalid_authentication_error_response,
+)
+def test_chat_completion_exception_azure(mock_acompletion, client):
     try:
         # Your test data
         test_data = {
@@ -73,6 +96,15 @@ def test_chat_completion_exception_azure(client):
         response = client.post("/chat/completions", json=test_data)
 
+        mock_acompletion.assert_called_once_with(
+            **test_data,
+            litellm_call_id=mock.ANY,
+            litellm_logging_obj=mock.ANY,
+            request_timeout=mock.ANY,
+            metadata=mock.ANY,
+            proxy_server_request=mock.ANY,
+        )
+
         json_response = response.json()
         print("keys in json response", json_response.keys())
         assert json_response.keys() == {"error"}
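assert_called_once_with compares every keyword, so mock.ANY is used for the arguments the proxy generates itself (call IDs, logging objects, timeouts). ANY compares equal to any value, which a one-liner illustrates:

from unittest import mock

assert mock.ANY == "chatcmpl-1234"              # ANY.__eq__ always returns True
assert {"trace": "abc"} == {"trace": mock.ANY}  # works nested inside containers too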
@@ -90,12 +122,21 @@ def test_chat_completion_exception_azure(client):
 # raise openai.AuthenticationError
-def test_embedding_auth_exception_azure(client):
+@mock.patch(
+    "litellm.proxy.proxy_server.llm_router.aembedding",
+    return_value=invalid_authentication_error_response,
+)
+def test_embedding_auth_exception_azure(mock_aembedding, client):
     try:
         # Your test data
         test_data = {"model": "azure-embedding", "input": ["hi"]}
         response = client.post("/embeddings", json=test_data)
+        mock_aembedding.assert_called_once_with(
+            **test_data,
+            metadata=mock.ANY,
+            proxy_server_request=mock.ANY,
+        )
         print("Response from proxy=", response)
         json_response = response.json()
@@ -204,7 +245,11 @@ def test_embedding_exception_any_model(client):
 # raise openai.BadRequestError
-def test_chat_completion_exception_azure_context_window(client):
+@mock.patch(
+    "litellm.proxy.proxy_server.llm_router.acompletion",
+    return_value=context_length_exceeded_error_response,
+)
+def test_chat_completion_exception_azure_context_window(mock_acompletion, client):
     try:
         # Your test data
         test_data = {
@@ -219,20 +264,22 @@ def test_chat_completion_exception_azure_context_window(client):
         response = client.post("/chat/completions", json=test_data)
         print("got response from server", response)
+        mock_acompletion.assert_called_once_with(
+            **test_data,
+            litellm_call_id=mock.ANY,
+            litellm_logging_obj=mock.ANY,
+            request_timeout=mock.ANY,
+            metadata=mock.ANY,
+            proxy_server_request=mock.ANY,
+        )
+
         json_response = response.json()
         print("keys in json response", json_response.keys())
         assert json_response.keys() == {"error"}
-        assert json_response == {
-            "error": {
-                "message": "AzureException - Error code: 400 - {'error': {'message': \"This model's maximum context length is 4096 tokens. However, your messages resulted in 10007 tokens. Please reduce the length of the messages.\", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}",
-                "type": None,
-                "param": None,
-                "code": 400,
-            }
-        }
+        assert json_response == context_length_exceeded_error_response_dict
 
         # make an openai client to call _make_status_error_from_response
         openai_client = openai.OpenAI(api_key="anything")
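The trailing context hints at the rest of the test: the error body is fed back through the openai client's private _make_status_error_from_response helper to confirm it maps to the expected exception class. A sketch of how that call might look — this is a private API of the openai package, so treat the signature as an assumption:

import httpx
import openai

openai_client = openai.OpenAI(api_key="anything")
raw_response = httpx.Response(
    status_code=400,
    request=httpx.Request("POST", "http://localhost:4000/chat/completions"),
    json=context_length_exceeded_error_response_dict,
)
# Private helper on the client; a 400 status should map to openai.BadRequestError
error = openai_client._make_status_error_from_response(raw_response)
assert isinstance(error, openai.BadRequestError)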