(router testing) Add testing coverage for run_async_fallback and run_sync_fallback (#6256)

* add type hints for run_async_fallback * fix async fallback doc string * test run_async_fallback
2024-10-16 16:16:17 +05:30 · 2024-10-16 16:16:17 +05:30 · fc5b75d171
commit fc5b75d171
parent d9a71650e3
2 changed files with 388 additions and 2 deletions
--- a/tests/local_testing/test_router_fallback_handlers.py
+++ b/tests/local_testing/test_router_fallback_handlers.py
@ -0,0 +1,351 @@
+import asyncio
+import os
+import sys
+import time
+import traceback
+
+import pytest
+
+sys.path.insert(
+    0, os.path.abspath("../..")
+)  # Adds the parent directory to the system path
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import litellm
+from litellm import Router
+from litellm.integrations.custom_logger import CustomLogger
+from typing import Any, Dict
+
+
+import sys
+import os
+from typing import List, Dict
+
+sys.path.insert(0, os.path.abspath("../.."))
+
+from litellm.router_utils.fallback_event_handlers import (
+    run_async_fallback,
+    run_sync_fallback,
+    log_success_fallback_event,
+    log_failure_fallback_event,
+)
+
+
+# Helper function to create a Router instance
+def create_test_router():
+    return Router(
+        model_list=[
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "gpt-3.5-turbo",
+                    "api_key": os.getenv("OPENAI_API_KEY"),
+                },
+            },
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4",
+                    "api_key": os.getenv("OPENAI_API_KEY"),
+                },
+            },
+        ],
+        fallbacks=[{"gpt-3.5-turbo": ["gpt-4"]}],
+    )
+
+
+router: Router = create_test_router()
+
+
+@pytest.mark.parametrize(
+    "original_function",
+    [router._acompletion, router._atext_completion, router._aembedding],
+)
+@pytest.mark.asyncio
+async def test_run_async_fallback(original_function):
+    """
+    Basic test - given a list of fallback models, run the original function with the fallback models
+    """
+    litellm.set_verbose = True
+    fallback_model_group = ["gpt-4"]
+    original_model_group = "gpt-3.5-turbo"
+    original_exception = litellm.exceptions.InternalServerError(
+        message="Simulated error",
+        llm_provider="openai",
+        model="gpt-3.5-turbo",
+    )
+
+    request_kwargs = {
+        "mock_response": "hello this is a test for run_async_fallback",
+        "metadata": {"previous_models": ["gpt-3.5-turbo"]},
+    }
+
+    if original_function == router._aembedding:
+        request_kwargs["input"] = "hello this is a test for run_async_fallback"
+    elif original_function == router._atext_completion:
+        request_kwargs["prompt"] = "hello this is a test for run_async_fallback"
+    elif original_function == router._acompletion:
+        request_kwargs["messages"] = [{"role": "user", "content": "Hello, world!"}]
+
+    result = await run_async_fallback(
+        router,
+        original_function=original_function,
+        num_retries=1,
+        fallback_model_group=fallback_model_group,
+        original_model_group=original_model_group,
+        original_exception=original_exception,
+        **request_kwargs
+    )
+
+    assert result is not None
+
+    if original_function == router._acompletion:
+        assert isinstance(result, litellm.ModelResponse)
+    elif original_function == router._atext_completion:
+        assert isinstance(result, litellm.TextCompletionResponse)
+    elif original_function == router._aembedding:
+        assert isinstance(result, litellm.EmbeddingResponse)
+
+
+@pytest.mark.parametrize("original_function", [router._completion, router._embedding])
+def test_run_sync_fallback(original_function):
+    litellm.set_verbose = True
+    fallback_model_group = ["gpt-4"]
+    original_model_group = "gpt-3.5-turbo"
+    original_exception = litellm.exceptions.InternalServerError(
+        message="Simulated error",
+        llm_provider="openai",
+        model="gpt-3.5-turbo",
+    )
+
+    request_kwargs = {
+        "mock_response": "hello this is a test for run_async_fallback",
+        "metadata": {"previous_models": ["gpt-3.5-turbo"]},
+    }
+
+    if original_function == router._embedding:
+        request_kwargs["input"] = "hello this is a test for run_async_fallback"
+    elif original_function == router._completion:
+        request_kwargs["messages"] = [{"role": "user", "content": "Hello, world!"}]
+    result = run_sync_fallback(
+        router,
+        original_function=original_function,
+        num_retries=1,
+        fallback_model_group=fallback_model_group,
+        original_model_group=original_model_group,
+        original_exception=original_exception,
+        **request_kwargs
+    )
+
+    assert result is not None
+
+    if original_function == router._completion:
+        assert isinstance(result, litellm.ModelResponse)
+    elif original_function == router._embedding:
+        assert isinstance(result, litellm.EmbeddingResponse)
+
+
+class CustomTestLogger(CustomLogger):
+    def __init__(self):
+        super().__init__()
+        self.success_fallback_events = []
+        self.failure_fallback_events = []
+
+    async def log_success_fallback_event(
+        self, original_model_group, kwargs, original_exception
+    ):
+        print(
+            "in log_success_fallback_event for original_model_group: ",
+            original_model_group,
+        )
+        self.success_fallback_events.append(
+            (original_model_group, kwargs, original_exception)
+        )
+
+    async def log_failure_fallback_event(
+        self, original_model_group, kwargs, original_exception
+    ):
+        print(
+            "in log_failure_fallback_event for original_model_group: ",
+            original_model_group,
+        )
+        self.failure_fallback_events.append(
+            (original_model_group, kwargs, original_exception)
+        )
+
+
+@pytest.mark.asyncio
+async def test_log_success_fallback_event():
+    """
+    Tests that successful fallback events are logged correctly
+    """
+    original_model_group = "gpt-3.5-turbo"
+    kwargs = {"messages": [{"role": "user", "content": "Hello, world!"}]}
+    original_exception = litellm.exceptions.InternalServerError(
+        message="Simulated error",
+        llm_provider="openai",
+        model="gpt-3.5-turbo",
+    )
+
+    logger = CustomTestLogger()
+    litellm.callbacks = [logger]
+
+    # This test mainly checks if the function runs without errors
+    await log_success_fallback_event(original_model_group, kwargs, original_exception)
+
+    await asyncio.sleep(0.5)
+    assert len(logger.success_fallback_events) == 1
+    assert len(logger.failure_fallback_events) == 0
+    assert logger.success_fallback_events[0] == (
+        original_model_group,
+        kwargs,
+        original_exception,
+    )
+
+
+@pytest.mark.asyncio
+async def test_log_failure_fallback_event():
+    """
+    Tests that failed fallback events are logged correctly
+    """
+    original_model_group = "gpt-3.5-turbo"
+    kwargs = {"messages": [{"role": "user", "content": "Hello, world!"}]}
+    original_exception = litellm.exceptions.InternalServerError(
+        message="Simulated error",
+        llm_provider="openai",
+        model="gpt-3.5-turbo",
+    )
+
+    logger = CustomTestLogger()
+    litellm.callbacks = [logger]
+
+    # This test mainly checks if the function runs without errors
+    await log_failure_fallback_event(original_model_group, kwargs, original_exception)
+
+    await asyncio.sleep(0.5)
+
+    assert len(logger.failure_fallback_events) == 1
+    assert len(logger.success_fallback_events) == 0
+    assert logger.failure_fallback_events[0] == (
+        original_model_group,
+        kwargs,
+        original_exception,
+    )
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "original_function", [router._acompletion, router._atext_completion]
+)
+async def test_failed_fallbacks_raise_most_recent_exception(original_function):
+    """
+    Tests that if all fallbacks fail, the most recent occuring exception is raised
+
+    meaning the exception from the last fallback model is raised
+    """
+    fallback_model_group = ["gpt-4"]
+    original_model_group = "gpt-3.5-turbo"
+    original_exception = litellm.exceptions.InternalServerError(
+        message="Simulated error",
+        llm_provider="openai",
+        model="gpt-3.5-turbo",
+    )
+
+    request_kwargs: Dict[str, Any] = {
+        "metadata": {"previous_models": ["gpt-3.5-turbo"]}
+    }
+
+    if original_function == router._aembedding:
+        request_kwargs["input"] = "hello this is a test for run_async_fallback"
+    elif original_function == router._atext_completion:
+        request_kwargs["prompt"] = "hello this is a test for run_async_fallback"
+    elif original_function == router._acompletion:
+        request_kwargs["messages"] = [{"role": "user", "content": "Hello, world!"}]
+
+    with pytest.raises(litellm.exceptions.RateLimitError):
+        await run_async_fallback(
+            router,
+            original_function=original_function,
+            num_retries=1,
+            fallback_model_group=fallback_model_group,
+            original_model_group=original_model_group,
+            original_exception=original_exception,
+            mock_response="litellm.RateLimitError",
+            **request_kwargs
+        )
+
+
+router_2 = Router(
+    model_list=[
+        {
+            "model_name": "gpt-3.5-turbo",
+            "litellm_params": {
+                "model": "gpt-3.5-turbo",
+                "api_key": os.getenv("OPENAI_API_KEY"),
+            },
+        },
+        {
+            "model_name": "gpt-4",
+            "litellm_params": {
+                "model": "gpt-4",
+                "api_key": "very-fake-key",
+            },
+        },
+        {
+            "model_name": "fake-openai-endpoint-2",
+            "litellm_params": {
+                "model": "openai/fake-openai-endpoint-2",
+                "api_key": "working-key-since-this-is-fake-endpoint",
+                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+            },
+        },
+    ],
+)
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "original_function", [router_2._acompletion, router_2._atext_completion]
+)
+async def test_multiple_fallbacks(original_function):
+    """
+    Tests that if multiple fallbacks passed:
+    - fallback 1 = bad configured deployment / failing endpoint
+    - fallback 2 = working deployment / working endpoint
+
+    Assert that:
+    - a success response is received from the working endpoint (fallback 2)
+    """
+    fallback_model_group = ["gpt-4", "fake-openai-endpoint-2"]
+    original_model_group = "gpt-3.5-turbo"
+    original_exception = Exception("Simulated error")
+
+    request_kwargs: Dict[str, Any] = {
+        "metadata": {"previous_models": ["gpt-3.5-turbo"]}
+    }
+
+    if original_function == router_2._aembedding:
+        request_kwargs["input"] = "hello this is a test for run_async_fallback"
+    elif original_function == router_2._atext_completion:
+        request_kwargs["prompt"] = "hello this is a test for run_async_fallback"
+    elif original_function == router_2._acompletion:
+        request_kwargs["messages"] = [{"role": "user", "content": "Hello, world!"}]
+
+    result = await run_async_fallback(
+        router_2,
+        original_function=original_function,
+        num_retries=1,
+        fallback_model_group=fallback_model_group,
+        original_model_group=original_model_group,
+        original_exception=original_exception,
+        **request_kwargs
+    )
+
+    print(result)
+
+    print(result._hidden_params)
+
+    assert (
+        result._hidden_params["api_base"]
+        == "https://exampleopenaiendpoint-production.up.railway.app/"
+    )