fix(router.py): check if azure returns 'content_filter' response + fallback available -> fallback

Maps Azure content-filter responses to exceptions so that configured content-policy fallbacks can handle them.
Krrish Dholakia 2024-06-22 19:10:15 -07:00
parent f814f24d9d
commit 2c7a80d08d
8 changed files with 100 additions and 70 deletions
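A minimal sketch of the router-level behavior this commit adds, mirroring the test added in this diff; the model names, keys, and mocked responses are illustrative, and it assumes Router accepts content_policy_fallbacks (as the proxy config below does):

import litellm
from litellm import Router

# Deployment whose mocked response comes back with finish_reason="content_filter",
# i.e. what Azure returns when its content filter blocks the output.
router = Router(
    model_list=[
        {
            "model_name": "gpt-4o",
            "litellm_params": {
                "model": "azure/gpt-4o",
                "api_key": "",
                "mock_response": litellm.ModelResponse(
                    choices=[litellm.Choices(finish_reason="content_filter")]
                ),
            },
        },
        {
            "model_name": "my-fake-model",
            "litellm_params": {
                "model": "openai/my-fake-model",
                "api_key": "",
                "mock_response": "This works!",
            },
        },
    ],
    content_policy_fallbacks=[{"gpt-4o": ["my-fake-model"]}],
)

# The filtered response is raised as ContentPolicyViolationError (only because a
# fallback group exists for "gpt-4o"), which routes the call to "my-fake-model".
response = router.completion(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
print(response.model)  # "my-fake-model"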


@@ -477,6 +477,9 @@ def mock_completion(
        if time_delay is not None:
            time.sleep(time_delay)

        if isinstance(mock_response, dict):
            return ModelResponse(**mock_response)

        model_response = ModelResponse(stream=stream)
        if stream is True:
            # don't try to access stream object,
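For reference, a hedged sketch of what the mock_completion change above allows: passing a plain dict as mock_response and having it unpacked via ModelResponse(**mock_response); the field values here are illustrative.

import litellm

resp = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "hi"}],
    mock_response={
        "choices": [
            {
                "finish_reason": "content_filter",
                "index": 0,
                "message": {"role": "assistant", "content": None},
            }
        ]
    },
)
print(resp.choices[0].finish_reason)  # "content_filter"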

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -1,61 +1,14 @@
environment_variables:
  LANGFUSE_PUBLIC_KEY: Q6K8MQN6L7sPYSJiFKM9eNrETOx6V/FxVPup4FqdKsZK1hyR4gyanlQ2KHLg5D5afng99uIt0JCEQ2jiKF9UxFvtnb4BbJ4qpeceH+iK8v/bdg==
  LANGFUSE_SECRET_KEY: 5xQ7KMa6YMLsm+H/Pf1VmlqWq1NON5IoCxABhkUBeSck7ftsj2CmpkL2ZwrxwrktgiTUBH+3gJYBX+XBk7lqOOUpvmiLjol/E5lCqq0M1CqLWA==
  SLACK_WEBHOOK_URL: RJjhS0Hhz0/s07sCIf1OTXmTGodpK9L2K9p953Z+fOX0l2SkPFT6mB9+yIrLufmlwEaku5NNEBKy//+AG01yOd+7wV1GhK65vfj3B/gTN8t5cuVnR4vFxKY5Rx4eSGLtzyAs+aIBTp4GoNXDIjroCqfCjPkItEZWCg==
general_settings:
  alerting:
  - slack
  alerting_threshold: 300
  database_connection_pool_limit: 100
  database_connection_timeout: 60
  disable_master_key_return: true
  health_check_interval: 300
  proxy_batch_write_at: 60
  ui_access_mode: all
  # master_key: sk-1234
litellm_settings:
  allowed_fails: 3
  failure_callback:
  - prometheus
  num_retries: 3
  service_callback:
  - prometheus_system
  success_callback:
  - langfuse
  - prometheus
  - langsmith
model_list:
- litellm_params:
  - model_name: my-fake-model
    litellm_params:
    model: gpt-3.5-turbo
  model_name: gpt-3.5-turbo
- litellm_params:
    api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
    api_key: my-fake-key
    model: openai/my-fake-model
    stream_timeout: 0.001
  model_name: fake-openai-endpoint
- litellm_params:
    api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/
    api_key: my-fake-key
    model: openai/my-fake-model-2
    stream_timeout: 0.001
  model_name: fake-openai-endpoint
- litellm_params:
    api_base: os.environ/AZURE_API_BASE
    api_key: os.environ/AZURE_API_KEY
    api_version: 2023-07-01-preview
    model: azure/chatgpt-v-2
    stream_timeout: 0.001
  model_name: azure-gpt-3.5
- litellm_params:
    api_key: os.environ/OPENAI_API_KEY
    model: text-embedding-ada-002
  model_name: text-embedding-ada-002
- litellm_params:
    model: text-completion-openai/gpt-3.5-turbo-instruct
  model_name: gpt-instruct
router_settings:
  enable_pre_call_checks: true
  redis_host: os.environ/REDIS_HOST
  redis_password: os.environ/REDIS_PASSWORD
  redis_port: os.environ/REDIS_PORT
      mock_response: hello-world
  - model_name: gpt-4o
    litellm_params:
      model: azure/gpt-4o
      api_base: https://litellm8397336933.openai.azure.com/
      api_key: 610f806211ab47f2a694493000045858
litellm_settings:
  content_policy_fallbacks: [{"gpt-4o": ["my-fake-model"]}]
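Assuming the proxy is started with this config and listens on the default local port, a request against the gpt-4o deployment should transparently fall back to my-fake-model when Azure's content filter trips; the base_url and API key below are placeholders:

import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
print(response.model)  # expected to be served by the "my-fake-model" fallback when gpt-4o is filtered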


@@ -562,6 +562,18 @@ class Router:
            verbose_router_logger.info(
                f"litellm.completion(model={model_name})\033[32m 200 OK\033[0m"
            )

            ## CHECK CONTENT FILTER ERROR ##
            if isinstance(response, ModelResponse):
                _should_raise = self._should_raise_content_policy_error(
                    model=model, response=response, kwargs=kwargs
                )
                if _should_raise:
                    raise litellm.ContentPolicyViolationError(
                        message="Response output was blocked.",
                        model=model,
                        llm_provider="",
                    )

            return response
        except Exception as e:
            verbose_router_logger.info(
@@ -721,6 +733,18 @@
            await self.async_routing_strategy_pre_call_checks(deployment=deployment)
            response = await _response

            ## CHECK CONTENT FILTER ERROR ##
            if isinstance(response, ModelResponse):
                _should_raise = self._should_raise_content_policy_error(
                    model=model, response=response, kwargs=kwargs
                )
                if _should_raise:
                    raise litellm.ContentPolicyViolationError(
                        message="Response output was blocked.",
                        model=model,
                        llm_provider="",
                    )

            self.success_calls[model_name] += 1
            verbose_router_logger.info(
                f"litellm.acompletion(model={model_name})\033[32m 200 OK\033[0m"
@@ -2801,6 +2825,40 @@ class Router:
            # Catch all - if any exceptions default to cooling down
            return True

    def _should_raise_content_policy_error(
        self, model: str, response: ModelResponse, kwargs: dict
    ) -> bool:
        """
        Determines if a content policy error should be raised.

        Only raised if a fallback is available.

        Else, original response is returned.
        """
        if response.choices[0].finish_reason != "content_filter":
            return False

        content_policy_fallbacks = kwargs.get(
            "content_policy_fallbacks", self.content_policy_fallbacks
        )
        ### ONLY RAISE ERROR IF CP FALLBACK AVAILABLE ###
        if content_policy_fallbacks is not None:
            fallback_model_group = None
            for item in content_policy_fallbacks:  # [{"gpt-3.5-turbo": ["gpt-4"]}]
                if list(item.keys())[0] == model:
                    fallback_model_group = item[model]
                    break

            if fallback_model_group is not None:
                return True

        verbose_router_logger.info(
            "Content Policy Error occurred. No available fallbacks. Returning original response. model={}, content_policy_fallbacks={}".format(
                model, content_policy_fallbacks
            )
        )
        return False

    def _set_cooldown_deployments(
        self,
        original_exception: Any,
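Because _should_raise_content_policy_error reads content_policy_fallbacks from kwargs before falling back to the router-level setting, the mapping can presumably also be supplied per request; a small sketch, reusing the router from the example near the top of this commit:

response = router.completion(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
    # per-request override; only models with a fallback group get converted to an error
    content_policy_fallbacks=[{"gpt-4o": ["my-fake-model"]}],
)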


@@ -1,8 +1,12 @@
#### What this tests ####
# This tests calling router with fallback models

import sys, os, time
import traceback, asyncio
import asyncio
import os
import sys
import time
import traceback

import pytest

sys.path.insert(
@@ -762,9 +766,11 @@ def test_ausage_based_routing_fallbacks():
    # The Request should fail azure/gpt-4-fast. Then fallback -> "azure/gpt-4-basic" -> "openai-gpt-4"
    # It should work with "openai-gpt-4"
    import os

    from dotenv import load_dotenv
    import litellm
    from litellm import Router
    from dotenv import load_dotenv

    load_dotenv()
@@ -1112,9 +1118,19 @@ async def test_client_side_fallbacks_list(sync_mode):
@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.parametrize("content_filter_response_exception", [True, False])
@pytest.mark.asyncio
async def test_router_content_policy_fallbacks(sync_mode):
async def test_router_content_policy_fallbacks(
    sync_mode, content_filter_response_exception
):
    os.environ["LITELLM_LOG"] = "DEBUG"

    if content_filter_response_exception:
        mock_response = Exception("content filtering policy")
    else:
        mock_response = litellm.ModelResponse(
            choices=[litellm.Choices(finish_reason="content_filter")]
        )
    router = Router(
        model_list=[
            {
@@ -1122,13 +1138,13 @@ async def test_router_content_policy_fallbacks(sync_mode):
                "litellm_params": {
                    "model": "claude-2",
                    "api_key": "",
                    "mock_response": Exception("content filtering policy"),
                    "mock_response": mock_response,
                },
            },
            {
                "model_name": "my-fallback-model",
                "litellm_params": {
                    "model": "claude-2",
                    "model": "openai/my-fake-model",
                    "api_key": "",
                    "mock_response": "This works!",
                },
@@ -1165,3 +1181,5 @@ async def test_router_content_policy_fallbacks(sync_mode):
            model="claude-2",
            messages=[{"role": "user", "content": "Hey, how's it going?"}],
        )

    assert response.model == "my-fake-model"


@@ -12,6 +12,7 @@ from pydantic import BaseModel, ConfigDict, Field
from .completion import CompletionRequest
from .embedding import EmbeddingRequest
from .utils import ModelResponse


class ModelConfig(BaseModel):
@@ -315,7 +316,7 @@ class LiteLLMParamsTypedDict(TypedDict, total=False):
    input_cost_per_second: Optional[float]
    output_cost_per_second: Optional[float]
    ## MOCK RESPONSES ##
    mock_response: Optional[str]
    mock_response: Optional[Union[str, ModelResponse, Exception]]


class DeploymentTypedDict(TypedDict):
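With the widened mock_response type, a model_list entry can carry a full ModelResponse (or an Exception) for testing; a short sketch with illustrative names, assuming the typed dicts are importable from litellm.types.router:

import litellm
from litellm.types.router import DeploymentTypedDict

deployment: DeploymentTypedDict = {
    "model_name": "azure-content-filtered",
    "litellm_params": {
        "model": "azure/gpt-4o",
        "api_key": "",
        # a mocked Azure content-filter response, now accepted by the type
        "mock_response": litellm.ModelResponse(
            choices=[litellm.Choices(finish_reason="content_filter")]
        ),
    },
}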