forked from phoenix/litellm-mirror
fix(slack_alerting.py): don't fire spam alerts when backend api call fails
This commit is contained in:
parent b063ef7a47
commit 13e1577753
4 changed files with 69 additions and 21 deletions
slack_alerting.py
@@ -76,16 +76,14 @@ class SlackAlerting(CustomLogger):
         internal_usage_cache: Optional[DualCache] = None,
         alerting_threshold: float = 300, # threshold for slow / hanging llm responses (in seconds)
         alerting: Optional[List] = [],
-        alert_types: Optional[
-            List[
-                Literal[
-                    "llm_exceptions",
-                    "llm_too_slow",
-                    "llm_requests_hanging",
-                    "budget_alerts",
-                    "db_exceptions",
-                    "daily_reports",
-                ]
-            ]
+        alert_types: List[
+            Literal[
+                "llm_exceptions",
+                "llm_too_slow",
+                "llm_requests_hanging",
+                "budget_alerts",
+                "db_exceptions",
+                "daily_reports",
+            ]
         ] = [
             "llm_exceptions",
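The hunk above narrows the annotation from Optional[List[Literal[...]]] to List[Literal[...]], so callers always pass a concrete list instead of None. A minimal sketch of building such a list; the helper name and the default selection below are illustrative, not from the commit:

from typing import List, Literal

AlertType = Literal[
    "llm_exceptions",
    "llm_too_slow",
    "llm_requests_hanging",
    "budget_alerts",
    "db_exceptions",
    "daily_reports",
]

def pick_alert_types(include_daily_reports: bool = False) -> List[AlertType]:
    # With the Optional wrapper gone, a concrete list is always supplied.
    selected: List[AlertType] = ["llm_exceptions", "db_exceptions", "budget_alerts"]
    if include_daily_reports:
        selected.append("daily_reports")
    return selected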
@@ -812,14 +810,6 @@ Model Info:
                 updated_at=litellm.utils.get_utc_datetime(),
             )
         )
-        if "llm_exceptions" in self.alert_types:
-            original_exception = kwargs.get("exception", None)
-
-            await self.send_alert(
-                message="LLM API Failure - " + str(original_exception),
-                level="High",
-                alert_type="llm_exceptions",
-            )
 
     async def _run_scheduler_helper(self, llm_router) -> bool:
         """
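The deleted block is what fired a Slack message on every backend API failure, which is the spam this commit removes. For code that still wants that notification from its own hook, a hedged sketch reusing only the parameters visible in the removed lines; the wrapper name is illustrative and slack_alerting stands for a SlackAlerting instance:

async def alert_on_llm_exception(slack_alerting, original_exception: Exception) -> None:
    # Mirrors the deleted guard: only fire if the alert type is enabled.
    if "llm_exceptions" in slack_alerting.alert_types:
        await slack_alerting.send_alert(
            message="LLM API Failure - " + str(original_exception),
            level="High",
            alert_type="llm_exceptions",
        )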
@@ -8,6 +8,16 @@ model_list:
     base_model: text-embedding-ada-002
     mode: embedding
   model_name: text-embedding-ada-002
+- model_name: gpt-3.5-turbo-012
+  litellm_params:
+    model: gpt-3.5-turbo
+    api_base: http://0.0.0.0:8080
+    api_key: ""
+- model_name: gpt-3.5-turbo-0125-preview
+  litellm_params:
+    model: azure/chatgpt-v-2
+    api_key: os.environ/AZURE_API_KEY
+    api_base: os.environ/AZURE_API_BASE
 
 router_settings:
   redis_host: redis
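Once the proxy is running with these entries, the new alias can be called through it. A minimal usage sketch with the OpenAI client; the base_url, port, and API key are placeholders for a local litellm proxy, not values from this commit:

import openai

client = openai.OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")
response = client.chat.completions.create(
    model="gpt-3.5-turbo-012",  # alias added above; its deployment points at http://0.0.0.0:8080
    messages=[{"role": "user", "content": "Hey, how's it going?"}],
)
print(response.model)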
@@ -17,6 +27,7 @@ router_settings:
 
 litellm_settings:
   set_verbose: True
+  fallbacks: [{"gpt-3.5-turbo-012": ["gpt-3.5-turbo-0125-preview"]}]
   # service_callback: ["prometheus_system"]
   # success_callback: ["prometheus"]
   # failure_callback: ["prometheus"]
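The new fallbacks entry is a list of {primary_model: [fallback_models, ...]} mappings, consulted when the primary deployment errors. A small illustration of how that shape reads; the helper below is illustrative, not litellm's implementation:

fallbacks = [{"gpt-3.5-turbo-012": ["gpt-3.5-turbo-0125-preview"]}]

def fallback_candidates(model: str) -> list:
    # Return the ordered fallback models declared for the given primary model.
    for mapping in fallbacks:
        if model in mapping:
            return mapping[model]
    return []

assert fallback_candidates("gpt-3.5-turbo-012") == ["gpt-3.5-turbo-0125-preview"]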
@@ -25,4 +36,5 @@ general_settings:
   enable_jwt_auth: True
   disable_reset_budget: True
   proxy_batch_write_at: 60 # 👈 Frequency of batch writing logs to server (in seconds)
   routing_strategy: simple-shuffle # Literal["simple-shuffle", "least-busy", "usage-based-routing","latency-based-routing"], default="simple-shuffle"
+  alerting: ["slack"]
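Enabling alerting: ["slack"] assumes a Slack webhook is available to the proxy process. A minimal environment sketch; SLACK_WEBHOOK_URL is the variable litellm's alerting docs reference, and the URL below is a placeholder:

import os

# Placeholder webhook; slack alerting reads it from the environment.
os.environ["SLACK_WEBHOOK_URL"] = "https://hooks.slack.com/services/XXX/YYY/ZZZ"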
@@ -1413,7 +1413,7 @@ class Router:
             verbose_router_logger.debug(f"Trying to fallback b/w models")
             if (
                 hasattr(e, "status_code")
-                and e.status_code == 400
+                and e.status_code == 400  # type: ignore
                 and not isinstance(e, litellm.ContextWindowExceededError)
             ):  # don't retry a malformed request
                 raise e
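The added "# type: ignore" is needed because hasattr() does not narrow the type for static checkers, so accessing e.status_code is still flagged on a bare Exception even though the runtime guard is safe. A hedged standalone illustration of the same pattern, not the Router method itself:

import litellm

def is_malformed_request(e: Exception) -> bool:
    # Runtime-safe thanks to hasattr, but the attribute access still needs the ignore.
    return (
        hasattr(e, "status_code")
        and e.status_code == 400  # type: ignore
        and not isinstance(e, litellm.ContextWindowExceededError)
    )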
@@ -3648,7 +3648,7 @@ class Router:
             )
             asyncio.create_task(
                 proxy_logging_obj.slack_alerting_instance.send_alert(
-                    message=f"Router: Cooling down deployment: {_api_base}, for {self.cooldown_time} seconds. Got exception: {str(exception_status)}",
+                    message=f"Router: Cooling down deployment: {_api_base}, for {self.cooldown_time} seconds. Got exception: {str(exception_status)}. Change 'cooldown_time' + 'allowed_failes' under 'Router Settings' on proxy UI, or via config - https://docs.litellm.ai/docs/proxy/reliability#fallbacks--retries--timeouts--cooldowns",
                     alert_type="cooldown_deployment",
                     level="Low",
                 )
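The expanded alert message points users at the cooldown knobs. A hedged sketch of setting them, assuming the Router constructor accepts them as keyword arguments; they can equally live under router_settings in the proxy config, and the values below are illustrative:

from litellm import Router

model_list = [
    {
        "model_name": "gpt-3.5-turbo-012",
        "litellm_params": {
            "model": "gpt-3.5-turbo",
            "api_key": "anything",
            "api_base": "http://0.0.0.0:8080",
        },
    }
]

router = Router(
    model_list=model_list,
    allowed_fails=3,   # failures tolerated per deployment before it is cooled down
    cooldown_time=30,  # seconds a cooled-down deployment is skipped
)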
@@ -961,3 +961,49 @@ def test_custom_cooldown_times():
     except Exception as e:
         print(e)
 
+
+@pytest.mark.parametrize("sync_mode", [True, False])
+@pytest.mark.asyncio
+async def test_service_unavailable_fallbacks(sync_mode):
+    """
+    Initial model - openai
+    Fallback - azure
+
+    Error - 503, service unavailable
+    """
+    router = Router(
+        model_list=[
+            {
+                "model_name": "gpt-3.5-turbo-012",
+                "litellm_params": {
+                    "model": "gpt-3.5-turbo",
+                    "api_key": "anything",
+                    "api_base": "http://0.0.0.0:8080",
+                },
+            },
+            {
+                "model_name": "gpt-3.5-turbo-0125-preview",
+                "litellm_params": {
+                    "model": "azure/chatgpt-v-2",
+                    "api_key": os.getenv("AZURE_API_KEY"),
+                    "api_version": os.getenv("AZURE_API_VERSION"),
+                    "api_base": os.getenv("AZURE_API_BASE"),
+                },
+            },
+        ],
+        fallbacks=[{"gpt-3.5-turbo-012": ["gpt-3.5-turbo-0125-preview"]}],
+    )
+
+    if sync_mode:
+        response = router.completion(
+            model="gpt-3.5-turbo-012",
+            messages=[{"role": "user", "content": "Hey, how's it going?"}],
+        )
+    else:
+        response = await router.acompletion(
+            model="gpt-3.5-turbo-012",
+            messages=[{"role": "user", "content": "Hey, how's it going?"}],
+        )
+
+    assert response.model == "gpt-35-turbo"
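The new test points the primary deployment at http://0.0.0.0:8080 and, per its docstring, expects a 503 so the Azure fallback is used. A hedged sketch (not part of the commit) of the kind of local endpoint that api_base would need to hit for the fallback path to trigger:

from http.server import BaseHTTPRequestHandler, HTTPServer

class AlwaysUnavailable(BaseHTTPRequestHandler):
    def do_POST(self):
        # Always answer 503 Service Unavailable so the router falls back.
        self.send_response(503)
        self.send_header("Content-Type", "application/json")
        self.end_headers()
        self.wfile.write(b'{"error": {"message": "service unavailable"}}')

if __name__ == "__main__":
    HTTPServer(("0.0.0.0", 8080), AlwaysUnavailable).serve_forever()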