From 19c3a82d1bc0960824caad6238680bf58fb58eae Mon Sep 17 00:00:00 2001
From: Ishaan Jaff <ishaanjaffer0324@gmail.com>
Date: Tue, 20 Aug 2024 11:59:43 -0700
Subject: [PATCH] test + never retry on 404 errors

---
 litellm/router.py                    |  2 ++
 litellm/tests/test_router_retries.py | 48 +++++++++++++++++++++++++---
 2 files changed, 46 insertions(+), 4 deletions(-)

diff --git a/litellm/router.py b/litellm/router.py
index 2d7bd517b..8629c19fd 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -2708,6 +2708,8 @@ class Router:
         ):
             raise error
 
+        if isinstance(error, litellm.NotFoundError):
+            raise error
         # Error we should only retry if there are other deployments
         if isinstance(error, openai.RateLimitError):
             if (
diff --git a/litellm/tests/test_router_retries.py b/litellm/tests/test_router_retries.py
index db2a1c646..17c5762cf 100644
--- a/litellm/tests/test_router_retries.py
+++ b/litellm/tests/test_router_retries.py
@@ -1,18 +1,24 @@
 #### What this tests ####
 #    This tests calling router with fallback models
 
-import sys, os, time
-import traceback, asyncio
+import asyncio
+import os
+import sys
+import time
+import traceback
+
 import pytest
 
 sys.path.insert(
     0, os.path.abspath("../..")
 )  # Adds the parent directory to the system path
 
+import httpx
+import openai
+
 import litellm
 from litellm import Router
 from litellm.integrations.custom_logger import CustomLogger
-import openai, httpx
 
 
 class MyCustomHandler(CustomLogger):
@@ -127,7 +133,7 @@ async def test_router_retries_errors(sync_mode, error_type):
     ["AuthenticationErrorRetries", "ContentPolicyViolationErrorRetries"],  #
 )
 async def test_router_retry_policy(error_type):
-    from litellm.router import RetryPolicy, AllowedFailsPolicy
+    from litellm.router import AllowedFailsPolicy, RetryPolicy
 
     retry_policy = RetryPolicy(
         ContentPolicyViolationErrorRetries=3, AuthenticationErrorRetries=0
@@ -582,3 +588,37 @@ def test_timeout_for_rate_limit_error_with_no_healthy_deployments():
     )
 
     assert _timeout > 0.0
+
+
+def test_no_retry_for_not_found_error_404():
+    healthy_deployments = []
+
+    router = Router(
+        model_list=[
+            {
+                "model_name": "gpt-3.5-turbo",
+                "litellm_params": {
+                    "model": "azure/chatgpt-v-2",
+                    "api_key": os.getenv("AZURE_API_KEY"),
+                    "api_version": os.getenv("AZURE_API_VERSION"),
+                    "api_base": os.getenv("AZURE_API_BASE"),
+                },
+            }
+        ]
+    )
+
+    # Act & Assert
+    error = litellm.NotFoundError(
+        message="404 model not found",
+        model="gpt-12",
+        llm_provider="azure",
+    )
+    try:
+        response = router.should_retry_this_error(
+            error=error, healthy_deployments=healthy_deployments
+        )
+        pytest.fail(
+            "Should have raised an exception 404 NotFoundError should never be retried, it's typically model_not_found error"
+        )
+    except Exception as e:
+        print("got exception", e)