Mirror of https://github.com/BerriAI/litellm.git, synced 2025-04-25 10:44:24 +00:00
[Fix] Router / Proxy - Tag based routing: raise correct error when no deployments found and tag filtering is on (#5745)

* fix tag routing - raise correct error when no model with tag based routing
* fix error string from tag based routing
* test router tag based routing
* raise 401 error when no tags available for deployment
* linting fix
This commit is contained in:
parent 911230c434
commit 1bb1f70a47

6 changed files with 79 additions and 2 deletions
@@ -10,7 +10,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Union
 from pydantic import BaseModel, ConfigDict, Extra, Field, Json, model_validator
 from typing_extensions import Annotated, TypedDict
 
-from litellm.types.router import UpdateRouterConfig
+from litellm.types.router import RouterErrors, UpdateRouterConfig
 from litellm.types.utils import ProviderField
 
 if TYPE_CHECKING:
@@ -1826,6 +1826,8 @@ class ProxyException(Exception):
             or "No deployments available" in self.message
         ):
             self.code = "429"
+        elif RouterErrors.no_deployments_with_tag_routing.value in self.message:
+            self.code = "401"
 
     def to_dict(self) -> dict:
        """Converts the ProxyException instance to a dictionary."""
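The status-code selection in this hunk is plain substring matching on the exception message. A minimal standalone sketch of the same idea, with the two message strings copied from this commit; the helper function itself is illustrative and not part of litellm:

    # Sketch of the substring-based status mapping shown above; helper name is
    # illustrative, the two message strings are taken from this commit.
    NO_DEPLOYMENTS_AVAILABLE = "No deployments available"
    NO_DEPLOYMENTS_WITH_TAG_ROUTING = "Not allowed to access model due to tags configuration"

    def status_code_for_message(message: str) -> str:
        if NO_DEPLOYMENTS_AVAILABLE in message:
            return "429"  # capacity exhausted: retryable
        if NO_DEPLOYMENTS_WITH_TAG_ROUTING in message:
            return "401"  # tag-based access denial: not retryable
        return "500"  # fallback; the real class handles many more cases

    assert status_code_for_message(
        "Not allowed to access model due to tags configuration. Passed model=gpt-4 and tags=['paid']"
    ) == "401"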
@@ -19,9 +19,12 @@ model_list:
       model: openai/429
       api_key: fake-key
       api_base: https://exampleopenaiendpoint-production.up.railway.app
+      tags: ["fake"]
 
 
 general_settings:
   master_key: sk-1234
 
+router_settings:
+  enable_tag_filtering: true
 
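For a request to reach the tagged deployment above, its metadata has to carry a matching tag. A hedged sketch with the OpenAI Python client: the model name and base_url are placeholders (the entry's model_name is not shown in this hunk), and passing tags under metadata via extra_body is an assumption consistent with the router test later in this commit:

    from openai import OpenAI

    # Placeholders: base_url assumes a locally running proxy; the model name is
    # hypothetical since the entry's model_name is outside the hunk above.
    client = OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

    response = client.chat.completions.create(
        model="fake-openai-endpoint",
        messages=[{"role": "user", "content": "Hello"}],
        # assumption: tags travel in request metadata, as in the router test below
        extra_body={"metadata": {"tags": ["fake"]}},
    )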
@@ -5180,6 +5180,7 @@ class Router:
         # check if user wants to do tag based routing
         healthy_deployments = await get_deployments_for_tag(  # type: ignore
             llm_router_instance=self,
+            model=model,
             request_kwargs=request_kwargs,
             healthy_deployments=healthy_deployments,
         )
@@ -9,7 +9,7 @@ Use this to route requests between Teams
 from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, TypedDict, Union
 
 from litellm._logging import verbose_logger
-from litellm.types.router import DeploymentTypedDict
+from litellm.types.router import DeploymentTypedDict, RouterErrors
 
 if TYPE_CHECKING:
     from litellm.router import Router as _Router
@@ -21,9 +21,15 @@ else:
 
 async def get_deployments_for_tag(
     llm_router_instance: LitellmRouter,
+    model: str,  # used to raise the correct error
     healthy_deployments: Union[List[Any], Dict[Any, Any]],
     request_kwargs: Optional[Dict[Any, Any]] = None,
 ):
+    """
+    Returns a list of deployments that match the requested model and tags in the request.
+
+    Executes tag based filtering based on the tags in request metadata and the tags on the deployments
+    """
     if llm_router_instance.enable_tag_filtering is not True:
         return healthy_deployments
 
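The filtering body itself is outside this hunk; conceptually it keeps deployments whose configured tags overlap the tags in the request metadata. A simplified sketch under that assumption, not litellm's exact implementation:

    from typing import Any, Dict, List, Optional

    def filter_deployments_by_tags(
        healthy_deployments: List[Dict[Any, Any]],
        request_kwargs: Optional[Dict[Any, Any]] = None,
    ) -> List[Dict[Any, Any]]:
        # Simplified: keep deployments whose litellm_params tags intersect the
        # request's metadata tags. The exact matching rule is an assumption.
        request_tags = ((request_kwargs or {}).get("metadata") or {}).get("tags", [])
        if not request_tags:
            return healthy_deployments
        return [
            d
            for d in healthy_deployments
            if set(d.get("litellm_params", {}).get("tags") or []) & set(request_tags)
        ]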
@@ -80,6 +86,11 @@ async def get_deployments_for_tag(
             )
             new_healthy_deployments.append(deployment)
 
+    if len(new_healthy_deployments) == 0:
+        raise ValueError(
+            f"{RouterErrors.no_deployments_with_tag_routing.value}. Passed model={model} and tags={request_tags}"
+        )
+
     return new_healthy_deployments
 
 # for Untagged requests use default deployments if set
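For a concrete sense of what callers see, the message composed above for model "gpt-4" and tags ["paid"] renders as the output below; the leading enum substring is exactly what the proxy's 401 mapping matches on (assumes litellm is importable):

    from litellm.types.router import RouterErrors

    model, request_tags = "gpt-4", ["paid"]
    print(f"{RouterErrors.no_deployments_with_tag_routing.value}. Passed model={model} and tags={request_tags}")
    # -> Not allowed to access model due to tags configuration. Passed model=gpt-4 and tags=['paid']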
@@ -160,3 +160,60 @@ async def test_default_tagged_deployments():
     print("response_extra_info: ", response_extra_info)
 
     assert response_extra_info["model_id"] == "default-model"
+
+
+@pytest.mark.asyncio()
+async def test_error_from_tag_routing():
+    """
+    Tests the correct error raised when no deployments found for tag
+    """
+    import logging
+
+    from litellm._logging import verbose_logger
+
+    verbose_logger.setLevel(logging.DEBUG)
+    router = litellm.Router(
+        model_list=[
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                },
+                "model_info": {"id": "default-model"},
+            },
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                },
+                "model_info": {"id": "default-model-2"},
+            },
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o-mini",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                    "tags": ["teamA"],
+                },
+                "model_info": {"id": "very-expensive-model"},
+            },
+        ],
+        enable_tag_filtering=True,
+    )
+
+    try:
+        response = await router.acompletion(
+            model="gpt-4",
+            messages=[{"role": "user", "content": "Tell me a joke."}],
+            metadata={"tags": ["paid"]},
+        )
+
+        pytest.fail("this should have failed - expected it to fail")
+    except Exception as e:
+        from litellm.types.router import RouterErrors
+
+        assert RouterErrors.no_deployments_with_tag_routing.value in str(e)
+        print("got expected exception = ", e)
+        pass
@@ -414,6 +414,9 @@ class RouterErrors(enum.Enum):
 
     user_defined_ratelimit_error = "Deployment over user-defined ratelimit."
     no_deployments_available = "No deployments available for selected model"
+    no_deployments_with_tag_routing = (
+        "Not allowed to access model due to tags configuration"
+    )
 
 
 class AllowedFailsPolicy(BaseModel):
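Since ProxyException matches these enum values as substrings, the strings double as the contract between router errors and HTTP status codes; a quick check of that contract (assumes litellm is installed):

    from litellm.types.router import RouterErrors

    err = "Not allowed to access model due to tags configuration. Passed model=gpt-4 and tags=['paid']"
    assert RouterErrors.no_deployments_with_tag_routing.value in err  # proxy maps this to "401"
    assert RouterErrors.no_deployments_available.value not in err  # that one maps to "429"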