From cd40d5854405636baa6790ca679c059e6dcf0e64 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 18 Jul 2024 19:22:09 -0700 Subject: [PATCH 1/9] router - refactor to tag based routing --- litellm/router.py | 6 +- litellm/router_strategy/free_paid_tiers.py | 69 ------------------- litellm/router_strategy/tag_based_routing.py | 68 ++++++++++++++++++ ...er_tiers.py => test_router_tag_routing.py} | 10 +-- litellm/types/router.py | 4 ++ 5 files changed, 81 insertions(+), 76 deletions(-) delete mode 100644 litellm/router_strategy/free_paid_tiers.py create mode 100644 litellm/router_strategy/tag_based_routing.py rename litellm/tests/{test_router_tiers.py => test_router_tag_routing.py} (89%) diff --git a/litellm/router.py b/litellm/router.py index 487d5fd6a4..44c02f1266 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -47,12 +47,12 @@ from litellm.assistants.main import AssistantDeleted from litellm.caching import DualCache, InMemoryCache, RedisCache from litellm.integrations.custom_logger import CustomLogger from litellm.llms.azure import get_azure_ad_token_from_oidc -from litellm.router_strategy.free_paid_tiers import get_deployments_for_tier from litellm.router_strategy.least_busy import LeastBusyLoggingHandler from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler from litellm.router_strategy.lowest_tpm_rpm import LowestTPMLoggingHandler from litellm.router_strategy.lowest_tpm_rpm_v2 import LowestTPMLoggingHandler_v2 +from litellm.router_strategy.tag_based_routing import get_deployments_for_tag from litellm.router_utils.client_initalization_utils import ( set_client, should_initialize_sync_client, @@ -4482,8 +4482,8 @@ class Router: request_kwargs=request_kwargs, ) - # check free / paid tier for each deployment - healthy_deployments = await get_deployments_for_tier( + # check if user wants to do tag based routing + healthy_deployments = await get_deployments_for_tag( request_kwargs=request_kwargs, healthy_deployments=healthy_deployments, ) diff --git a/litellm/router_strategy/free_paid_tiers.py b/litellm/router_strategy/free_paid_tiers.py deleted file mode 100644 index 82e38b4f53..0000000000 --- a/litellm/router_strategy/free_paid_tiers.py +++ /dev/null @@ -1,69 +0,0 @@ -""" -Use this to route requests between free and paid tiers -""" - -from typing import Any, Dict, List, Literal, Optional, TypedDict, Union, cast - -from litellm._logging import verbose_logger -from litellm.types.router import DeploymentTypedDict - - -class ModelInfo(TypedDict): - tier: Literal["free", "paid"] - - -class Deployment(TypedDict): - model_info: ModelInfo - - -async def get_deployments_for_tier( - request_kwargs: Optional[Dict[Any, Any]] = None, - healthy_deployments: Optional[Union[List[Any], Dict[Any, Any]]] = None, -): - """ - if request_kwargs contains {"metadata": {"tier": "free"}} or {"metadata": {"tier": "paid"}}, then routes the request to free/paid tier models - """ - if request_kwargs is None: - verbose_logger.debug( - "get_deployments_for_tier: request_kwargs is None returning healthy_deployments: %s", - healthy_deployments, - ) - return healthy_deployments - - verbose_logger.debug("request metadata: %s", request_kwargs.get("metadata")) - if "metadata" in request_kwargs: - metadata = request_kwargs["metadata"] - if "tier" in metadata: - selected_tier: Literal["free", "paid"] = metadata["tier"] - if healthy_deployments is None: - return None - - if selected_tier == "free": - # get all deployments 
where model_info has tier = free - free_deployments: List[Any] = [] - verbose_logger.debug( - "Getting deployments in free tier, all_deployments: %s", - healthy_deployments, - ) - for deployment in healthy_deployments: - typed_deployment = cast(Deployment, deployment) - if typed_deployment["model_info"]["tier"] == "free": - free_deployments.append(deployment) - verbose_logger.debug("free_deployments: %s", free_deployments) - return free_deployments - - elif selected_tier == "paid": - # get all deployments where model_info has tier = paid - paid_deployments: List[Any] = [] - for deployment in healthy_deployments: - typed_deployment = cast(Deployment, deployment) - if typed_deployment["model_info"]["tier"] == "paid": - paid_deployments.append(deployment) - verbose_logger.debug("paid_deployments: %s", paid_deployments) - return paid_deployments - - verbose_logger.debug( - "no tier found in metadata, returning healthy_deployments: %s", - healthy_deployments, - ) - return healthy_deployments diff --git a/litellm/router_strategy/tag_based_routing.py b/litellm/router_strategy/tag_based_routing.py new file mode 100644 index 0000000000..11bad19a33 --- /dev/null +++ b/litellm/router_strategy/tag_based_routing.py @@ -0,0 +1,68 @@ +""" +Use this to route requests between free and paid tiers +""" + +from typing import Any, Dict, List, Literal, Optional, TypedDict, Union, cast + +from litellm._logging import verbose_logger +from litellm.types.router import DeploymentTypedDict + + +async def get_deployments_for_tag( + request_kwargs: Optional[Dict[Any, Any]] = None, + healthy_deployments: Optional[Union[List[Any], Dict[Any, Any]]] = None, +): + """ + if request_kwargs contains {"metadata": {"tier": "free"}} or {"metadata": {"tier": "paid"}}, then routes the request to free/paid tier models + """ + if request_kwargs is None: + verbose_logger.debug( + "get_deployments_for_tier: request_kwargs is None returning healthy_deployments: %s", + healthy_deployments, + ) + return healthy_deployments + + if healthy_deployments is None: + verbose_logger.debug( + "get_deployments_for_tier: healthy_deployments is None returning healthy_deployments" + ) + return healthy_deployments + + verbose_logger.debug("request metadata: %s", request_kwargs.get("metadata")) + if "metadata" in request_kwargs: + metadata = request_kwargs["metadata"] + request_tags = metadata.get("tags") + + new_healthy_deployments = [] + if request_tags: + verbose_logger.debug("parameter routing: router_keys: %s", request_tags) + # example this can be router_keys=["free", "custom"] + # get all deployments that have a superset of these router keys + for deployment in healthy_deployments: + deployment_litellm_params = deployment.get("litellm_params") + deployment_tags = deployment_litellm_params.get("tags") + + verbose_logger.debug( + "deployment: %s, deployment_router_keys: %s", + deployment, + deployment_tags, + ) + + if deployment_tags is None: + continue + + if set(request_tags).issubset(set(deployment_tags)): + verbose_logger.debug( + "adding deployment with tags: %s, request tags: %s", + deployment_tags, + request_tags, + ) + new_healthy_deployments.append(deployment) + + return new_healthy_deployments + + verbose_logger.debug( + "no tier found in metadata, returning healthy_deployments: %s", + healthy_deployments, + ) + return healthy_deployments diff --git a/litellm/tests/test_router_tiers.py b/litellm/tests/test_router_tag_routing.py similarity index 89% rename from litellm/tests/test_router_tiers.py rename to 
litellm/tests/test_router_tag_routing.py index 54e67ded3f..feb67c0e93 100644 --- a/litellm/tests/test_router_tiers.py +++ b/litellm/tests/test_router_tag_routing.py @@ -45,16 +45,18 @@ async def test_router_free_paid_tier(): "litellm_params": { "model": "gpt-4o", "api_base": "https://exampleopenaiendpoint-production.up.railway.app/", + "tags": ["free"], }, - "model_info": {"tier": "paid", "id": "very-expensive-model"}, + "model_info": {"id": "very-cheap-model"}, }, { "model_name": "gpt-4", "litellm_params": { "model": "gpt-4o-mini", "api_base": "https://exampleopenaiendpoint-production.up.railway.app/", + "tags": ["paid"], }, - "model_info": {"tier": "free", "id": "very-cheap-model"}, + "model_info": {"id": "very-expensive-model"}, }, ] ) @@ -64,7 +66,7 @@ async def test_router_free_paid_tier(): response = await router.acompletion( model="gpt-4", messages=[{"role": "user", "content": "Tell me a joke."}], - metadata={"tier": "free"}, + metadata={"tags": ["free"]}, ) print("Response: ", response) @@ -79,7 +81,7 @@ async def test_router_free_paid_tier(): response = await router.acompletion( model="gpt-4", messages=[{"role": "user", "content": "Tell me a joke."}], - metadata={"tier": "paid"}, + metadata={"tags": ["paid"]}, ) print("Response: ", response) diff --git a/litellm/types/router.py b/litellm/types/router.py index df9947c26e..78dfbc4c19 100644 --- a/litellm/types/router.py +++ b/litellm/types/router.py @@ -325,6 +325,10 @@ class LiteLLMParamsTypedDict(TypedDict, total=False): ## MOCK RESPONSES ## mock_response: Optional[Union[str, ModelResponse, Exception]] + # routing params + # use this for tag-based routing + tags: Optional[List[str]] + class DeploymentTypedDict(TypedDict): model_name: str From 52682ea0ccd13f7011eae6fb5af8b003d67e945b Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 18 Jul 2024 19:24:13 -0700 Subject: [PATCH 2/9] fix remove previous code on free/paid tier --- litellm/proxy/litellm_pre_call_utils.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py index 283f31e3c0..e0e875308e 100644 --- a/litellm/proxy/litellm_pre_call_utils.py +++ b/litellm/proxy/litellm_pre_call_utils.py @@ -132,15 +132,6 @@ async def add_litellm_data_to_request( for k, v in key_metadata["cache"].items(): if k in SupportedCacheControls: data["cache"][k] = v - if "tier" in key_metadata: - if premium_user is not True: - verbose_logger.warning( - "Trying to use free/paid tier feature. 
This will not be applied %s", - CommonProxyErrors.not_premium_user.value, - ) - - # add request tier to metadata - data[_metadata_variable_name]["tier"] = key_metadata["tier"] # Team spend, budget - used by prometheus.py data[_metadata_variable_name][ From 8d227680c75f2049fca842dc2853204ecb8b92b1 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 18 Jul 2024 19:34:45 -0700 Subject: [PATCH 3/9] fix use tags as a litellm param --- litellm/main.py | 5 +++++ litellm/proxy/proxy_config.yaml | 6 ++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/litellm/main.py b/litellm/main.py index e01603b7e7..d7e01c107f 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -735,6 +735,7 @@ def completion( ] litellm_params = [ "metadata", + "tags", "acompletion", "atext_completion", "text_completion", @@ -3150,6 +3151,7 @@ def embedding( "allowed_model_region", "model_config", "cooldown_time", + "tags", ] default_params = openai_params + litellm_params non_default_params = { @@ -4379,6 +4381,8 @@ def transcription( proxy_server_request = kwargs.get("proxy_server_request", None) model_info = kwargs.get("model_info", None) metadata = kwargs.get("metadata", {}) + tags = kwargs.pop("tags", []) + drop_params = kwargs.get("drop_params", None) client: Optional[ Union[ @@ -4551,6 +4555,7 @@ def speech( ) -> HttpxBinaryResponseContent: model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider, api_base=api_base) # type: ignore + tags = kwargs.pop("tags", []) optional_params = {} if response_format is not None: diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 7e78cf3177..81ed12c07e 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -4,14 +4,12 @@ model_list: model: openai/fake api_key: fake-key api_base: https://exampleopenaiendpoint-production.up.railway.app/ - model_info: - tier: free # 👈 Key Change - set `tier` + tags: ["free"] - model_name: gpt-4 litellm_params: model: openai/gpt-4o api_key: os.environ/OPENAI_API_KEY - model_info: - tier: paid # 👈 Key Change - set `tier` + tags: ["paid"] general_settings: master_key: sk-1234 From d1a4246d2b7e965b3965172e80c7494045dea421 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 18 Jul 2024 19:39:04 -0700 Subject: [PATCH 4/9] control using enable_tag_filtering --- litellm/router.py | 3 + litellm/router_strategy/tag_based_routing.py | 13 ++++- litellm/tests/test_litellm_pre_call_utils.py | 60 -------------------- 3 files changed, 15 insertions(+), 61 deletions(-) delete mode 100644 litellm/tests/test_litellm_pre_call_utils.py diff --git a/litellm/router.py b/litellm/router.py index 44c02f1266..0e693e188f 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -145,6 +145,7 @@ class Router: content_policy_fallbacks: List = [], model_group_alias: Optional[dict] = {}, enable_pre_call_checks: bool = False, + enable_tag_filtering: bool = False, retry_after: int = 0, # min time to wait before retrying a failed request retry_policy: Optional[ RetryPolicy @@ -246,6 +247,7 @@ class Router: self.set_verbose = set_verbose self.debug_level = debug_level self.enable_pre_call_checks = enable_pre_call_checks + self.enable_tag_filtering = enable_tag_filtering if self.set_verbose == True: if debug_level == "INFO": verbose_router_logger.setLevel(logging.INFO) @@ -4484,6 +4486,7 @@ class Router: # check if user wants to do tag based routing healthy_deployments = await get_deployments_for_tag( + llm_router_instance=self, 
request_kwargs=request_kwargs, healthy_deployments=healthy_deployments, ) diff --git a/litellm/router_strategy/tag_based_routing.py b/litellm/router_strategy/tag_based_routing.py index 11bad19a33..2dbc5cb93b 100644 --- a/litellm/router_strategy/tag_based_routing.py +++ b/litellm/router_strategy/tag_based_routing.py @@ -2,19 +2,30 @@ Use this to route requests between free and paid tiers """ -from typing import Any, Dict, List, Literal, Optional, TypedDict, Union, cast +from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, TypedDict, Union from litellm._logging import verbose_logger from litellm.types.router import DeploymentTypedDict +if TYPE_CHECKING: + from litellm.router import Router as _Router + + LitellmRouter = _Router +else: + LitellmRouter = Any + async def get_deployments_for_tag( + llm_router_instance: LitellmRouter, request_kwargs: Optional[Dict[Any, Any]] = None, healthy_deployments: Optional[Union[List[Any], Dict[Any, Any]]] = None, ): """ if request_kwargs contains {"metadata": {"tier": "free"}} or {"metadata": {"tier": "paid"}}, then routes the request to free/paid tier models """ + if llm_router_instance.enable_tag_filtering is not True: + return healthy_deployments + if request_kwargs is None: verbose_logger.debug( "get_deployments_for_tier: request_kwargs is None returning healthy_deployments: %s", diff --git a/litellm/tests/test_litellm_pre_call_utils.py b/litellm/tests/test_litellm_pre_call_utils.py deleted file mode 100644 index 7f56d693d9..0000000000 --- a/litellm/tests/test_litellm_pre_call_utils.py +++ /dev/null @@ -1,60 +0,0 @@ -""" -Tests litellm pre_call_utils -""" - -import os -import sys -import traceback -import uuid -from datetime import datetime - -from dotenv import load_dotenv -from fastapi import Request -from fastapi.routing import APIRoute - -from litellm.proxy._types import UserAPIKeyAuth -from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request -from litellm.proxy.proxy_server import ProxyConfig, chat_completion - -load_dotenv() -import io -import os -import time - -import pytest - -# this file is to test litellm/proxy - -sys.path.insert( - 0, os.path.abspath("../..") -) # Adds the parent directory to the system path - - -@pytest.mark.parametrize("tier", ["free", "paid"]) -@pytest.mark.asyncio() -async def test_adding_key_tier_to_request_metadata(tier): - """ - Tests if we can add tier: free/paid from key metadata to the request metadata - """ - data = {} - - api_route = APIRoute(path="/chat/completions", endpoint=chat_completion) - request = Request( - { - "type": "http", - "method": "POST", - "route": api_route, - "path": api_route.path, - "headers": [], - } - ) - new_data = await add_litellm_data_to_request( - data=data, - request=request, - user_api_key_dict=UserAPIKeyAuth(metadata={"tier": tier}), - proxy_config=ProxyConfig(), - ) - - print("new_data", new_data) - - assert new_data["metadata"]["tier"] == tier From c2856c6e8fee1caae1cdd4074cc136ac00e89323 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 18 Jul 2024 20:10:45 -0700 Subject: [PATCH 5/9] check if using tag based routing --- litellm/proxy/litellm_pre_call_utils.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py index e0e875308e..e6bce53928 100644 --- a/litellm/proxy/litellm_pre_call_utils.py +++ b/litellm/proxy/litellm_pre_call_utils.py @@ -75,7 +75,7 @@ async def add_litellm_data_to_request( dict: The modified data dictionary. 
""" - from litellm.proxy.proxy_server import premium_user + from litellm.proxy.proxy_server import llm_router, premium_user safe_add_api_version_from_query_params(data, request) @@ -166,7 +166,8 @@ async def add_litellm_data_to_request( if user_api_key_dict.allowed_model_region is not None: data["allowed_model_region"] = user_api_key_dict.allowed_model_region - ## [Enterprise Only] Add User-IP Address + ## [Enterprise Only] + # Add User-IP Address requester_ip_address = "" if premium_user is True: # Only set the IP Address for Enterprise Users @@ -179,6 +180,15 @@ async def add_litellm_data_to_request( requester_ip_address = request.client.host data[_metadata_variable_name]["requester_ip_address"] = requester_ip_address + # Enterprise Only - Check if using tag based routing + if llm_router and llm_router.enable_tag_filtering is True: + if premium_user is not True: + verbose_proxy_logger.warning( + "router.enable_tag_filtering is on %s \n switched off router.enable_tag_filtering", + CommonProxyErrors.not_premium_user.value, + ) + llm_router.enable_tag_filtering = False + ### TEAM-SPECIFIC PARAMS ### if user_api_key_dict.team_id is not None: team_config = await proxy_config.load_team_config( From 92f9bdcb941bd023923203de3e0188428df272e0 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 18 Jul 2024 21:48:24 -0700 Subject: [PATCH 6/9] rename doc --- docs/my-website/docs/proxy/free_paid_tier.md | 102 ------------------- docs/my-website/docs/proxy/tag_routing.md | 38 +++++++ docs/my-website/sidebars.js | 2 +- 3 files changed, 39 insertions(+), 103 deletions(-) delete mode 100644 docs/my-website/docs/proxy/free_paid_tier.md create mode 100644 docs/my-website/docs/proxy/tag_routing.md diff --git a/docs/my-website/docs/proxy/free_paid_tier.md b/docs/my-website/docs/proxy/free_paid_tier.md deleted file mode 100644 index 01230e1f01..0000000000 --- a/docs/my-website/docs/proxy/free_paid_tier.md +++ /dev/null @@ -1,102 +0,0 @@ -# 💸 Free, Paid Tier Routing - -Route Virtual Keys on `free tier` to cheaper models - -### 1. Define free, paid tier models on config.yaml - -:::info -Requests with `model=gpt-4` will be routed to either `openai/fake` or `openai/gpt-4o` depending on which tier the virtual key is on -::: - -```yaml -model_list: - - model_name: gpt-4 - litellm_params: - model: openai/fake - api_key: fake-key - api_base: https://exampleopenaiendpoint-production.up.railway.app/ - model_info: - tier: free # 👈 Key Change - set `tier to paid or free` - - model_name: gpt-4 - litellm_params: - model: openai/gpt-4o - api_key: os.environ/OPENAI_API_KEY - model_info: - tier: paid # 👈 Key Change - set `tier to paid or free` - -general_settings: - master_key: sk-1234 -``` - -### 2. Create Virtual Keys with pricing `tier=free` - -```shell -curl --location 'http://0.0.0.0:4000/key/generate' \ - --header 'Authorization: Bearer sk-1234' \ - --header 'Content-Type: application/json' \ - --data '{ - "metadata": {"tier": "free"} -}' -``` - -### 3. 
Make Request with Key on `Free Tier` - -```shell -curl -i http://localhost:4000/v1/chat/completions \ - -H "Content-Type: application/json" \ -curl -i http://localhost:4000/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer sk-inxzoSurQsjog9gPrVOCcA" \ - -d '{ - "model": "gpt-4", - "messages": [ - {"role": "user", "content": "Hello, Claude gm!"} - ] - }' -``` - -**Expected Response** - -If this worked as expected then `x-litellm-model-api-base` should be `https://exampleopenaiendpoint-production.up.railway.app/` in the response headers - -```shell -x-litellm-model-api-base: https://exampleopenaiendpoint-production.up.railway.app/ - -{"id":"chatcmpl-657b750f581240c1908679ed94b31bfe","choices":[{"finish_reason":"stop","index":0,"message":{"content":"\n\nHello there, how may I assist you today?","role":"assistant","tool_calls":null,"function_call":null}}],"created":1677652288,"model":"gpt-3.5-turbo-0125","object":"chat.completion","system_fingerprint":"fp_44709d6fcb","usage":{"completion_tokens":12,"prompt_tokens":9,"total_tokens":21}}% -``` - - -### 4. Create Virtual Keys with pricing `tier=paid` - -```shell -curl --location 'http://0.0.0.0:4000/key/generate' \ - --header 'Authorization: Bearer sk-1234' \ - --header 'Content-Type: application/json' \ - --data '{ - "metadata": {"tier": "paid"} - }' -``` - -### 5. Make Request with Key on `Paid Tier` - -```shell -curl -i http://localhost:4000/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer sk-mnJoeSc6jFjzZr256q-iqA" \ - -d '{ - "model": "gpt-4", - "messages": [ - {"role": "user", "content": "Hello, Claude gm!"} - ] - }' -``` - -**Expected Response** - -If this worked as expected then `x-litellm-model-api-base` should be `https://api.openai.com` in the response headers - -```shell -x-litellm-model-api-base: https://api.openai.com - -{"id":"chatcmpl-9mW75EbJCgwmLcO0M5DmwxpiBgWdc","choices":[{"finish_reason":"stop","index":0,"message":{"content":"Good morning! How can I assist you today?","role":"assistant","tool_calls":null,"function_call":null}}],"created":1721350215,"model":"gpt-4o-2024-05-13","object":"chat.completion","system_fingerprint":"fp_c4e5b6fa31","usage":{"completion_tokens":10,"prompt_tokens":12,"total_tokens":22}} -``` diff --git a/docs/my-website/docs/proxy/tag_routing.md b/docs/my-website/docs/proxy/tag_routing.md new file mode 100644 index 0000000000..c33bce315f --- /dev/null +++ b/docs/my-website/docs/proxy/tag_routing.md @@ -0,0 +1,38 @@ +# 💸 Tag Based Routing + +Route requests based on tags + +### 1. 
Define free, paid tier models on config.yaml + +```yaml +model_list: + - model_name: gpt-4 + litellm_params: + model: openai/fake + api_key: fake-key + api_base: https://exampleopenaiendpoint-production.up.railway.app/ + tags: ["free"] + - model_name: gpt-4 + litellm_params: + model: openai/gpt-4o + api_key: os.environ/OPENAI_API_KEY + tags: ["paid"] + +general_settings: + master_key: sk-1234 +``` + +### Make Request with Key on `Free Tier` + +```shell +curl -i http://localhost:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "model": "gpt-4", + "metadata": {"tags": ["paid"]}, + "messages": [ + {"role": "user", "content": "Hello, Claude gm!"} + ] + }' +``` diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index a74543c871..8fd83b3db2 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -44,7 +44,7 @@ const sidebars = { "proxy/cost_tracking", "proxy/self_serve", "proxy/virtual_keys", - "proxy/free_paid_tier", + "proxy/tag_routing", "proxy/users", "proxy/team_budgets", "proxy/customers", From ae8a65dc54e7ae8241fac6681080dc1dfc8497b7 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 18 Jul 2024 21:49:36 -0700 Subject: [PATCH 7/9] fix test --- litellm/tests/test_router_tag_routing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/litellm/tests/test_router_tag_routing.py b/litellm/tests/test_router_tag_routing.py index feb67c0e93..67f100d794 100644 --- a/litellm/tests/test_router_tag_routing.py +++ b/litellm/tests/test_router_tag_routing.py @@ -58,7 +58,8 @@ async def test_router_free_paid_tier(): }, "model_info": {"id": "very-expensive-model"}, }, - ] + ], + enable_tag_filtering=True, ) for _ in range(5): From 5aa7430d8055b3c1fcbcb2f996268fd97ad6219b Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 18 Jul 2024 21:55:53 -0700 Subject: [PATCH 8/9] add tags to metadata --- litellm/proxy/litellm_pre_call_utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py index e6bce53928..1014a325ab 100644 --- a/litellm/proxy/litellm_pre_call_utils.py +++ b/litellm/proxy/litellm_pre_call_utils.py @@ -188,6 +188,9 @@ async def add_litellm_data_to_request( CommonProxyErrors.not_premium_user.value, ) llm_router.enable_tag_filtering = False + else: + if "tags" in data: + data[_metadata_variable_name]["tags"] = data["tags"] ### TEAM-SPECIFIC PARAMS ### if user_api_key_dict.team_id is not None: From 90a169f16fbd7ae7a3e44f0316ae0ac85c196ab7 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 18 Jul 2024 22:18:10 -0700 Subject: [PATCH 9/9] docs - tag based routing --- docs/my-website/docs/proxy/tag_routing.md | 109 ++++++++++++++++++++-- litellm/proxy/proxy_config.yaml | 10 +- 2 files changed, 107 insertions(+), 12 deletions(-) diff --git a/docs/my-website/docs/proxy/tag_routing.md b/docs/my-website/docs/proxy/tag_routing.md index c33bce315f..763d509185 100644 --- a/docs/my-website/docs/proxy/tag_routing.md +++ b/docs/my-website/docs/proxy/tag_routing.md @@ -1,8 +1,12 @@ # 💸 Tag Based Routing -Route requests based on tags +Route requests based on tags. +This is useful for implementing free / paid tiers for users -### 1. Define free, paid tier models on config.yaml +### 1. 
Define tags on config.yaml
+
+- A request with `tags=["free"]` will get routed to `openai/fake`
+- A request with `tags=["paid"]` will get routed to `openai/gpt-4o`
 
 ```yaml
 model_list:
@@ -11,18 +15,22 @@ model_list:
       model: openai/fake
       api_key: fake-key
       api_base: https://exampleopenaiendpoint-production.up.railway.app/
-      tags: ["free"]
+      tags: ["free"] # 👈 Key Change
   - model_name: gpt-4
     litellm_params:
       model: openai/gpt-4o
       api_key: os.environ/OPENAI_API_KEY
-      tags: ["paid"]
+      tags: ["paid"] # 👈 Key Change
 
+router_settings:
+  enable_tag_filtering: True # 👈 Key Change
 general_settings:
   master_key: sk-1234
 ```
 
-### Make Request with Key on `Free Tier`
+### 2. Make Request with `tags=["free"]`
+
+This request includes "tags": ["free"], which routes it to `openai/fake`
 
 ```shell
 curl -i http://localhost:4000/v1/chat/completions \
   -H "Content-Type: application/json" \
@@ -30,9 +38,96 @@ curl -i http://localhost:4000/v1/chat/completions \
   -H "Authorization: Bearer sk-1234" \
   -d '{
     "model": "gpt-4",
-    "metadata": {"tags": ["paid"]},
     "messages": [
       {"role": "user", "content": "Hello, Claude gm!"}
-    ]
+    ],
+    "tags": ["free"]
   }'
 ```
+**Expected Response**
+
+Expect to see the following response header when this works
+```shell
+x-litellm-model-api-base: https://exampleopenaiendpoint-production.up.railway.app/
+```
+
+Response
+```shell
+{
+  "id": "chatcmpl-33c534e3d70148218e2d62496b81270b",
+  "choices": [
+    {
+      "finish_reason": "stop",
+      "index": 0,
+      "message": {
+        "content": "\n\nHello there, how may I assist you today?",
+        "role": "assistant",
+        "tool_calls": null,
+        "function_call": null
+      }
+    }
+  ],
+  "created": 1677652288,
+  "model": "gpt-3.5-turbo-0125",
+  "object": "chat.completion",
+  "system_fingerprint": "fp_44709d6fcb",
+  "usage": {
+    "completion_tokens": 12,
+    "prompt_tokens": 9,
+    "total_tokens": 21
+  }
+}
+```
+
+
+### 3. Make Request with `tags=["paid"]`
+
+This request includes "tags": ["paid"], which routes it to `openai/gpt-4o`
+
+```shell
+curl -i http://localhost:4000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-1234" \
+  -d '{
+    "model": "gpt-4",
+    "messages": [
+      {"role": "user", "content": "Hello, Claude gm!"}
+    ],
+    "tags": ["paid"]
+  }'
+```
+
+**Expected Response**
+
+Expect to see the following response header when this works
+```shell
+x-litellm-model-api-base: https://api.openai.com
+```
+
+Response
+```shell
+{
+  "id": "chatcmpl-9maCcqQYTqdJrtvfakIawMOIUbEZx",
+  "choices": [
+    {
+      "finish_reason": "stop",
+      "index": 0,
+      "message": {
+        "content": "Good morning! 
How can I assist you today?", + "role": "assistant", + "tool_calls": null, + "function_call": null + } + } + ], + "created": 1721365934, + "model": "gpt-4o-2024-05-13", + "object": "chat.completion", + "system_fingerprint": "fp_c4e5b6fa31", + "usage": { + "completion_tokens": 10, + "prompt_tokens": 12, + "total_tokens": 22 + } +} +``` \ No newline at end of file diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 81ed12c07e..f20c780cc9 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -4,14 +4,14 @@ model_list: model: openai/fake api_key: fake-key api_base: https://exampleopenaiendpoint-production.up.railway.app/ - tags: ["free"] + tags: ["free"] # 👈 Key Change - model_name: gpt-4 litellm_params: model: openai/gpt-4o api_key: os.environ/OPENAI_API_KEY - tags: ["paid"] + tags: ["paid"] # 👈 Key Change +router_settings: + enable_tag_filtering: True # 👈 Key Change general_settings: - master_key: sk-1234 - - + master_key: sk-1234 \ No newline at end of file
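
---

For readers following the series: the routing change these patches introduce boils down to a subset match between the tags on a request and the `tags` list in each deployment's `litellm_params`. The sketch below is an illustration only, not code from the series; `filter_deployments_by_tags` is a hypothetical name, and it assumes deployments are plain dicts shaped like the `model_list` entries above. It mirrors the behavior of `get_deployments_for_tag` as added in PATCH 1/9 and gated in PATCH 4/9: with filtering disabled or no request tags, all healthy deployments pass through unchanged; otherwise untagged deployments are skipped and only deployments whose tags contain every request tag survive.

```python
# Illustrative sketch (not litellm source) of tag-based deployment filtering.
from typing import Any, Dict, List, Optional


def filter_deployments_by_tags(
    request_tags: Optional[List[str]],
    healthy_deployments: List[Dict[str, Any]],
    enable_tag_filtering: bool = True,
) -> List[Dict[str, Any]]:
    """Keep deployments whose litellm_params["tags"] contain every request tag."""
    # With filtering disabled or no tags on the request, routing is unchanged.
    if not enable_tag_filtering or not request_tags:
        return healthy_deployments

    filtered: List[Dict[str, Any]] = []
    for deployment in healthy_deployments:
        deployment_tags = deployment.get("litellm_params", {}).get("tags")
        if deployment_tags is None:
            # Untagged deployments never match a tagged request.
            continue
        if set(request_tags).issubset(set(deployment_tags)):
            filtered.append(deployment)
    return filtered


if __name__ == "__main__":
    deployments = [
        {"model_name": "gpt-4", "litellm_params": {"model": "openai/fake", "tags": ["free"]}},
        {"model_name": "gpt-4", "litellm_params": {"model": "openai/gpt-4o", "tags": ["paid"]}},
    ]
    # A request tagged ["free"] is narrowed to the openai/fake deployment.
    print(filter_deployments_by_tags(["free"], deployments))
```

One consequence of the subset rule worth noting when writing configs: a request carrying multiple tags (e.g. `["free", "eu"]`) only matches deployments tagged with *all* of them, and a tagged request that matches nothing yields an empty deployment list rather than falling back to untagged models.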