From cd40d5854405636baa6790ca679c059e6dcf0e64 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 18 Jul 2024 19:22:09 -0700 Subject: [PATCH 1/9] router - refactor to tag based routing --- litellm/router.py | 6 +- litellm/router_strategy/free_paid_tiers.py | 69 ------------------- litellm/router_strategy/tag_based_routing.py | 68 ++++++++++++++++++ ...er_tiers.py => test_router_tag_routing.py} | 10 +-- litellm/types/router.py | 4 ++ 5 files changed, 81 insertions(+), 76 deletions(-) delete mode 100644 litellm/router_strategy/free_paid_tiers.py create mode 100644 litellm/router_strategy/tag_based_routing.py rename litellm/tests/{test_router_tiers.py => test_router_tag_routing.py} (89%) diff --git a/litellm/router.py b/litellm/router.py index 487d5fd6a4..44c02f1266 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -47,12 +47,12 @@ from litellm.assistants.main import AssistantDeleted from litellm.caching import DualCache, InMemoryCache, RedisCache from litellm.integrations.custom_logger import CustomLogger from litellm.llms.azure import get_azure_ad_token_from_oidc -from litellm.router_strategy.free_paid_tiers import get_deployments_for_tier from litellm.router_strategy.least_busy import LeastBusyLoggingHandler from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler from litellm.router_strategy.lowest_tpm_rpm import LowestTPMLoggingHandler from litellm.router_strategy.lowest_tpm_rpm_v2 import LowestTPMLoggingHandler_v2 +from litellm.router_strategy.tag_based_routing import get_deployments_for_tag from litellm.router_utils.client_initalization_utils import ( set_client, should_initialize_sync_client, @@ -4482,8 +4482,8 @@ class Router: request_kwargs=request_kwargs, ) - # check free / paid tier for each deployment - healthy_deployments = await get_deployments_for_tier( + # check if user wants to do tag based routing + healthy_deployments = await get_deployments_for_tag( request_kwargs=request_kwargs, healthy_deployments=healthy_deployments, ) diff --git a/litellm/router_strategy/free_paid_tiers.py b/litellm/router_strategy/free_paid_tiers.py deleted file mode 100644 index 82e38b4f53..0000000000 --- a/litellm/router_strategy/free_paid_tiers.py +++ /dev/null @@ -1,69 +0,0 @@ -""" -Use this to route requests between free and paid tiers -""" - -from typing import Any, Dict, List, Literal, Optional, TypedDict, Union, cast - -from litellm._logging import verbose_logger -from litellm.types.router import DeploymentTypedDict - - -class ModelInfo(TypedDict): - tier: Literal["free", "paid"] - - -class Deployment(TypedDict): - model_info: ModelInfo - - -async def get_deployments_for_tier( - request_kwargs: Optional[Dict[Any, Any]] = None, - healthy_deployments: Optional[Union[List[Any], Dict[Any, Any]]] = None, -): - """ - if request_kwargs contains {"metadata": {"tier": "free"}} or {"metadata": {"tier": "paid"}}, then routes the request to free/paid tier models - """ - if request_kwargs is None: - verbose_logger.debug( - "get_deployments_for_tier: request_kwargs is None returning healthy_deployments: %s", - healthy_deployments, - ) - return healthy_deployments - - verbose_logger.debug("request metadata: %s", request_kwargs.get("metadata")) - if "metadata" in request_kwargs: - metadata = request_kwargs["metadata"] - if "tier" in metadata: - selected_tier: Literal["free", "paid"] = metadata["tier"] - if healthy_deployments is None: - return None - - if selected_tier == "free": - # get all deployments 
where model_info has tier = free - free_deployments: List[Any] = [] - verbose_logger.debug( - "Getting deployments in free tier, all_deployments: %s", - healthy_deployments, - ) - for deployment in healthy_deployments: - typed_deployment = cast(Deployment, deployment) - if typed_deployment["model_info"]["tier"] == "free": - free_deployments.append(deployment) - verbose_logger.debug("free_deployments: %s", free_deployments) - return free_deployments - - elif selected_tier == "paid": - # get all deployments where model_info has tier = paid - paid_deployments: List[Any] = [] - for deployment in healthy_deployments: - typed_deployment = cast(Deployment, deployment) - if typed_deployment["model_info"]["tier"] == "paid": - paid_deployments.append(deployment) - verbose_logger.debug("paid_deployments: %s", paid_deployments) - return paid_deployments - - verbose_logger.debug( - "no tier found in metadata, returning healthy_deployments: %s", - healthy_deployments, - ) - return healthy_deployments diff --git a/litellm/router_strategy/tag_based_routing.py b/litellm/router_strategy/tag_based_routing.py new file mode 100644 index 0000000000..11bad19a33 --- /dev/null +++ b/litellm/router_strategy/tag_based_routing.py @@ -0,0 +1,68 @@ +""" +Use this to route requests between free and paid tiers +""" + +from typing import Any, Dict, List, Literal, Optional, TypedDict, Union, cast + +from litellm._logging import verbose_logger +from litellm.types.router import DeploymentTypedDict + + +async def get_deployments_for_tag( + request_kwargs: Optional[Dict[Any, Any]] = None, + healthy_deployments: Optional[Union[List[Any], Dict[Any, Any]]] = None, +): + """ + if request_kwargs contains {"metadata": {"tier": "free"}} or {"metadata": {"tier": "paid"}}, then routes the request to free/paid tier models + """ + if request_kwargs is None: + verbose_logger.debug( + "get_deployments_for_tier: request_kwargs is None returning healthy_deployments: %s", + healthy_deployments, + ) + return healthy_deployments + + if healthy_deployments is None: + verbose_logger.debug( + "get_deployments_for_tier: healthy_deployments is None returning healthy_deployments" + ) + return healthy_deployments + + verbose_logger.debug("request metadata: %s", request_kwargs.get("metadata")) + if "metadata" in request_kwargs: + metadata = request_kwargs["metadata"] + request_tags = metadata.get("tags") + + new_healthy_deployments = [] + if request_tags: + verbose_logger.debug("parameter routing: router_keys: %s", request_tags) + # example this can be router_keys=["free", "custom"] + # get all deployments that have a superset of these router keys + for deployment in healthy_deployments: + deployment_litellm_params = deployment.get("litellm_params") + deployment_tags = deployment_litellm_params.get("tags") + + verbose_logger.debug( + "deployment: %s, deployment_router_keys: %s", + deployment, + deployment_tags, + ) + + if deployment_tags is None: + continue + + if set(request_tags).issubset(set(deployment_tags)): + verbose_logger.debug( + "adding deployment with tags: %s, request tags: %s", + deployment_tags, + request_tags, + ) + new_healthy_deployments.append(deployment) + + return new_healthy_deployments + + verbose_logger.debug( + "no tier found in metadata, returning healthy_deployments: %s", + healthy_deployments, + ) + return healthy_deployments diff --git a/litellm/tests/test_router_tiers.py b/litellm/tests/test_router_tag_routing.py similarity index 89% rename from litellm/tests/test_router_tiers.py rename to 
litellm/tests/test_router_tag_routing.py index 54e67ded3f..feb67c0e93 100644 --- a/litellm/tests/test_router_tiers.py +++ b/litellm/tests/test_router_tag_routing.py @@ -45,16 +45,18 @@ async def test_router_free_paid_tier(): "litellm_params": { "model": "gpt-4o", "api_base": "https://exampleopenaiendpoint-production.up.railway.app/", + "tags": ["free"], }, - "model_info": {"tier": "paid", "id": "very-expensive-model"}, + "model_info": {"id": "very-cheap-model"}, }, { "model_name": "gpt-4", "litellm_params": { "model": "gpt-4o-mini", "api_base": "https://exampleopenaiendpoint-production.up.railway.app/", + "tags": ["paid"], }, - "model_info": {"tier": "free", "id": "very-cheap-model"}, + "model_info": {"id": "very-expensive-model"}, }, ] ) @@ -64,7 +66,7 @@ async def test_router_free_paid_tier(): response = await router.acompletion( model="gpt-4", messages=[{"role": "user", "content": "Tell me a joke."}], - metadata={"tier": "free"}, + metadata={"tags": ["free"]}, ) print("Response: ", response) @@ -79,7 +81,7 @@ async def test_router_free_paid_tier(): response = await router.acompletion( model="gpt-4", messages=[{"role": "user", "content": "Tell me a joke."}], - metadata={"tier": "paid"}, + metadata={"tags": ["paid"]}, ) print("Response: ", response) diff --git a/litellm/types/router.py b/litellm/types/router.py index df9947c26e..78dfbc4c19 100644 --- a/litellm/types/router.py +++ b/litellm/types/router.py @@ -325,6 +325,10 @@ class LiteLLMParamsTypedDict(TypedDict, total=False): ## MOCK RESPONSES ## mock_response: Optional[Union[str, ModelResponse, Exception]] + # routing params + # use this for tag-based routing + tags: Optional[List[str]] + class DeploymentTypedDict(TypedDict): model_name: str From 52682ea0ccd13f7011eae6fb5af8b003d67e945b Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 18 Jul 2024 19:24:13 -0700 Subject: [PATCH 2/9] fix remove previous code on free/paid tier --- litellm/proxy/litellm_pre_call_utils.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py index 283f31e3c0..e0e875308e 100644 --- a/litellm/proxy/litellm_pre_call_utils.py +++ b/litellm/proxy/litellm_pre_call_utils.py @@ -132,15 +132,6 @@ async def add_litellm_data_to_request( for k, v in key_metadata["cache"].items(): if k in SupportedCacheControls: data["cache"][k] = v - if "tier" in key_metadata: - if premium_user is not True: - verbose_logger.warning( - "Trying to use free/paid tier feature. 
This will not be applied %s", - CommonProxyErrors.not_premium_user.value, - ) - - # add request tier to metadata - data[_metadata_variable_name]["tier"] = key_metadata["tier"] # Team spend, budget - used by prometheus.py data[_metadata_variable_name][ From 8d227680c75f2049fca842dc2853204ecb8b92b1 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 18 Jul 2024 19:34:45 -0700 Subject: [PATCH 3/9] fix use tags as a litellm param --- litellm/main.py | 5 +++++ litellm/proxy/proxy_config.yaml | 6 ++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/litellm/main.py b/litellm/main.py index e01603b7e7..d7e01c107f 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -735,6 +735,7 @@ def completion( ] litellm_params = [ "metadata", + "tags", "acompletion", "atext_completion", "text_completion", @@ -3150,6 +3151,7 @@ def embedding( "allowed_model_region", "model_config", "cooldown_time", + "tags", ] default_params = openai_params + litellm_params non_default_params = { @@ -4379,6 +4381,8 @@ def transcription( proxy_server_request = kwargs.get("proxy_server_request", None) model_info = kwargs.get("model_info", None) metadata = kwargs.get("metadata", {}) + tags = kwargs.pop("tags", []) + drop_params = kwargs.get("drop_params", None) client: Optional[ Union[ @@ -4551,6 +4555,7 @@ def speech( ) -> HttpxBinaryResponseContent: model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(model=model, custom_llm_provider=custom_llm_provider, api_base=api_base) # type: ignore + tags = kwargs.pop("tags", []) optional_params = {} if response_format is not None: diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 7e78cf3177..81ed12c07e 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -4,14 +4,12 @@ model_list: model: openai/fake api_key: fake-key api_base: https://exampleopenaiendpoint-production.up.railway.app/ - model_info: - tier: free # 👈 Key Change - set `tier` + tags: ["free"] - model_name: gpt-4 litellm_params: model: openai/gpt-4o api_key: os.environ/OPENAI_API_KEY - model_info: - tier: paid # 👈 Key Change - set `tier` + tags: ["paid"] general_settings: master_key: sk-1234 From d1a4246d2b7e965b3965172e80c7494045dea421 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 18 Jul 2024 19:39:04 -0700 Subject: [PATCH 4/9] control using enable_tag_filtering --- litellm/router.py | 3 + litellm/router_strategy/tag_based_routing.py | 13 ++++- litellm/tests/test_litellm_pre_call_utils.py | 60 -------------------- 3 files changed, 15 insertions(+), 61 deletions(-) delete mode 100644 litellm/tests/test_litellm_pre_call_utils.py diff --git a/litellm/router.py b/litellm/router.py index 44c02f1266..0e693e188f 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -145,6 +145,7 @@ class Router: content_policy_fallbacks: List = [], model_group_alias: Optional[dict] = {}, enable_pre_call_checks: bool = False, + enable_tag_filtering: bool = False, retry_after: int = 0, # min time to wait before retrying a failed request retry_policy: Optional[ RetryPolicy @@ -246,6 +247,7 @@ class Router: self.set_verbose = set_verbose self.debug_level = debug_level self.enable_pre_call_checks = enable_pre_call_checks + self.enable_tag_filtering = enable_tag_filtering if self.set_verbose == True: if debug_level == "INFO": verbose_router_logger.setLevel(logging.INFO) @@ -4484,6 +4486,7 @@ class Router: # check if user wants to do tag based routing healthy_deployments = await get_deployments_for_tag( + llm_router_instance=self, 
request_kwargs=request_kwargs, healthy_deployments=healthy_deployments, ) diff --git a/litellm/router_strategy/tag_based_routing.py b/litellm/router_strategy/tag_based_routing.py index 11bad19a33..2dbc5cb93b 100644 --- a/litellm/router_strategy/tag_based_routing.py +++ b/litellm/router_strategy/tag_based_routing.py @@ -2,19 +2,30 @@ Use this to route requests between free and paid tiers """ -from typing import Any, Dict, List, Literal, Optional, TypedDict, Union, cast +from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, TypedDict, Union from litellm._logging import verbose_logger from litellm.types.router import DeploymentTypedDict +if TYPE_CHECKING: + from litellm.router import Router as _Router + + LitellmRouter = _Router +else: + LitellmRouter = Any + async def get_deployments_for_tag( + llm_router_instance: LitellmRouter, request_kwargs: Optional[Dict[Any, Any]] = None, healthy_deployments: Optional[Union[List[Any], Dict[Any, Any]]] = None, ): """ if request_kwargs contains {"metadata": {"tier": "free"}} or {"metadata": {"tier": "paid"}}, then routes the request to free/paid tier models """ + if llm_router_instance.enable_tag_filtering is not True: + return healthy_deployments + if request_kwargs is None: verbose_logger.debug( "get_deployments_for_tier: request_kwargs is None returning healthy_deployments: %s", diff --git a/litellm/tests/test_litellm_pre_call_utils.py b/litellm/tests/test_litellm_pre_call_utils.py deleted file mode 100644 index 7f56d693d9..0000000000 --- a/litellm/tests/test_litellm_pre_call_utils.py +++ /dev/null @@ -1,60 +0,0 @@ -""" -Tests litellm pre_call_utils -""" - -import os -import sys -import traceback -import uuid -from datetime import datetime - -from dotenv import load_dotenv -from fastapi import Request -from fastapi.routing import APIRoute - -from litellm.proxy._types import UserAPIKeyAuth -from litellm.proxy.litellm_pre_call_utils import add_litellm_data_to_request -from litellm.proxy.proxy_server import ProxyConfig, chat_completion - -load_dotenv() -import io -import os -import time - -import pytest - -# this file is to test litellm/proxy - -sys.path.insert( - 0, os.path.abspath("../..") -) # Adds the parent directory to the system path - - -@pytest.mark.parametrize("tier", ["free", "paid"]) -@pytest.mark.asyncio() -async def test_adding_key_tier_to_request_metadata(tier): - """ - Tests if we can add tier: free/paid from key metadata to the request metadata - """ - data = {} - - api_route = APIRoute(path="/chat/completions", endpoint=chat_completion) - request = Request( - { - "type": "http", - "method": "POST", - "route": api_route, - "path": api_route.path, - "headers": [], - } - ) - new_data = await add_litellm_data_to_request( - data=data, - request=request, - user_api_key_dict=UserAPIKeyAuth(metadata={"tier": tier}), - proxy_config=ProxyConfig(), - ) - - print("new_data", new_data) - - assert new_data["metadata"]["tier"] == tier From c2856c6e8fee1caae1cdd4074cc136ac00e89323 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 18 Jul 2024 20:10:45 -0700 Subject: [PATCH 5/9] check if using tag based routing --- litellm/proxy/litellm_pre_call_utils.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py index e0e875308e..e6bce53928 100644 --- a/litellm/proxy/litellm_pre_call_utils.py +++ b/litellm/proxy/litellm_pre_call_utils.py @@ -75,7 +75,7 @@ async def add_litellm_data_to_request( dict: The modified data dictionary. 
""" - from litellm.proxy.proxy_server import premium_user + from litellm.proxy.proxy_server import llm_router, premium_user safe_add_api_version_from_query_params(data, request) @@ -166,7 +166,8 @@ async def add_litellm_data_to_request( if user_api_key_dict.allowed_model_region is not None: data["allowed_model_region"] = user_api_key_dict.allowed_model_region - ## [Enterprise Only] Add User-IP Address + ## [Enterprise Only] + # Add User-IP Address requester_ip_address = "" if premium_user is True: # Only set the IP Address for Enterprise Users @@ -179,6 +180,15 @@ async def add_litellm_data_to_request( requester_ip_address = request.client.host data[_metadata_variable_name]["requester_ip_address"] = requester_ip_address + # Enterprise Only - Check if using tag based routing + if llm_router and llm_router.enable_tag_filtering is True: + if premium_user is not True: + verbose_proxy_logger.warning( + "router.enable_tag_filtering is on %s \n switched off router.enable_tag_filtering", + CommonProxyErrors.not_premium_user.value, + ) + llm_router.enable_tag_filtering = False + ### TEAM-SPECIFIC PARAMS ### if user_api_key_dict.team_id is not None: team_config = await proxy_config.load_team_config( From 92f9bdcb941bd023923203de3e0188428df272e0 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 18 Jul 2024 21:48:24 -0700 Subject: [PATCH 6/9] rename doc --- docs/my-website/docs/proxy/free_paid_tier.md | 102 ------------------- docs/my-website/docs/proxy/tag_routing.md | 38 +++++++ docs/my-website/sidebars.js | 2 +- 3 files changed, 39 insertions(+), 103 deletions(-) delete mode 100644 docs/my-website/docs/proxy/free_paid_tier.md create mode 100644 docs/my-website/docs/proxy/tag_routing.md diff --git a/docs/my-website/docs/proxy/free_paid_tier.md b/docs/my-website/docs/proxy/free_paid_tier.md deleted file mode 100644 index 01230e1f01..0000000000 --- a/docs/my-website/docs/proxy/free_paid_tier.md +++ /dev/null @@ -1,102 +0,0 @@ -# 💸 Free, Paid Tier Routing - -Route Virtual Keys on `free tier` to cheaper models - -### 1. Define free, paid tier models on config.yaml - -:::info -Requests with `model=gpt-4` will be routed to either `openai/fake` or `openai/gpt-4o` depending on which tier the virtual key is on -::: - -```yaml -model_list: - - model_name: gpt-4 - litellm_params: - model: openai/fake - api_key: fake-key - api_base: https://exampleopenaiendpoint-production.up.railway.app/ - model_info: - tier: free # 👈 Key Change - set `tier to paid or free` - - model_name: gpt-4 - litellm_params: - model: openai/gpt-4o - api_key: os.environ/OPENAI_API_KEY - model_info: - tier: paid # 👈 Key Change - set `tier to paid or free` - -general_settings: - master_key: sk-1234 -``` - -### 2. Create Virtual Keys with pricing `tier=free` - -```shell -curl --location 'http://0.0.0.0:4000/key/generate' \ - --header 'Authorization: Bearer sk-1234' \ - --header 'Content-Type: application/json' \ - --data '{ - "metadata": {"tier": "free"} -}' -``` - -### 3. 
Make Request with Key on `Free Tier` - -```shell -curl -i http://localhost:4000/v1/chat/completions \ - -H "Content-Type: application/json" \ -curl -i http://localhost:4000/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer sk-inxzoSurQsjog9gPrVOCcA" \ - -d '{ - "model": "gpt-4", - "messages": [ - {"role": "user", "content": "Hello, Claude gm!"} - ] - }' -``` - -**Expected Response** - -If this worked as expected then `x-litellm-model-api-base` should be `https://exampleopenaiendpoint-production.up.railway.app/` in the response headers - -```shell -x-litellm-model-api-base: https://exampleopenaiendpoint-production.up.railway.app/ - -{"id":"chatcmpl-657b750f581240c1908679ed94b31bfe","choices":[{"finish_reason":"stop","index":0,"message":{"content":"\n\nHello there, how may I assist you today?","role":"assistant","tool_calls":null,"function_call":null}}],"created":1677652288,"model":"gpt-3.5-turbo-0125","object":"chat.completion","system_fingerprint":"fp_44709d6fcb","usage":{"completion_tokens":12,"prompt_tokens":9,"total_tokens":21}}% -``` - - -### 4. Create Virtual Keys with pricing `tier=paid` - -```shell -curl --location 'http://0.0.0.0:4000/key/generate' \ - --header 'Authorization: Bearer sk-1234' \ - --header 'Content-Type: application/json' \ - --data '{ - "metadata": {"tier": "paid"} - }' -``` - -### 5. Make Request with Key on `Paid Tier` - -```shell -curl -i http://localhost:4000/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer sk-mnJoeSc6jFjzZr256q-iqA" \ - -d '{ - "model": "gpt-4", - "messages": [ - {"role": "user", "content": "Hello, Claude gm!"} - ] - }' -``` - -**Expected Response** - -If this worked as expected then `x-litellm-model-api-base` should be `https://api.openai.com` in the response headers - -```shell -x-litellm-model-api-base: https://api.openai.com - -{"id":"chatcmpl-9mW75EbJCgwmLcO0M5DmwxpiBgWdc","choices":[{"finish_reason":"stop","index":0,"message":{"content":"Good morning! How can I assist you today?","role":"assistant","tool_calls":null,"function_call":null}}],"created":1721350215,"model":"gpt-4o-2024-05-13","object":"chat.completion","system_fingerprint":"fp_c4e5b6fa31","usage":{"completion_tokens":10,"prompt_tokens":12,"total_tokens":22}} -``` diff --git a/docs/my-website/docs/proxy/tag_routing.md b/docs/my-website/docs/proxy/tag_routing.md new file mode 100644 index 0000000000..c33bce315f --- /dev/null +++ b/docs/my-website/docs/proxy/tag_routing.md @@ -0,0 +1,38 @@ +# 💸 Tag Based Routing + +Route requests based on tags + +### 1. 
Define free, paid tier models on config.yaml + +```yaml +model_list: + - model_name: gpt-4 + litellm_params: + model: openai/fake + api_key: fake-key + api_base: https://exampleopenaiendpoint-production.up.railway.app/ + tags: ["free"] + - model_name: gpt-4 + litellm_params: + model: openai/gpt-4o + api_key: os.environ/OPENAI_API_KEY + tags: ["paid"] + +general_settings: + master_key: sk-1234 +``` + +### Make Request with Key on `Free Tier` + +```shell +curl -i http://localhost:4000/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer sk-1234" \ + -d '{ + "model": "gpt-4", + "metadata": {"tags": ["paid"]}, + "messages": [ + {"role": "user", "content": "Hello, Claude gm!"} + ] + }' +``` diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index a74543c871..8fd83b3db2 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -44,7 +44,7 @@ const sidebars = { "proxy/cost_tracking", "proxy/self_serve", "proxy/virtual_keys", - "proxy/free_paid_tier", + "proxy/tag_routing", "proxy/users", "proxy/team_budgets", "proxy/customers", From ae8a65dc54e7ae8241fac6681080dc1dfc8497b7 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 18 Jul 2024 21:49:36 -0700 Subject: [PATCH 7/9] fix test --- litellm/tests/test_router_tag_routing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/litellm/tests/test_router_tag_routing.py b/litellm/tests/test_router_tag_routing.py index feb67c0e93..67f100d794 100644 --- a/litellm/tests/test_router_tag_routing.py +++ b/litellm/tests/test_router_tag_routing.py @@ -58,7 +58,8 @@ async def test_router_free_paid_tier(): }, "model_info": {"id": "very-expensive-model"}, }, - ] + ], + enable_tag_filtering=True, ) for _ in range(5): From 5aa7430d8055b3c1fcbcb2f996268fd97ad6219b Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 18 Jul 2024 21:55:53 -0700 Subject: [PATCH 8/9] add tags to metadata --- litellm/proxy/litellm_pre_call_utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py index e6bce53928..1014a325ab 100644 --- a/litellm/proxy/litellm_pre_call_utils.py +++ b/litellm/proxy/litellm_pre_call_utils.py @@ -188,6 +188,9 @@ async def add_litellm_data_to_request( CommonProxyErrors.not_premium_user.value, ) llm_router.enable_tag_filtering = False + else: + if "tags" in data: + data[_metadata_variable_name]["tags"] = data["tags"] ### TEAM-SPECIFIC PARAMS ### if user_api_key_dict.team_id is not None: From 90a169f16fbd7ae7a3e44f0316ae0ac85c196ab7 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 18 Jul 2024 22:18:10 -0700 Subject: [PATCH 9/9] docs - tag based routing --- docs/my-website/docs/proxy/tag_routing.md | 109 ++++++++++++++++++++-- litellm/proxy/proxy_config.yaml | 10 +- 2 files changed, 107 insertions(+), 12 deletions(-) diff --git a/docs/my-website/docs/proxy/tag_routing.md b/docs/my-website/docs/proxy/tag_routing.md index c33bce315f..763d509185 100644 --- a/docs/my-website/docs/proxy/tag_routing.md +++ b/docs/my-website/docs/proxy/tag_routing.md @@ -1,8 +1,12 @@ # 💸 Tag Based Routing -Route requests based on tags +Route requests based on tags. +This is useful for implementing free / paid tiers for users -### 1. Define free, paid tier models on config.yaml +### 1. 
Define tags on config.yaml
+
+- A request with `tags=["free"]` will get routed to `openai/fake`
+- A request with `tags=["paid"]` will get routed to `openai/gpt-4o`
 
 ```yaml
 model_list:
@@ -11,18 +15,22 @@ model_list:
       model: openai/fake
       api_key: fake-key
       api_base: https://exampleopenaiendpoint-production.up.railway.app/
-      tags: ["free"]
+      tags: ["free"] # 👈 Key Change
   - model_name: gpt-4
     litellm_params:
       model: openai/gpt-4o
       api_key: os.environ/OPENAI_API_KEY
-      tags: ["paid"]
+      tags: ["paid"] # 👈 Key Change
 
+router_settings:
+  enable_tag_filtering: True # 👈 Key Change
 general_settings:
   master_key: sk-1234
 ```
 
-### Make Request with Key on `Free Tier`
+### 2. Make Request with `tags=["free"]`
+
+This request includes "tags": ["free"], which routes it to `openai/fake`
 
 ```shell
 curl -i http://localhost:4000/v1/chat/completions \
   -H "Content-Type: application/json" \
@@ -30,9 +38,96 @@ curl -i http://localhost:4000/v1/chat/completions \
   -H "Authorization: Bearer sk-1234" \
   -d '{
     "model": "gpt-4",
-    "metadata": {"tags": ["paid"]},
     "messages": [
       {"role": "user", "content": "Hello, Claude gm!"}
-    ]
+    ],
+    "tags": ["free"]
   }'
 ```
+**Expected Response**
+
+Expect to see the following response header when this works
+```shell
+x-litellm-model-api-base: https://exampleopenaiendpoint-production.up.railway.app/
+```
+
+Response
+```shell
+{
+  "id": "chatcmpl-33c534e3d70148218e2d62496b81270b",
+  "choices": [
+    {
+      "finish_reason": "stop",
+      "index": 0,
+      "message": {
+        "content": "\n\nHello there, how may I assist you today?",
+        "role": "assistant",
+        "tool_calls": null,
+        "function_call": null
+      }
+    }
+  ],
+  "created": 1677652288,
+  "model": "gpt-3.5-turbo-0125",
+  "object": "chat.completion",
+  "system_fingerprint": "fp_44709d6fcb",
+  "usage": {
+    "completion_tokens": 12,
+    "prompt_tokens": 9,
+    "total_tokens": 21
+  }
+}
+```
+
+
+### 3. Make Request with `tags=["paid"]`
+
+This request includes "tags": ["paid"], which routes it to `openai/gpt-4o`
+
+```shell
+curl -i http://localhost:4000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-1234" \
+  -d '{
+    "model": "gpt-4",
+    "messages": [
+      {"role": "user", "content": "Hello, Claude gm!"}
+    ],
+    "tags": ["paid"]
+  }'
+```
+
+**Expected Response**
+
+Expect to see the following response header when this works
+```shell
+x-litellm-model-api-base: https://api.openai.com
+```
+
+Response
+```shell
+{
+  "id": "chatcmpl-9maCcqQYTqdJrtvfakIawMOIUbEZx",
+  "choices": [
+    {
+      "finish_reason": "stop",
+      "index": 0,
+      "message": {
+        "content": "Good morning! 
How can I assist you today?", + "role": "assistant", + "tool_calls": null, + "function_call": null + } + } + ], + "created": 1721365934, + "model": "gpt-4o-2024-05-13", + "object": "chat.completion", + "system_fingerprint": "fp_c4e5b6fa31", + "usage": { + "completion_tokens": 10, + "prompt_tokens": 12, + "total_tokens": 22 + } +} +``` \ No newline at end of file diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 81ed12c07e..f20c780cc9 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -4,14 +4,14 @@ model_list: model: openai/fake api_key: fake-key api_base: https://exampleopenaiendpoint-production.up.railway.app/ - tags: ["free"] + tags: ["free"] # 👈 Key Change - model_name: gpt-4 litellm_params: model: openai/gpt-4o api_key: os.environ/OPENAI_API_KEY - tags: ["paid"] + tags: ["paid"] # 👈 Key Change +router_settings: + enable_tag_filtering: True # 👈 Key Change general_settings: - master_key: sk-1234 - - + master_key: sk-1234 \ No newline at end of file
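
---

For readers following the series: the routing change these patches introduce boils down to a subset match between the tags on a request and the `tags` list in each deployment's `litellm_params`. The sketch below is an illustration only, not code from the series; `filter_deployments_by_tags` is a hypothetical name, and it assumes deployments are plain dicts shaped like the `model_list` entries above. It mirrors the behavior of `get_deployments_for_tag` as added in PATCH 1/9 and gated in PATCH 4/9: with filtering disabled or no request tags, all healthy deployments pass through unchanged; otherwise untagged deployments are skipped and only deployments whose tags contain every request tag survive.

```python
# Illustrative sketch (not litellm source) of tag-based deployment filtering.
from typing import Any, Dict, List, Optional


def filter_deployments_by_tags(
    request_tags: Optional[List[str]],
    healthy_deployments: List[Dict[str, Any]],
    enable_tag_filtering: bool = True,
) -> List[Dict[str, Any]]:
    """Keep deployments whose litellm_params["tags"] contain every request tag."""
    # With filtering disabled or no tags on the request, routing is unchanged.
    if not enable_tag_filtering or not request_tags:
        return healthy_deployments

    filtered: List[Dict[str, Any]] = []
    for deployment in healthy_deployments:
        deployment_tags = deployment.get("litellm_params", {}).get("tags")
        if deployment_tags is None:
            # Untagged deployments never match a tagged request.
            continue
        if set(request_tags).issubset(set(deployment_tags)):
            filtered.append(deployment)
    return filtered


if __name__ == "__main__":
    deployments = [
        {"model_name": "gpt-4", "litellm_params": {"model": "openai/fake", "tags": ["free"]}},
        {"model_name": "gpt-4", "litellm_params": {"model": "openai/gpt-4o", "tags": ["paid"]}},
    ]
    # A request tagged ["free"] is narrowed to the openai/fake deployment.
    print(filter_deployments_by_tags(["free"], deployments))
```

One consequence of the subset rule worth noting when writing configs: a request carrying multiple tags (e.g. `["free", "eu"]`) only matches deployments tagged with *all* of them, and a tagged request that matches nothing yields an empty deployment list rather than falling back to untagged models.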