From d2ddc5aba9a020537794e1f6328d9a32a695e805 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 29 Aug 2024 13:02:57 -0700 Subject: [PATCH 01/14] add test_team_tags to set / update tags --- litellm/tests/test_key_generate_prisma.py | 59 +++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/litellm/tests/test_key_generate_prisma.py b/litellm/tests/test_key_generate_prisma.py index 912373024..afde12393 100644 --- a/litellm/tests/test_key_generate_prisma.py +++ b/litellm/tests/test_key_generate_prisma.py @@ -3033,3 +3033,62 @@ async def test_regenerate_api_key(prisma_client): assert new_key.key_name == f"sk-...{new_key.key[-4:]}" pass + + +@pytest.mark.asyncio() +async def test_team_tags(prisma_client): + """ + - Test setting tags on a team + - Assert this is returned when calling /team/info + - Team/update with tags should update the tags + - Assert new tags are returned when calling /team/info + """ + litellm.set_verbose = True + setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + await litellm.proxy.proxy_server.prisma_client.connect() + + _new_team = NewTeamRequest( + team_alias="test-teamA", + tags=["teamA"], + ) + + new_team_response = await new_team( + data=_new_team, + user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN), + http_request=Request(scope={"type": "http"}), + ) + + print("new_team_response", new_team_response) + + # call /team/info + team_info_response = await team_info( + team_id=new_team_response["team_id"], + user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN), + http_request=Request(scope={"type": "http"}), + ) + print("team_info_response", team_info_response) + + assert team_info_response["team_info"].metadata["tags"] == ["teamA"] + + # team update with tags + team_update_response = await update_team( + data=UpdateTeamRequest( + team_id=new_team_response["team_id"], + tags=["teamA", "teamB"], + ), + user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN), + http_request=Request(scope={"type": "http"}), + ) + + print("team_update_response", team_update_response) + + # call /team/info again + team_info_response = await team_info( + team_id=new_team_response["team_id"], + user_api_key_dict=UserAPIKeyAuth(user_role=LitellmUserRoles.PROXY_ADMIN), + http_request=Request(scope={"type": "http"}), + ) + + print("team_info_response", team_info_response) + assert team_info_response["team_info"].metadata["tags"] == ["teamA", "teamB"] From 5e590e7326fdfbd746df85e8b5c7f24a0a85c4b7 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 29 Aug 2024 13:03:49 -0700 Subject: [PATCH 02/14] allow settings tags per team --- litellm/proxy/_types.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index dd038d80b..11d99b5ea 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -813,6 +813,7 @@ class TeamBase(LiteLLMBase): class NewTeamRequest(TeamBase): model_aliases: Optional[dict] = None + tags: Optional[list] = None model_config = ConfigDict(protected_namespaces=()) @@ -883,6 +884,7 @@ class UpdateTeamRequest(LiteLLMBase): models: Optional[list] = None blocked: Optional[bool] = None budget_duration: Optional[str] = None + tags: Optional[list] = None class ResetTeamBudgetRequest(LiteLLMBase): From ffeb5ce22a306cd3747d51a80836dfcfde66a8e7 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 29 Aug 2024 13:05:00 -0700 Subject: [PATCH 03/14] add set / update tags for a 
team --- .../proxy/management_endpoints/team_endpoints.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/litellm/proxy/management_endpoints/team_endpoints.py b/litellm/proxy/management_endpoints/team_endpoints.py index 5b6885ecd..0858e6089 100644 --- a/litellm/proxy/management_endpoints/team_endpoints.py +++ b/litellm/proxy/management_endpoints/team_endpoints.py @@ -224,6 +224,13 @@ async def new_team( model_id=_model_id, ) + # Set tags on the new team + if data.tags is not None: + if complete_team_data.metadata is None: + complete_team_data.metadata = {"tags": data.tags} + else: + complete_team_data.metadata["tags"] = data.tags + # If budget_duration is set, set `budget_reset_at` if complete_team_data.budget_duration is not None: duration_s = _duration_in_seconds(duration=complete_team_data.budget_duration) @@ -365,6 +372,15 @@ async def update_team( # set the budget_reset_at in DB updated_kv["budget_reset_at"] = reset_at + # check if user is trying to update tags for team + if "tags" in updated_kv and updated_kv["tags"] is not None: + # remove tags from updated_kv + _tags = updated_kv.pop("tags") + if "metadata" in updated_kv and updated_kv["metadata"] is not None: + updated_kv["metadata"]["tags"] = _tags + else: + updated_kv["metadata"] = {"tags": _tags} + updated_kv = prisma_client.jsonify_object(data=updated_kv) team_row: Optional[ LiteLLM_TeamTable From ff429627506c06a6950d198b58e17ff1146b4409 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 29 Aug 2024 13:06:03 -0700 Subject: [PATCH 04/14] add_team_based_tags_to_metadata --- litellm/proxy/litellm_pre_call_utils.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py index 60052bc27..f973d614c 100644 --- a/litellm/proxy/litellm_pre_call_utils.py +++ b/litellm/proxy/litellm_pre_call_utils.py @@ -379,6 +379,12 @@ async def add_litellm_data_to_request( # unpack callback_vars in data for k, v in callback_settings_obj.callback_vars.items(): data[k] = v + # Team based tags + add_team_based_tags_to_metadata( + data=data, + _metadata_variable_name=_metadata_variable_name, + user_api_key_dict=user_api_key_dict, + ) # Guardrails move_guardrails_to_metadata( @@ -390,6 +396,24 @@ async def add_litellm_data_to_request( return data +def add_team_based_tags_to_metadata( + data: dict, + _metadata_variable_name: str, + user_api_key_dict: UserAPIKeyAuth, +): + from litellm.proxy.proxy_server import premium_user + + if premium_user is True: + if ( + user_api_key_dict.team_metadata is not None + and "tags" in user_api_key_dict.team_metadata + ): + _team_tags = user_api_key_dict.team_metadata["tags"] + _tags_in_metadata = data[_metadata_variable_name].get("tags", []) + _tags_in_metadata.extend(_team_tags) + data[_metadata_variable_name]["tags"] = _tags_in_metadata + + def move_guardrails_to_metadata( data: dict, _metadata_variable_name: str, From 2cb4882e700dfa9361051941d8656aee7e1d38f7 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 29 Aug 2024 13:06:39 -0700 Subject: [PATCH 05/14] define tags on model list --- litellm/proxy/proxy_config.yaml | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index ac17a1d84..5f9f99d57 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -1,26 +1,21 @@ model_list: - - model_name: fake-openai-endpoint + - model_name: gpt-4 litellm_params: model: 
openai/fake api_key: fake-key api_base: https://exampleopenaiendpoint-production.up.railway.app/ - - model_name: Salesforce/Llama-Rank-V1 + tags: ["teamA"] # 👈 Key Change + - model_name: gpt-4 litellm_params: - model: together_ai/Salesforce/Llama-Rank-V1 - api_key: os.environ/TOGETHERAI_API_KEY - - model_name: rerank-english-v3.0 - litellm_params: - model: cohere/rerank-english-v3.0 - api_key: os.environ/COHERE_API_KEY + model: openai/gpt-4o + api_key: os.environ/OPENAI_API_KEY + tags: ["teamB"] # 👈 Key Change -general_settings: - enable_oauth2_proxy_auth: True - oauth2_config_mappings: - token: X-Auth-Token - user_id: X-Auth-Client-ID - team_id: X-Auth-Team-ID - max_budget: X-Auth-Max-Budget - models: X-Auth-Allowed-Models +router_settings: + enable_tag_filtering: True # 👈 Key Change + +general_settings: + master_key: sk-1234 # default off mode litellm_settings: From 34f1d327996bc13200ba3bc91f0705a77b74d0a5 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 29 Aug 2024 13:45:24 -0700 Subject: [PATCH 06/14] add test for tag based routing --- tests/otel_tests/test_team_tag_routing.py | 121 ++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 tests/otel_tests/test_team_tag_routing.py diff --git a/tests/otel_tests/test_team_tag_routing.py b/tests/otel_tests/test_team_tag_routing.py new file mode 100644 index 000000000..56bf361a8 --- /dev/null +++ b/tests/otel_tests/test_team_tag_routing.py @@ -0,0 +1,121 @@ +# What this tests ? +## Set tags on a team and then make a request to /chat/completions +import pytest +import asyncio +import aiohttp, openai +from openai import OpenAI, AsyncOpenAI +from typing import Optional, List, Union +import uuid + +LITELLM_MASTER_KEY = "sk-1234" + + +async def chat_completion( + session, key, model: Union[str, List] = "fake-openai-endpoint" +): + url = "http://0.0.0.0:4000/chat/completions" + headers = { + "Authorization": f"Bearer {key}", + "Content-Type": "application/json", + } + data = { + "model": model, + "messages": [ + {"role": "user", "content": f"Hello! 
{str(uuid.uuid4())}"}, + ], + } + + async with session.post(url, headers=headers, json=data) as response: + status = response.status + response_text = await response.text() + + if status != 200: + raise Exception(response_text) + + return await response.json(), response.headers + + +async def create_team_with_tags(session, key, tags: List[str]): + url = "http://0.0.0.0:4000/team/new" + headers = { + "Authorization": f"Bearer {key}", + "Content-Type": "application/json", + } + data = { + "tags": tags, + } + + async with session.post(url, headers=headers, json=data) as response: + status = response.status + response_text = await response.text() + + if status != 200: + raise Exception(response_text) + + return await response.json() + + +async def create_key_with_team(session, key, team_id: str): + url = f"http://0.0.0.0:4000/key/generate" + headers = { + "Authorization": f"Bearer {key}", + "Content-Type": "application/json", + } + data = { + "team_id": team_id, + } + async with session.post(url, headers=headers, json=data) as response: + status = response.status + response_text = await response.text() + + if status != 200: + raise Exception(response_text) + + return await response.json() + + +async def model_info_get_call(session, key, model_id: str): + # make get call pass "litellm_model_id" in query params + url = f"http://0.0.0.0:4000/model/info?litellm_model_id={model_id}" + headers = { + "Authorization": f"Bearer {key}", + "Content-Type": "application/json", + } + async with session.get(url, headers=headers) as response: + status = response.status + response_text = await response.text() + + if status != 200: + raise Exception(response_text) + + return await response.json() + + +@pytest.mark.asyncio() +async def test_team_tag_routing(): + async with aiohttp.ClientSession() as session: + key = LITELLM_MASTER_KEY + team_a_data = await create_team_with_tags(session, key, ["teamA"]) + team_a_id = team_a_data["team_id"] + + team_b_data = await create_team_with_tags(session, key, ["teamB"]) + team_b_id = team_b_data["team_id"] + + key_with_team_a = await create_key_with_team(session, key, team_a_id) + print(key_with_team_a) + _key_with_team_a = key_with_team_a["key"] + for _ in range(5): + response_a, headers = await chat_completion(session, _key_with_team_a) + headers = dict(headers) + print(response_a) + print(headers) + assert headers["x-litellm-model-id"] == "teama", "Model ID should be teamA" + + key_with_team_b = await create_key_with_team(session, key, team_b_id) + _key_with_team_b = key_with_team_b["key"] + for _ in range(5): + response_b, headers = await chat_completion(session, _key_with_team_b) + headers = dict(headers) + print(response_b) + print(headers) + assert headers["x-litellm-model-id"] == "teamb", "Model ID should be teamB" From 84bda9cc808ba93caed0ce447996df772e8f5f0e Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 29 Aug 2024 13:51:36 -0700 Subject: [PATCH 07/14] fix get_deployments_for_tag --- litellm/router_strategy/tag_based_routing.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/litellm/router_strategy/tag_based_routing.py b/litellm/router_strategy/tag_based_routing.py index 2dbc5cb93..ed350109c 100644 --- a/litellm/router_strategy/tag_based_routing.py +++ b/litellm/router_strategy/tag_based_routing.py @@ -20,9 +20,6 @@ async def get_deployments_for_tag( request_kwargs: Optional[Dict[Any, Any]] = None, healthy_deployments: Optional[Union[List[Any], Dict[Any, Any]]] = None, ): - """ - if request_kwargs contains {"metadata": {"tier": 
"free"}} or {"metadata": {"tier": "paid"}}, then routes the request to free/paid tier models - """ if llm_router_instance.enable_tag_filtering is not True: return healthy_deployments @@ -70,7 +67,7 @@ async def get_deployments_for_tag( ) new_healthy_deployments.append(deployment) - return new_healthy_deployments + return new_healthy_deployments verbose_logger.debug( "no tier found in metadata, returning healthy_deployments: %s", From f592aeaa3842341e194d7e375827212d92f53bf8 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 29 Aug 2024 13:54:11 -0700 Subject: [PATCH 08/14] add test_chat_completion_with_no_tags --- .../example_config_yaml/otel_test_config.yaml | 11 +++++++++++ litellm/proxy/proxy_config.yaml | 18 ++++++++++++------ tests/otel_tests/test_team_tag_routing.py | 11 +++++++++++ 3 files changed, 34 insertions(+), 6 deletions(-) diff --git a/litellm/proxy/example_config_yaml/otel_test_config.yaml b/litellm/proxy/example_config_yaml/otel_test_config.yaml index 7d8f6d4fe..54d9286c9 100644 --- a/litellm/proxy/example_config_yaml/otel_test_config.yaml +++ b/litellm/proxy/example_config_yaml/otel_test_config.yaml @@ -4,6 +4,17 @@ model_list: model: openai/fake api_key: fake-key api_base: https://exampleopenaiendpoint-production.up.railway.app/ + tags: ["teamA"] + model_info: + id: "teama" + - model_name: fake-openai-endpoint + litellm_params: + model: openai/fake + api_key: fake-key + api_base: https://exampleopenaiendpoint-production.up.railway.app/ + tags: ["teamB"] + model_info: + id: "teamb" - model_name: rerank-english-v3.0 litellm_params: model: cohere/rerank-english-v3.0 diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 5f9f99d57..1afac20cd 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -1,15 +1,21 @@ model_list: - - model_name: gpt-4 + - model_name: fake-openai-endpoint litellm_params: model: openai/fake api_key: fake-key api_base: https://exampleopenaiendpoint-production.up.railway.app/ - tags: ["teamA"] # 👈 Key Change - - model_name: gpt-4 + tags: ["teamA"] + model_info: + id: "teama" + - model_name: fake-openai-endpoint litellm_params: - model: openai/gpt-4o - api_key: os.environ/OPENAI_API_KEY - tags: ["teamB"] # 👈 Key Change + model: openai/fake + api_key: fake-key + api_base: https://exampleopenaiendpoint-production.up.railway.app/ + tags: ["teamB"] + model_info: + id: "teamb" + router_settings: enable_tag_filtering: True # 👈 Key Change diff --git a/tests/otel_tests/test_team_tag_routing.py b/tests/otel_tests/test_team_tag_routing.py index 56bf361a8..390e7dce6 100644 --- a/tests/otel_tests/test_team_tag_routing.py +++ b/tests/otel_tests/test_team_tag_routing.py @@ -119,3 +119,14 @@ async def test_team_tag_routing(): print(response_b) print(headers) assert headers["x-litellm-model-id"] == "teamb", "Model ID should be teamB" + + +@pytest.mark.asyncio() +async def test_chat_completion_with_no_tags(): + async with aiohttp.ClientSession() as session: + key = LITELLM_MASTER_KEY + response, headers = await chat_completion(session, key) + headers = dict(headers) + print(response) + print(headers) + assert response is not None From 944c7ac3fa0a5e36f2d539ac642998e6c1b2b08b Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 29 Aug 2024 14:00:16 -0700 Subject: [PATCH 09/14] fix missing link on docs --- docs/my-website/docs/proxy/reliability.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/my-website/docs/proxy/reliability.md b/docs/my-website/docs/proxy/reliability.md index 7a2c65a90..5045ba36a 
100644 --- a/docs/my-website/docs/proxy/reliability.md +++ b/docs/my-website/docs/proxy/reliability.md @@ -283,8 +283,6 @@ litellm_settings: **Covers all errors (429, 500, etc.)** -[**See Code**]() - **Set via config** ```yaml model_list: From d9433d9f9432afdc75f3362ab42227a85a7ad25c Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 29 Aug 2024 14:14:37 -0700 Subject: [PATCH 10/14] doc Tag Based Routing --- docs/my-website/docs/proxy/tag_routing.md | 159 +++++++++++++++++++++- 1 file changed, 157 insertions(+), 2 deletions(-) diff --git a/docs/my-website/docs/proxy/tag_routing.md b/docs/my-website/docs/proxy/tag_routing.md index c3d533564..81012c900 100644 --- a/docs/my-website/docs/proxy/tag_routing.md +++ b/docs/my-website/docs/proxy/tag_routing.md @@ -1,7 +1,11 @@ # Tag Based Routing Route requests based on tags. -This is useful for implementing free / paid tiers for users +This is useful for +- implementing free / paid tiers for users +- controlling model access per team, example Team A can access gpt-4 deployment A, Team B can access gpt-4 deployment B + +## Quick Start ### 1. Define tags on config.yaml @@ -130,4 +134,155 @@ Response "total_tokens": 22 } } -``` \ No newline at end of file +``` + +## ✨ Team based tag routing (Enterprise) + +LiteLLM Proxy supports team-based tag routing, allowing you to associate specific tags with teams and route requests accordingly. Example **Team A can access gpt-4 deployment A, Team B can access gpt-4 deployment B** + + +:::info + +This is an enterprise feature, [Contact us here to get a free trial](https://calendly.com/d/4mp-gd3-k5k/litellm-1-1-onboarding-chat) + +::: + +Here's how to set up and use team-based tag routing using curl commands: + +1. **Enable tag filtering in your proxy configuration:** + + In your `proxy_config.yaml`, ensure you have the following setting: + + ```yaml + model_list: + - model_name: fake-openai-endpoint + litellm_params: + model: openai/fake + api_key: fake-key + api_base: https://exampleopenaiendpoint-production.up.railway.app/ + tags: ["teamA"] # 👈 Key Change + model_info: + id: "team-a-model" # used for identifying model in response headers + - model_name: fake-openai-endpoint + litellm_params: + model: openai/fake + api_key: fake-key + api_base: https://exampleopenaiendpoint-production.up.railway.app/ + tags: ["teamB"] # 👈 Key Change + model_info: + id: "team-b-model" # used for identifying model in response headers + + router_settings: + enable_tag_filtering: True # 👈 Key Change + + general_settings: + master_key: sk-1234 + ``` + +2. **Create teams with tags:** + + Use the `/team/new` endpoint to create teams with specific tags: + + ```shell + # Create Team A + curl -X POST http://0.0.0.0:4000/team/new \ + -H "Authorization: Bearer sk-1234" \ + -H "Content-Type: application/json" \ + -d '{"tags": ["teamA"]}' + ``` + + ```shell + # Create Team B + curl -X POST http://0.0.0.0:4000/team/new \ + -H "Authorization: Bearer sk-1234" \ + -H "Content-Type: application/json" \ + -d '{"tags": ["teamB"]}' + ``` + + These commands will return JSON responses containing the `team_id` for each team. + +3. 
**Generate keys for team members:** + + Use the `/key/generate` endpoint to create keys associated with specific teams: + + ```shell + # Generate key for Team A + curl -X POST http://0.0.0.0:4000/key/generate \ + -H "Authorization: Bearer sk-1234" \ + -H "Content-Type: application/json" \ + -d '{"team_id": "team_a_id_here"}' + ``` + + ```shell + # Generate key for Team B + curl -X POST http://0.0.0.0:4000/key/generate \ + -H "Authorization: Bearer sk-1234" \ + -H "Content-Type: application/json" \ + -d '{"team_id": "team_b_id_here"}' + ``` + + Replace `team_a_id_here` and `team_b_id_here` with the actual team IDs received from step 2. + +4. **Make requests with team-specific keys:** + + When making requests to the `/chat/completions` endpoint, use the team-specific keys. The proxy will automatically route the request to the appropriate model based on the team's tags: + + ```shell + # Request using Team A's key + curl -X POST http://0.0.0.0:4000/chat/completions \ + -H "Authorization: Bearer team_a_key_here" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "fake-openai-endpoint", + "messages": [ + {"role": "user", "content": "Hello!"} + ] + }' + ``` + + ```shell + # Request using Team B's key + curl -X POST http://0.0.0.0:4000/chat/completions \ + -H "Authorization: Bearer team_b_key_here" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "fake-openai-endpoint", + "messages": [ + {"role": "user", "content": "Hello!"} + ] + }' + ``` + + Replace `team_a_key_here` and `team_b_key_here` with the actual keys generated in step 3. + +5. **Verify routing:** + + Check the `x-litellm-model-id` header in the response to confirm that the request was routed to the correct model based on the team's tags. You can use the `-i` flag with curl to include the response headers: + + Request with Team A's key (including headers) + ```shell + curl -i -X POST http://0.0.0.0:4000/chat/completions \ + -H "Authorization: Bearer team_a_key_here" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "fake-openai-endpoint", + "messages": [ + {"role": "user", "content": "Hello!"} + ] + }' + ``` + + In the response headers, you should see: + ``` + x-litellm-model-id: teama + ``` + + Similarly, when using Team B's key, you should see: + ``` + x-litellm-model-id: teamb + ``` + +By following these steps and using these curl commands, you can implement and test team-based tag routing in your LiteLLM Proxy setup, ensuring that different teams are routed to the appropriate models or deployments based on their assigned tags. 
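
---

The documentation added in this commit exercises the flow with raw curl. For reference, the same steps can be scripted end to end. This is an illustrative sketch, not part of the patch: it assumes the proxy from the config above is running on `http://0.0.0.0:4000` with `master_key: sk-1234`, and that the `requests` library is installed. The endpoints (`/team/new`, `/key/generate`, `/chat/completions`) and the `x-litellm-model-id` response header are the same ones the test suite in this PR relies on.

```python
import requests

PROXY_BASE = "http://0.0.0.0:4000"
MASTER_KEY = "sk-1234"  # master_key from general_settings in the config above
ADMIN_HEADERS = {
    "Authorization": f"Bearer {MASTER_KEY}",
    "Content-Type": "application/json",
}


def create_team(tags: list) -> str:
    """Create a team with the given tags; returns the new team_id."""
    resp = requests.post(f"{PROXY_BASE}/team/new", headers=ADMIN_HEADERS, json={"tags": tags})
    resp.raise_for_status()
    return resp.json()["team_id"]


def create_key(team_id: str) -> str:
    """Generate a virtual key scoped to a team; returns the key."""
    resp = requests.post(f"{PROXY_BASE}/key/generate", headers=ADMIN_HEADERS, json={"team_id": team_id})
    resp.raise_for_status()
    return resp.json()["key"]


team_a_key = create_key(create_team(["teamA"]))

# Requests signed with the teamA key should be routed to the deployment tagged "teamA".
resp = requests.post(
    f"{PROXY_BASE}/chat/completions",
    headers={"Authorization": f"Bearer {team_a_key}", "Content-Type": "application/json"},
    json={
        "model": "fake-openai-endpoint",
        "messages": [{"role": "user", "content": "Hello!"}],
    },
)
resp.raise_for_status()

# The serving deployment is identified by the x-litellm-model-id response
# header ("team-a-model" with the model_info ids shown in the config above).
print(resp.headers.get("x-litellm-model-id"))
```
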
+ + + From 242f66054dca4db53bcec3126016f324bac7a9da Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 29 Aug 2024 14:14:46 -0700 Subject: [PATCH 11/14] enable_tag_filtering --- litellm/proxy/proxy_config.yaml | 41 +++++++++++++++------------------ 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 1afac20cd..c5f736bac 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -1,28 +1,23 @@ model_list: - - model_name: fake-openai-endpoint - litellm_params: - model: openai/fake - api_key: fake-key - api_base: https://exampleopenaiendpoint-production.up.railway.app/ - tags: ["teamA"] - model_info: - id: "teama" - - model_name: fake-openai-endpoint - litellm_params: - model: openai/fake - api_key: fake-key - api_base: https://exampleopenaiendpoint-production.up.railway.app/ - tags: ["teamB"] - model_info: - id: "teamb" - + - model_name: fake-openai-endpoint + litellm_params: + model: openai/fake + api_key: fake-key + api_base: https://exampleopenaiendpoint-production.up.railway.app/ + tags: ["teamA"] # 👈 Key Change + model_info: + id: "team-a-model" # used for identifying model in response headers + - model_name: fake-openai-endpoint + litellm_params: + model: openai/fake + api_key: fake-key + api_base: https://exampleopenaiendpoint-production.up.railway.app/ + tags: ["teamB"] # 👈 Key Change + model_info: + id: "team-b-model" # used for identifying model in response headers router_settings: - enable_tag_filtering: True # 👈 Key Change + enable_tag_filtering: True # 👈 Key Change general_settings: - master_key: sk-1234 - -# default off mode -litellm_settings: - set_verbose: True \ No newline at end of file + master_key: sk-1234 \ No newline at end of file From 308377fbe27a092526213a006f70ec7dc94dc5dc Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 29 Aug 2024 14:23:55 -0700 Subject: [PATCH 12/14] docs tag based routing per team --- docs/my-website/docs/proxy/tag_routing.md | 46 ++++------------------- 1 file changed, 8 insertions(+), 38 deletions(-) diff --git a/docs/my-website/docs/proxy/tag_routing.md b/docs/my-website/docs/proxy/tag_routing.md index 81012c900..e63a35dee 100644 --- a/docs/my-website/docs/proxy/tag_routing.md +++ b/docs/my-website/docs/proxy/tag_routing.md @@ -2,8 +2,8 @@ Route requests based on tags. This is useful for -- implementing free / paid tiers for users -- controlling model access per team, example Team A can access gpt-4 deployment A, Team B can access gpt-4 deployment B +- Implementing free / paid tiers for users +- Controlling model access per team, example Team A can access gpt-4 deployment A, Team B can access gpt-4 deployment B ## Quick Start @@ -223,39 +223,7 @@ Here's how to set up and use team-based tag routing using curl commands: Replace `team_a_id_here` and `team_b_id_here` with the actual team IDs received from step 2. -4. **Make requests with team-specific keys:** - - When making requests to the `/chat/completions` endpoint, use the team-specific keys. 
The proxy will automatically route the request to the appropriate model based on the team's tags: - - ```shell - # Request using Team A's key - curl -X POST http://0.0.0.0:4000/chat/completions \ - -H "Authorization: Bearer team_a_key_here" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "fake-openai-endpoint", - "messages": [ - {"role": "user", "content": "Hello!"} - ] - }' - ``` - - ```shell - # Request using Team B's key - curl -X POST http://0.0.0.0:4000/chat/completions \ - -H "Authorization: Bearer team_b_key_here" \ - -H "Content-Type: application/json" \ - -d '{ - "model": "fake-openai-endpoint", - "messages": [ - {"role": "user", "content": "Hello!"} - ] - }' - ``` - - Replace `team_a_key_here` and `team_b_key_here` with the actual keys generated in step 3. - -5. **Verify routing:** +4. **Verify routing:** Check the `x-litellm-model-id` header in the response to confirm that the request was routed to the correct model based on the team's tags. You can use the `-i` flag with curl to include the response headers: @@ -274,15 +242,17 @@ Here's how to set up and use team-based tag routing using curl commands: In the response headers, you should see: ``` - x-litellm-model-id: teama + x-litellm-model-id: team-a-model ``` Similarly, when using Team B's key, you should see: ``` - x-litellm-model-id: teamb + x-litellm-model-id: team-b-model ``` By following these steps and using these curl commands, you can implement and test team-based tag routing in your LiteLLM Proxy setup, ensuring that different teams are routed to the appropriate models or deployments based on their assigned tags. - +## Other Tag Based Features +- [Track spend per tag](cost_tracking#-custom-tags) +- [Setup Budgets per Virtual Key, Team](users) From da2cefc45acc2e4cbfc6999381377df8618f7a79 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 29 Aug 2024 14:37:44 -0700 Subject: [PATCH 13/14] fix team based tag routing --- .../example_config_yaml/otel_test_config.yaml | 32 +++++++++---------- litellm/proxy/litellm_pre_call_utils.py | 24 -------------- tests/otel_tests/test_team_tag_routing.py | 8 +++-- 3 files changed, 22 insertions(+), 42 deletions(-) diff --git a/litellm/proxy/example_config_yaml/otel_test_config.yaml b/litellm/proxy/example_config_yaml/otel_test_config.yaml index 54d9286c9..36cce1198 100644 --- a/litellm/proxy/example_config_yaml/otel_test_config.yaml +++ b/litellm/proxy/example_config_yaml/otel_test_config.yaml @@ -1,20 +1,20 @@ model_list: - - model_name: fake-openai-endpoint - litellm_params: - model: openai/fake - api_key: fake-key - api_base: https://exampleopenaiendpoint-production.up.railway.app/ - tags: ["teamA"] - model_info: - id: "teama" - - model_name: fake-openai-endpoint - litellm_params: - model: openai/fake - api_key: fake-key - api_base: https://exampleopenaiendpoint-production.up.railway.app/ - tags: ["teamB"] - model_info: - id: "teamb" + - model_name: fake-openai-endpoint + litellm_params: + model: openai/fake + api_key: fake-key + api_base: https://exampleopenaiendpoint-production.up.railway.app/ + tags: ["teamA"] # 👈 Key Change + model_info: + id: "team-a-model" # used for identifying model in response headers + - model_name: fake-openai-endpoint + litellm_params: + model: openai/fake + api_key: fake-key + api_base: https://exampleopenaiendpoint-production.up.railway.app/ + tags: ["teamB"] # 👈 Key Change + model_info: + id: "team-b-model" # used for identifying model in response headers - model_name: rerank-english-v3.0 litellm_params: model: 
cohere/rerank-english-v3.0 diff --git a/litellm/proxy/litellm_pre_call_utils.py b/litellm/proxy/litellm_pre_call_utils.py index f973d614c..60052bc27 100644 --- a/litellm/proxy/litellm_pre_call_utils.py +++ b/litellm/proxy/litellm_pre_call_utils.py @@ -379,12 +379,6 @@ async def add_litellm_data_to_request( # unpack callback_vars in data for k, v in callback_settings_obj.callback_vars.items(): data[k] = v - # Team based tags - add_team_based_tags_to_metadata( - data=data, - _metadata_variable_name=_metadata_variable_name, - user_api_key_dict=user_api_key_dict, - ) # Guardrails move_guardrails_to_metadata( @@ -396,24 +390,6 @@ async def add_litellm_data_to_request( return data -def add_team_based_tags_to_metadata( - data: dict, - _metadata_variable_name: str, - user_api_key_dict: UserAPIKeyAuth, -): - from litellm.proxy.proxy_server import premium_user - - if premium_user is True: - if ( - user_api_key_dict.team_metadata is not None - and "tags" in user_api_key_dict.team_metadata - ): - _team_tags = user_api_key_dict.team_metadata["tags"] - _tags_in_metadata = data[_metadata_variable_name].get("tags", []) - _tags_in_metadata.extend(_team_tags) - data[_metadata_variable_name]["tags"] = _tags_in_metadata - - def move_guardrails_to_metadata( data: dict, _metadata_variable_name: str, diff --git a/tests/otel_tests/test_team_tag_routing.py b/tests/otel_tests/test_team_tag_routing.py index 390e7dce6..6c7d9b450 100644 --- a/tests/otel_tests/test_team_tag_routing.py +++ b/tests/otel_tests/test_team_tag_routing.py @@ -109,7 +109,9 @@ async def test_team_tag_routing(): headers = dict(headers) print(response_a) print(headers) - assert headers["x-litellm-model-id"] == "teama", "Model ID should be teamA" + assert ( + headers["x-litellm-model-id"] == "team-a-model" + ), "Model ID should be teamA" key_with_team_b = await create_key_with_team(session, key, team_b_id) _key_with_team_b = key_with_team_b["key"] @@ -118,7 +120,9 @@ async def test_team_tag_routing(): headers = dict(headers) print(response_b) print(headers) - assert headers["x-litellm-model-id"] == "teamb", "Model ID should be teamB" + assert ( + headers["x-litellm-model-id"] == "team-b-model" + ), "Model ID should be teamB" @pytest.mark.asyncio() From a4b88c16dc79ab898c48bbd993ec9c115916ed31 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Thu, 29 Aug 2024 17:01:23 -0700 Subject: [PATCH 14/14] fix indentation --- .../example_config_yaml/otel_test_config.yaml | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/litellm/proxy/example_config_yaml/otel_test_config.yaml b/litellm/proxy/example_config_yaml/otel_test_config.yaml index 36cce1198..80f24bfea 100644 --- a/litellm/proxy/example_config_yaml/otel_test_config.yaml +++ b/litellm/proxy/example_config_yaml/otel_test_config.yaml @@ -4,21 +4,22 @@ model_list: model: openai/fake api_key: fake-key api_base: https://exampleopenaiendpoint-production.up.railway.app/ - tags: ["teamA"] # 👈 Key Change + tags: ["teamA"] model_info: - id: "team-a-model" # used for identifying model in response headers + id: "team-a-model" - model_name: fake-openai-endpoint litellm_params: model: openai/fake api_key: fake-key api_base: https://exampleopenaiendpoint-production.up.railway.app/ - tags: ["teamB"] # 👈 Key Change + tags: ["teamB"] model_info: - id: "team-b-model" # used for identifying model in response headers - - model_name: rerank-english-v3.0 - litellm_params: - model: cohere/rerank-english-v3.0 - api_key: os.environ/COHERE_API_KEY + id: "team-b-model" + - model_name: 
rerank-english-v3.0
+    litellm_params:
+      model: cohere/rerank-english-v3.0
+      api_key: os.environ/COHERE_API_KEY
 
 litellm_settings:
   cache: true
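
---

As a guard against the kind of indentation slip corrected in this final commit, a proxy config can be sanity-checked before it is loaded. The sketch below is not part of the patch; `validate_config.py` is a hypothetical helper, and it assumes PyYAML is installed. A mis-indented `model_list` entry often still parses as valid YAML, just with keys nested in the wrong place, so the check looks for the expected top-level fields on each entry.

```python
# validate_config.py -- hypothetical helper, not part of this patch
import sys

import yaml

with open(sys.argv[1]) as f:
    config = yaml.safe_load(f)  # raises yaml.YAMLError on malformed YAML

# Mis-indentation usually shows up as a model_list entry whose keys ended
# up nested under the wrong parent, so verify the required fields exist.
for i, entry in enumerate(config.get("model_list", [])):
    for required in ("model_name", "litellm_params"):
        if required not in entry:
            raise SystemExit(f"model_list[{i}] is missing '{required}' -- check indentation")

print("config OK")
```

Run against a config file (e.g. `python validate_config.py litellm/proxy/example_config_yaml/otel_test_config.yaml`), this kind of check can flag a mis-nested entry before it reaches CI.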