From a1f0df3cea4020f76aa090702492f8d1e9bd11a9 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Mon, 9 Sep 2024 14:00:17 -0700
Subject: [PATCH 1/6] fix debug statements

---
 litellm/proxy/proxy_config.yaml              | 30 ++++++++++----------
 litellm/router_strategy/tag_based_routing.py |  4 +--
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index 71a356b80..f2f4e86ed 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -1,22 +1,22 @@
 model_list:
   - model_name: openai/*
     litellm_params:
-      model: gpt-3.5-turbo
+      model: openai/*
       api_key: os.environ/OPENAI_API_KEY
+    model_info:
+      id: "good-openai"
+  - model_name: openai/*
+    litellm_params:
+      model: openai/*
+      api_key: os.environ/non-exsitent-env-var
+      tags: ["bad-model"]
+    model_info:
+      id: "test-openai"
+
+
+router_settings:
+  enable_tag_filtering: True # 👈 Key Change

 litellm_settings:
   success_callback: ["prometheus"]
-  failure_callback: ["prometheus"]
-
-guardrails:
-  - guardrail_name: "presidio-pre-guard"
-    litellm_params:
-      guardrail: presidio # supported values: "aporia", "lakera", "presidio"
-      mode: "pre_call" # pre_call, during_call, post_call
-      output_parse_pii: True
-
-general_settings:
-  master_key: sk-1234
-  alerting: ["slack"]
-  spend_report_frequency: "1d"
-
+  failure_callback: ["prometheus"]
\ No newline at end of file
diff --git a/litellm/router_strategy/tag_based_routing.py b/litellm/router_strategy/tag_based_routing.py
index ed350109c..2ffec522f 100644
--- a/litellm/router_strategy/tag_based_routing.py
+++ b/litellm/router_strategy/tag_based_routing.py
@@ -25,14 +25,14 @@ async def get_deployments_for_tag(

     if request_kwargs is None:
         verbose_logger.debug(
-            "get_deployments_for_tier: request_kwargs is None returning healthy_deployments: %s",
+            "get_deployments_for_tag: request_kwargs is None returning healthy_deployments: %s",
             healthy_deployments,
         )
         return healthy_deployments

     if healthy_deployments is None:
         verbose_logger.debug(
-            "get_deployments_for_tier: healthy_deployments is None returning healthy_deployments"
+            "get_deployments_for_tag: healthy_deployments is None returning healthy_deployments"
         )
         return healthy_deployments


From f1d0045ae6f3d9591b70fd7af59eff808c5d0eae Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Mon, 9 Sep 2024 14:11:54 -0700
Subject: [PATCH 2/6] fix tag based routing debugging

---
 litellm/router_strategy/tag_based_routing.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/litellm/router_strategy/tag_based_routing.py b/litellm/router_strategy/tag_based_routing.py
index 2ffec522f..5286751c5 100644
--- a/litellm/router_strategy/tag_based_routing.py
+++ b/litellm/router_strategy/tag_based_routing.py
@@ -43,7 +43,9 @@ async def get_deployments_for_tag(

     new_healthy_deployments = []
     if request_tags:
-        verbose_logger.debug("parameter routing: router_keys: %s", request_tags)
+        verbose_logger.debug(
+            "get_deployments_for_tag routing: router_keys: %s", request_tags
+        )
         # example this can be router_keys=["free", "custom"]
         # get all deployments that have a superset of these router keys
         for deployment in healthy_deployments:


From c4052ee7d7757451d132f634986baab654e1c6b4 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Mon, 9 Sep 2024 14:23:17 -0700
Subject: [PATCH 3/6] support default deployments

---
 litellm/router_strategy/tag_based_routing.py | 23 +++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/litellm/router_strategy/tag_based_routing.py b/litellm/router_strategy/tag_based_routing.py
index 5286751c5..78bc5e4f9 100644
--- a/litellm/router_strategy/tag_based_routing.py
+++ b/litellm/router_strategy/tag_based_routing.py
@@ -1,5 +1,9 @@
 """
-Use this to route requests between free and paid tiers
+Use this to route requests between Teams
+
+- If tags in request is a subset of tags in deployment, return deployment
+- If deployments are set with default tags, return all default deployments
+- If no default_deployments are set, return all deployments
 """

 from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, TypedDict, Union
@@ -68,9 +72,26 @@ async def get_deployments_for_tag(
                     request_tags,
                 )
                 new_healthy_deployments.append(deployment)
+            elif "default" in deployment_tags:
+                verbose_logger.debug(
+                    "adding default deployment with tags: %s, request tags: %s",
+                    deployment_tags,
+                    request_tags,
+                )
+                new_healthy_deployments.append(deployment)

         return new_healthy_deployments

+    # for Untagged requests use default deployments if set
+    _default_deployments_with_tags = []
+    for deployment in healthy_deployments:
+        if "default" in deployment.get("litellm_params", {}).get("tags", []):
+            _default_deployments_with_tags.append(deployment)
+
+    if len(_default_deployments_with_tags) > 0:
+        return _default_deployments_with_tags
+
+    # if no default deployment is found, return healthy_deployments
     verbose_logger.debug(
         "no tier found in metadata, returning healthy_deployments: %s",
         healthy_deployments,

From fe7ab3f3d7c4f4e52c382732ed39c7871a4799b0 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Mon, 9 Sep 2024 14:27:52 -0700
Subject: [PATCH 4/6] test test_default_tagged_deployments

---
 litellm/tests/test_router_tag_routing.py | 61 ++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/litellm/tests/test_router_tag_routing.py b/litellm/tests/test_router_tag_routing.py
index 67f100d79..4da109a13 100644
--- a/litellm/tests/test_router_tag_routing.py
+++ b/litellm/tests/test_router_tag_routing.py
@@ -91,3 +91,64 @@ async def test_router_free_paid_tier():
     print("response_extra_info: ", response_extra_info)

     assert response_extra_info["model_id"] == "very-expensive-model"
+
+
+@pytest.mark.asyncio()
+async def test_default_tagged_deployments():
+    """
+    - only use default deployment for untagged requests
+    - if a request has tag "default", use default deployment
+    """
+
+    router = litellm.Router(
+        model_list=[
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                    "tags": ["default"],
+                },
+                "model_info": {"id": "default-model"},
+            },
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o-mini",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                    "tags": ["teamA"],
+                },
+                "model_info": {"id": "very-expensive-model"},
+            },
+        ],
+        enable_tag_filtering=True,
+    )
+
+    for _ in range(5):
+        # Untagged request, this should pick model with id == "default-model"
+        response = await router.acompletion(
+            model="gpt-4",
+            messages=[{"role": "user", "content": "Tell me a joke."}],
+        )
+
+        print("Response: ", response)
+
+        response_extra_info = response._hidden_params
+        print("response_extra_info: ", response_extra_info)
+
+        assert response_extra_info["model_id"] == "default-model"
+
+    for _ in range(5):
+        # requests tagged with "default", this should pick model with id == "default-model"
+        response = await router.acompletion(
+            model="gpt-4",
+            messages=[{"role": "user", "content": "Tell me a joke."}],
+            metadata={"tags": ["default"]},
+        )
+
+        print("Response: ", response)
+
+        response_extra_info = response._hidden_params
+        print("response_extra_info: ", response_extra_info)
+
+        assert response_extra_info["model_id"] == "default-model"

From 2fceeedd94aa0163f616fc0e5af039e2c2e6df17 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Mon, 9 Sep 2024 14:41:22 -0700
Subject: [PATCH 5/6] add "default" tag

---
 docs/my-website/docs/proxy/tag_routing.md | 53 +++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/docs/my-website/docs/proxy/tag_routing.md b/docs/my-website/docs/proxy/tag_routing.md
index 603c47fad..4b2621fa8 100644
--- a/docs/my-website/docs/proxy/tag_routing.md
+++ b/docs/my-website/docs/proxy/tag_routing.md
@@ -25,6 +25,13 @@ model_list:
       model: openai/gpt-4o
       api_key: os.environ/OPENAI_API_KEY
       tags: ["paid"] # 👈 Key Change
+  - model_name: gpt-4
+    litellm_params:
+      model: openai/gpt-4o
+      api_key: os.environ/OPENAI_API_KEY
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      tags: ["default"] # OPTIONAL - All untagged requests will get routed to this
+

 router_settings:
   enable_tag_filtering: True # 👈 Key Change
@@ -136,6 +143,46 @@ Response
 }
 ```

+## Setting Default Tags
+
+Use this if you want all untagged requests to be routed to specific deployments
+
+1. Set default tag on your yaml
+```yaml
+model_list:
+  - model_name: fake-openai-endpoint
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      tags: ["default"] # 👈 Key Change - All untagged requests will get routed to this
+    model_info:
+      id: "default-model" # used for identifying model in response headers
+```
+
+2. Start proxy
+```shell
+$ litellm --config /path/to/config.yaml
+```
+
+3. Make request with no tags
+```shell
+curl -i http://localhost:4000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-1234" \
+  -d '{
+    "model": "fake-openai-endpoint",
+    "messages": [
+      {"role": "user", "content": "Hello, Claude gm!"}
+    ]
+  }'
+```
+
+Expect to see the following response header when this works
+```shell
+x-litellm-model-id: default-model
+```
+
 ## ✨ Team based tag routing (Enterprise)

 LiteLLM Proxy supports team-based tag routing, allowing you to associate specific tags with teams and route requests accordingly. Example **Team A can access gpt-4 deployment A, Team B can access gpt-4 deployment B** (LLM Access Control For Teams)
@@ -170,6 +217,12 @@ Here's how to set up and use team-based tag routing using curl commands:
       tags: ["teamB"] # 👈 Key Change
     model_info:
       id: "team-b-model" # used for identifying model in response headers
+  - model_name: fake-openai-endpoint
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      tags: ["default"] # OPTIONAL - All untagged requests will get routed to this

 router_settings:
   enable_tag_filtering: True # 👈 Key Change

From 05210fee6af26be0b45b67d56b1bb34efefa73f9 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Mon, 9 Sep 2024 14:48:29 -0700
Subject: [PATCH 6/6] update test_default_tagged_deployments

---
 litellm/tests/test_router_tag_routing.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/litellm/tests/test_router_tag_routing.py b/litellm/tests/test_router_tag_routing.py
index 4da109a13..f71a9b762 100644
--- a/litellm/tests/test_router_tag_routing.py
+++ b/litellm/tests/test_router_tag_routing.py
@@ -111,6 +111,14 @@ async def test_default_tagged_deployments():
                 },
                 "model_info": {"id": "default-model"},
             },
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                },
+                "model_info": {"id": "default-model-2"},
+            },
             {
                 "model_name": "gpt-4",
                 "litellm_params": {
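
For reference, below is a minimal, self-contained sketch of the selection behavior these patches describe: tagged requests keep deployments whose tags contain the request tags (plus any "default"-tagged deployment), and untagged requests fall back to "default"-tagged deployments when they exist. The function name `filter_deployments_by_tag` and the simplified dict shape are illustrative assumptions only, not LiteLLM's actual `get_deployments_for_tag` implementation.

```python
# Illustrative sketch of tag-based routing with a "default" fallback.
# NOT LiteLLM's real code: names and dict shapes are assumptions that
# mirror the test fixtures in the patches above.
from typing import Any, Dict, List, Optional


def filter_deployments_by_tag(
    healthy_deployments: List[Dict[str, Any]],
    request_tags: Optional[List[str]] = None,
) -> List[Dict[str, Any]]:
    """Return the deployments a tagged (or untagged) request may use."""

    def tags_of(deployment: Dict[str, Any]) -> List[str]:
        return deployment.get("litellm_params", {}).get("tags", [])

    if request_tags:
        # tagged request: keep deployments whose tags contain all request tags,
        # plus any deployment explicitly tagged "default"
        return [
            d
            for d in healthy_deployments
            if set(request_tags).issubset(tags_of(d)) or "default" in tags_of(d)
        ]

    # untagged request: prefer "default"-tagged deployments,
    # otherwise fall back to every healthy deployment
    default_deployments = [d for d in healthy_deployments if "default" in tags_of(d)]
    return default_deployments or healthy_deployments


if __name__ == "__main__":
    deployments = [
        {"litellm_params": {"tags": ["default"]}, "model_info": {"id": "default-model"}},
        {"litellm_params": {"tags": ["teamA"]}, "model_info": {"id": "team-a-model"}},
    ]
    # untagged -> ['default-model']; tagged "teamA" -> ['default-model', 'team-a-model']
    print([d["model_info"]["id"] for d in filter_deployments_by_tag(deployments)])
    print([d["model_info"]["id"] for d in filter_deployments_by_tag(deployments, ["teamA"])])
```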