Merge pull request #5601 from BerriAI/litellm_tag_routing_fixes

[Feat] Tag Routing - Allow setting default deployments
Ishaan Jaff · 2024-09-09 17:45:24 -07:00 · committed by GitHub
commit c86b333054
4 changed files with 167 additions and 17 deletions


````diff
@@ -25,6 +25,13 @@ model_list:
       model: openai/gpt-4o
       api_key: os.environ/OPENAI_API_KEY
       tags: ["paid"] # 👈 Key Change
+  - model_name: gpt-4
+    litellm_params:
+      model: openai/gpt-4o
+      api_key: os.environ/OPENAI_API_KEY
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      tags: ["default"] # OPTIONAL - All untagged requests will get routed to this
 
 router_settings:
   enable_tag_filtering: True # 👈 Key Change
@@ -136,6 +143,46 @@ Response
 }
 ```
 
+## Setting Default Tags
+
+Use this if you want all untagged requests to be routed to specific deployments.
+
+1. Set a default tag in your yaml
+
+```yaml
+model_list:
+  - model_name: fake-openai-endpoint
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      tags: ["default"] # 👈 Key Change - All untagged requests will get routed to this
+    model_info:
+      id: "default-model" # used for identifying model in response headers
+```
+
+2. Start the proxy
+
+```shell
+$ litellm --config /path/to/config.yaml
+```
+
+3. Make a request with no tags
+
+```shell
+curl -i http://localhost:4000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-1234" \
+  -d '{
+    "model": "fake-openai-endpoint",
+    "messages": [
+      {"role": "user", "content": "Hello, Claude gm!"}
+    ]
+  }'
+```
+
+Expect to see the following response header when this works:
+
+```shell
+x-litellm-model-id: default-model
+```
 
 ## ✨ Team based tag routing (Enterprise)
 
 LiteLLM Proxy supports team-based tag routing, allowing you to associate specific tags with teams and route requests accordingly. Example: **Team A can access gpt-4 deployment A, Team B can access gpt-4 deployment B** (LLM Access Control For Teams)
@@ -170,6 +217,12 @@ Here's how to set up and use team-based tag routing using curl commands:
       tags: ["teamB"] # 👈 Key Change
     model_info:
       id: "team-b-model" # used for identifying model in response headers
+  - model_name: fake-openai-endpoint
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      tags: ["default"] # OPTIONAL - All untagged requests will get routed to this
 
 router_settings:
   enable_tag_filtering: True # 👈 Key Change
````
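
To verify the default-tag behavior from the docs above programmatically, the same untagged request can be sent through the OpenAI Python SDK pointed at the proxy. A minimal sketch, assuming the proxy from the example config is running on localhost:4000 with master key sk-1234:

```python
import openai

# Point the standard OpenAI client at the LiteLLM proxy.
client = openai.OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

# Send an untagged request; with a "default"-tagged deployment configured,
# the x-litellm-model-id response header should name the default deployment.
raw = client.chat.completions.with_raw_response.create(
    model="fake-openai-endpoint",
    messages=[{"role": "user", "content": "Hello!"}],
)
print(raw.headers.get("x-litellm-model-id"))  # expected: default-model

response = raw.parse()  # the usual ChatCompletion object
print(response.choices[0].message.content)
```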


````diff
@@ -1,25 +1,30 @@
 model_list:
   - model_name: openai/*
     litellm_params:
-      model: gpt-3.5-turbo
+      model: openai/*
       api_key: os.environ/OPENAI_API_KEY
+    model_info:
+      id: "good-openai"
+  - model_name: openai/*
+    litellm_params:
+      model: openai/*
+      api_key: os.environ/non-exsitent-env-var
+      tags: ["bad-model"]
+    model_info:
+      id: "test-openai"
 
-litellm_settings:
-  success_callback: ["prometheus"]
-  failure_callback: ["prometheus"]
-
-guardrails:
-  - guardrail_name: "presidio-pre-guard"
-    litellm_params:
-      guardrail: presidio # supported values: "aporia", "lakera", "presidio"
-      mode: "pre_call" # pre_call, during_call, post_call
-      output_parse_pii: True
+router_settings:
+  enable_tag_filtering: True # 👈 Key Change
+
+litellm_settings:
+  callbacks: ["prometheus"]
 
 general_settings:
   master_key: sk-1234
   alerting: ["slack"]
   spend_report_frequency: "1d"
-
-litellm_settings:
-  success_callback: ["prometheus"]
-  failure_callback: ["prometheus"]
````
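
For local experimentation, roughly the same setup can be expressed with the Python Router SDK instead of the proxy config. A sketch mirroring the YAML above (the ids, the wildcard model names, and the deliberately unresolvable api_key come from the config; everything else is illustrative):

```python
import litellm

router = litellm.Router(
    model_list=[
        {
            "model_name": "openai/*",
            "litellm_params": {
                "model": "openai/*",
                "api_key": "os.environ/OPENAI_API_KEY",  # resolved from the environment
            },
            "model_info": {"id": "good-openai"},
        },
        {
            "model_name": "openai/*",
            "litellm_params": {
                "model": "openai/*",
                "api_key": "os.environ/non-exsitent-env-var",  # intentionally broken deployment
                "tags": ["bad-model"],  # only requests tagged "bad-model" should match this
            },
            "model_info": {"id": "test-openai"},
        },
    ],
    enable_tag_filtering=True,  # same switch as router_settings in the YAML
)
```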


````diff
@@ -1,5 +1,9 @@
 """
-Use this to route requests between free and paid tiers
+Use this to route requests between Teams
+- If the tags in a request are a subset of a deployment's tags, return that deployment
+- If deployments are set with default tags, return all default deployments
+- If no default deployments are set, return all deployments
 """
 from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, TypedDict, Union
@@ -25,14 +29,14 @@ async def get_deployments_for_tag(
     if request_kwargs is None:
         verbose_logger.debug(
-            "get_deployments_for_tier: request_kwargs is None returning healthy_deployments: %s",
+            "get_deployments_for_tag: request_kwargs is None returning healthy_deployments: %s",
             healthy_deployments,
         )
         return healthy_deployments
 
     if healthy_deployments is None:
         verbose_logger.debug(
-            "get_deployments_for_tier: healthy_deployments is None returning healthy_deployments"
+            "get_deployments_for_tag: healthy_deployments is None returning healthy_deployments"
        )
         return healthy_deployments
@@ -43,7 +47,9 @@ async def get_deployments_for_tag(
     new_healthy_deployments = []
     if request_tags:
-        verbose_logger.debug("parameter routing: router_keys: %s", request_tags)
+        verbose_logger.debug(
+            "get_deployments_for_tag routing: router_keys: %s", request_tags
+        )
         # example: this can be router_keys=["free", "custom"]
         # get all deployments whose tags are a superset of these router keys
         for deployment in healthy_deployments:
@@ -66,9 +72,26 @@ async def get_deployments_for_tag(
                     request_tags,
                 )
                 new_healthy_deployments.append(deployment)
+            elif "default" in deployment_tags:
+                verbose_logger.debug(
+                    "adding default deployment with tags: %s, request tags: %s",
+                    deployment_tags,
+                    request_tags,
+                )
+                new_healthy_deployments.append(deployment)
 
         return new_healthy_deployments
 
+    # for untagged requests, use default deployments if set
+    _default_deployments_with_tags = []
+    for deployment in healthy_deployments:
+        if "default" in deployment.get("litellm_params", {}).get("tags", []):
+            _default_deployments_with_tags.append(deployment)
+
+    if len(_default_deployments_with_tags) > 0:
+        return _default_deployments_with_tags
+
+    # if no default deployment is found, return healthy_deployments
     verbose_logger.debug(
         "no tier found in metadata, returning healthy_deployments: %s",
         healthy_deployments,
````
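
Putting the branches together, the selection order the updated `get_deployments_for_tag` implements can be summarized as a standalone function. A simplified sketch (illustrative name and types; the real code also handles `None` inputs and emits debug logs):

```python
from typing import Any, Dict, List, Optional


def filter_deployments_by_tags(
    request_tags: Optional[List[str]],
    healthy_deployments: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    def tags_of(deployment: Dict[str, Any]) -> List[str]:
        return deployment.get("litellm_params", {}).get("tags", [])

    if request_tags:
        # tagged request: keep deployments whose tags cover the request's tags,
        # plus any deployment tagged "default"
        return [
            d
            for d in healthy_deployments
            if set(request_tags).issubset(tags_of(d)) or "default" in tags_of(d)
        ]

    # untagged request: prefer "default"-tagged deployments if any exist,
    # otherwise fall back to all healthy deployments
    defaults = [d for d in healthy_deployments if "default" in tags_of(d)]
    return defaults or healthy_deployments
```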


````diff
@@ -91,3 +91,72 @@ async def test_router_free_paid_tier():
         print("response_extra_info: ", response_extra_info)
 
         assert response_extra_info["model_id"] == "very-expensive-model"
+
+
+@pytest.mark.asyncio()
+async def test_default_tagged_deployments():
+    """
+    - only use default deployments for untagged requests
+    - if a request has the tag "default", use the default deployment
+    """
+    router = litellm.Router(
+        model_list=[
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                    "tags": ["default"],
+                },
+                "model_info": {"id": "default-model"},
+            },
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                },
+                "model_info": {"id": "default-model-2"},
+            },
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o-mini",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                    "tags": ["teamA"],
+                },
+                "model_info": {"id": "very-expensive-model"},
+            },
+        ],
+        enable_tag_filtering=True,
+    )
+
+    for _ in range(5):
+        # untagged request: this should pick the model with id == "default-model"
+        response = await router.acompletion(
+            model="gpt-4",
+            messages=[{"role": "user", "content": "Tell me a joke."}],
+        )
+
+        print("Response: ", response)
+
+        response_extra_info = response._hidden_params
+        print("response_extra_info: ", response_extra_info)
+        assert response_extra_info["model_id"] == "default-model"
+
+    for _ in range(5):
+        # request tagged "default": this should also pick the model with id == "default-model"
+        response = await router.acompletion(
+            model="gpt-4",
+            messages=[{"role": "user", "content": "Tell me a joke."}],
+            metadata={"tags": ["default"]},
+        )
+
+        print("Response: ", response)
+
+        response_extra_info = response._hidden_params
+        print("response_extra_info: ", response_extra_info)
+        assert response_extra_info["model_id"] == "default-model"
````
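
One consequence of the `elif "default" in deployment_tags` branch worth noting: a tagged request keeps default deployments in its candidate set, so it is not guaranteed to avoid them. A hypothetical follow-on check, runnable inside an async test with the router defined above:

```python
# A request tagged "teamA" matches the "very-expensive-model" deployment, but
# the "default"-tagged deployment also stays in the candidate set, so either
# id may come back depending on which deployment the routing strategy picks.
response = await router.acompletion(
    model="gpt-4",
    messages=[{"role": "user", "content": "Tell me a joke."}],
    metadata={"tags": ["teamA"]},
)
assert response._hidden_params["model_id"] in {"very-expensive-model", "default-model"}
```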