Merge pull request #5601 from BerriAI/litellm_tag_routing_fixes
[Feat] Tag Routing - Allow setting default deployments
Commit c86b333054
4 changed files with 167 additions and 17 deletions
```diff
@@ -25,6 +25,13 @@ model_list:
       model: openai/gpt-4o
       api_key: os.environ/OPENAI_API_KEY
       tags: ["paid"] # 👈 Key Change
+  - model_name: gpt-4
+    litellm_params:
+      model: openai/gpt-4o
+      api_key: os.environ/OPENAI_API_KEY
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      tags: ["default"] # OPTIONAL - All untagged requests will get routed to this
+
 
 router_settings:
   enable_tag_filtering: True # 👈 Key Change
```
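For reference, the paid/default split above maps directly onto the Python `litellm.Router`. A minimal sketch, assuming `litellm` is installed and `OPENAI_API_KEY` is exported; the `model_info` ids are illustrative and not part of this change:

```python
import asyncio
import os

import litellm

# Two deployments for the same model group: one tagged ["paid"],
# one tagged ["default"] to catch untagged traffic.
router = litellm.Router(
    model_list=[
        {
            "model_name": "gpt-4",
            "litellm_params": {
                "model": "openai/gpt-4o",
                "api_key": os.environ["OPENAI_API_KEY"],
                "tags": ["paid"],
            },
            "model_info": {"id": "paid-model"},  # illustrative id
        },
        {
            "model_name": "gpt-4",
            "litellm_params": {
                "model": "openai/gpt-4o",
                "api_key": os.environ["OPENAI_API_KEY"],
                "tags": ["default"],
            },
            "model_info": {"id": "default-model"},  # illustrative id
        },
    ],
    enable_tag_filtering=True,  # 👈 Key Change
)


async def main():
    # Tagged request -> routed to a deployment whose tags cover "paid"
    paid = await router.acompletion(
        model="gpt-4",
        messages=[{"role": "user", "content": "hi"}],
        metadata={"tags": ["paid"]},
    )
    # Untagged request -> falls back to the ["default"] deployment
    untagged = await router.acompletion(
        model="gpt-4",
        messages=[{"role": "user", "content": "hi"}],
    )
    print(paid._hidden_params["model_id"], untagged._hidden_params["model_id"])


asyncio.run(main())
```

The untagged call should report `default-model`, mirroring the proxy behavior documented in the next hunk.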
@@ -136,6 +143,46 @@ Response

## Setting Default Tags

Use this if you want all untagged requests to be routed to specific deployments.

1. Set default tag on your yaml

```yaml
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      tags: ["default"] # 👈 Key Change - All untagged requests will get routed to this
    model_info:
      id: "default-model" # used for identifying model in response headers
```

2. Start proxy

```shell
$ litellm --config /path/to/config.yaml
```

3. Make request with no tags

```shell
curl -i http://localhost:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-1234" \
  -d '{
    "model": "fake-openai-endpoint",
    "messages": [
      {"role": "user", "content": "Hello, Claude gm!"}
    ]
  }'
```

Expect to see the following response header when this works:

```shell
x-litellm-model-id: default-model
```
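The same check can be done from Python with the OpenAI SDK's raw-response interface. A sketch, assuming `openai>=1.x`, the proxy from step 2 running on `localhost:4000`, and the example `sk-1234` key:

```python
from openai import OpenAI

# Untagged request against the proxy; the x-litellm-model-id response
# header should name the "default"-tagged deployment.
client = OpenAI(base_url="http://localhost:4000/v1", api_key="sk-1234")

raw = client.chat.completions.with_raw_response.create(
    model="fake-openai-endpoint",
    messages=[{"role": "user", "content": "Hello, Claude gm!"}],
)

print(raw.headers.get("x-litellm-model-id"))  # expect: default-model
print(raw.parse().choices[0].message.content)
```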
## ✨ Team based tag routing (Enterprise)

LiteLLM Proxy supports team-based tag routing, allowing you to associate specific tags with teams and route requests accordingly. For example, **Team A can access gpt-4 deployment A, Team B can access gpt-4 deployment B** (LLM Access Control For Teams).
```diff
@@ -170,6 +217,12 @@ Here's how to set up and use team-based tag routing using curl commands:
       tags: ["teamB"] # 👈 Key Change
     model_info:
       id: "team-b-model" # used for identifying model in response headers
+  - model_name: fake-openai-endpoint
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      tags: ["default"] # OPTIONAL - All untagged requests will get routed to this
 
 router_settings:
   enable_tag_filtering: True # 👈 Key Change
```
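One consequence of the new default deployment worth noting: with tag filtering on, a request whose tag matches no deployment (say, a team whose tag isn't in this config) also lands on the `default`-tagged deployment, via the new `elif "default" in deployment_tags` branch in the routing change further below. A `litellm.Router` sketch of that fallback, with model names collapsed into a single `gpt-4` group for brevity:

```python
import asyncio

import litellm

# Only a teamB deployment and the new "default" catch-all exist here.
router = litellm.Router(
    model_list=[
        {
            "model_name": "gpt-4",
            "litellm_params": {
                "model": "openai/fake",
                "api_key": "fake-key",
                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
                "tags": ["teamB"],
            },
            "model_info": {"id": "team-b-model"},
        },
        {
            "model_name": "gpt-4",
            "litellm_params": {
                "model": "openai/fake",
                "api_key": "fake-key",
                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
                "tags": ["default"],
            },
            "model_info": {"id": "default-model"},
        },
    ],
    enable_tag_filtering=True,
)


async def main():
    # Tag with no matching deployment -> falls back to the "default" deployment
    response = await router.acompletion(
        model="gpt-4",
        messages=[{"role": "user", "content": "hi"}],
        metadata={"tags": ["teamA"]},
    )
    print(response._hidden_params["model_id"])  # expect: default-model


asyncio.run(main())
```

On the proxy, the team's tags are attached to the request for you per the team-based flow described above; this sketch passes them via `metadata` only to exercise the same router logic directly.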
```diff
@@ -1,25 +1,30 @@
 model_list:
   - model_name: openai/*
     litellm_params:
-      model: gpt-3.5-turbo
+      model: openai/*
       api_key: os.environ/OPENAI_API_KEY
-
-litellm_settings:
-  success_callback: ["prometheus"]
-  failure_callback: ["prometheus"]
-
-
-guardrails:
-  - guardrail_name: "presidio-pre-guard"
+    model_info:
+      id: "good-openai"
+  - model_name: openai/*
     litellm_params:
-      guardrail: presidio # supported values: "aporia", "lakera", "presidio"
-      mode: "pre_call" # pre_call, during_call, post_call
-      output_parse_pii: True
+      model: openai/*
+      api_key: os.environ/non-exsitent-env-var
+      tags: ["bad-model"]
+    model_info:
+      id: "test-openai"
+
+
+router_settings:
+  enable_tag_filtering: True # 👈 Key Chang
 
 litellm_settings:
   callbacks: ["prometheus"]
 
 general_settings:
   master_key: sk-1234
   alerting: ["slack"]
   spend_report_frequency: "1d"
+
+litellm_settings:
+  success_callback: ["prometheus"]
+  failure_callback: ["prometheus"]
```
```diff
@@ -1,5 +1,9 @@
 """
-Use this to route requests between free and paid tiers
+Use this to route requests between Teams
+
+- If tags in request is a subset of tags in deployment, return deployment
+- if deployments are set with default tags, return all default deployment
+- If no default_deployments are set, return all deployments
 """
 
 from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, TypedDict, Union
```
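The docstring's rules boil down to a little set logic. A standalone sketch of the selection behavior — illustrative only; the module's actual `get_deployments_for_tag` helper also handles logging and the `None` guards shown in the hunks below:

```python
from typing import Any, Dict, List, Optional


def pick_deployments(
    request_tags: Optional[List[str]],
    healthy_deployments: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Sketch of tag-based selection:
    - tagged request: deployments whose tags contain all request tags,
      plus any deployment tagged "default"
    - untagged request: prefer "default"-tagged deployments, else everything
    """

    def tags_of(deployment: Dict[str, Any]) -> List[str]:
        return deployment.get("litellm_params", {}).get("tags", []) or []

    if request_tags:
        return [
            d
            for d in healthy_deployments
            if set(request_tags).issubset(tags_of(d)) or "default" in tags_of(d)
        ]

    defaults = [d for d in healthy_deployments if "default" in tags_of(d)]
    return defaults or healthy_deployments


# Example: untagged request with one "default" deployment configured
deployments = [
    {"litellm_params": {"tags": ["paid"]}, "model_info": {"id": "paid-model"}},
    {"litellm_params": {"tags": ["default"]}, "model_info": {"id": "default-model"}},
]
print([d["model_info"]["id"] for d in pick_deployments(None, deployments)])
# -> ['default-model']
```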
```diff
@@ -25,14 +29,14 @@ async def get_deployments_for_tag(
 
     if request_kwargs is None:
         verbose_logger.debug(
-            "get_deployments_for_tier: request_kwargs is None returning healthy_deployments: %s",
+            "get_deployments_for_tag: request_kwargs is None returning healthy_deployments: %s",
             healthy_deployments,
         )
         return healthy_deployments
 
     if healthy_deployments is None:
         verbose_logger.debug(
-            "get_deployments_for_tier: healthy_deployments is None returning healthy_deployments"
+            "get_deployments_for_tag: healthy_deployments is None returning healthy_deployments"
         )
         return healthy_deployments
 
```
```diff
@@ -43,7 +47,9 @@ async def get_deployments_for_tag(
 
     new_healthy_deployments = []
     if request_tags:
-        verbose_logger.debug("parameter routing: router_keys: %s", request_tags)
+        verbose_logger.debug(
+            "get_deployments_for_tag routing: router_keys: %s", request_tags
+        )
         # example this can be router_keys=["free", "custom"]
         # get all deployments that have a superset of these router keys
         for deployment in healthy_deployments:
```
```diff
@@ -66,9 +72,26 @@ async def get_deployments_for_tag(
                     request_tags,
                 )
                 new_healthy_deployments.append(deployment)
+            elif "default" in deployment_tags:
+                verbose_logger.debug(
+                    "adding default deployment with tags: %s, request tags: %s",
+                    deployment_tags,
+                    request_tags,
+                )
+                new_healthy_deployments.append(deployment)
 
         return new_healthy_deployments
 
+    # for Untagged requests use default deployments if set
+    _default_deployments_with_tags = []
+    for deployment in healthy_deployments:
+        if "default" in deployment.get("litellm_params", {}).get("tags", []):
+            _default_deployments_with_tags.append(deployment)
+
+    if len(_default_deployments_with_tags) > 0:
+        return _default_deployments_with_tags
+
+    # if no default deployment is found, return healthy_deployments
     verbose_logger.debug(
         "no tier found in metadata, returning healthy_deployments: %s",
         healthy_deployments,
```
```diff
@@ -91,3 +91,72 @@ async def test_router_free_paid_tier():
     print("response_extra_info: ", response_extra_info)
 
     assert response_extra_info["model_id"] == "very-expensive-model"
+
+
+@pytest.mark.asyncio()
+async def test_default_tagged_deployments():
+    """
+    - only use default deployment for untagged requests
+    - if a request has tag "default", use default deployment
+    """
+
+    router = litellm.Router(
+        model_list=[
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                    "tags": ["default"],
+                },
+                "model_info": {"id": "default-model"},
+            },
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                },
+                "model_info": {"id": "default-model-2"},
+            },
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o-mini",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                    "tags": ["teamA"],
+                },
+                "model_info": {"id": "very-expensive-model"},
+            },
+        ],
+        enable_tag_filtering=True,
+    )
+
+    for _ in range(5):
+        # Untagged request, this should pick model with id == "default-model"
+        response = await router.acompletion(
+            model="gpt-4",
+            messages=[{"role": "user", "content": "Tell me a joke."}],
+        )
+
+        print("Response: ", response)
+
+        response_extra_info = response._hidden_params
+        print("response_extra_info: ", response_extra_info)
+
+        assert response_extra_info["model_id"] == "default-model"
+
+    for _ in range(5):
+        # requests tagged with "default", this should pick model with id == "default-model"
+        response = await router.acompletion(
+            model="gpt-4",
+            messages=[{"role": "user", "content": "Tell me a joke."}],
+            metadata={"tags": ["default"]},
+        )
+
+        print("Response: ", response)
+
+        response_extra_info = response._hidden_params
+        print("response_extra_info: ", response_extra_info)
+
+        assert response_extra_info["model_id"] == "default-model"
```