Merge pull request #5601 from BerriAI/litellm_tag_routing_fixes
[Feat] Tag Routing - Allow setting default deployments
Commit c86b333054
4 changed files with 167 additions and 17 deletions
```diff
@@ -25,6 +25,13 @@ model_list:
       model: openai/gpt-4o
       api_key: os.environ/OPENAI_API_KEY
       tags: ["paid"] # 👈 Key Change
+  - model_name: gpt-4
+    litellm_params:
+      model: openai/gpt-4o
+      api_key: os.environ/OPENAI_API_KEY
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      tags: ["default"] # OPTIONAL - All untagged requests will get routed to this
+
 
 router_settings:
   enable_tag_filtering: True # 👈 Key Change
```
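For reference, the paid/default split above maps directly onto the Python `litellm.Router`. A minimal sketch, assuming `litellm` is installed and `OPENAI_API_KEY` is exported; the `model_info` ids are illustrative and not part of this change:

```python
import asyncio
import os

import litellm

# Two deployments for the same model group: one tagged ["paid"],
# one tagged ["default"] to catch untagged traffic.
router = litellm.Router(
    model_list=[
        {
            "model_name": "gpt-4",
            "litellm_params": {
                "model": "openai/gpt-4o",
                "api_key": os.environ["OPENAI_API_KEY"],
                "tags": ["paid"],
            },
            "model_info": {"id": "paid-model"},  # illustrative id
        },
        {
            "model_name": "gpt-4",
            "litellm_params": {
                "model": "openai/gpt-4o",
                "api_key": os.environ["OPENAI_API_KEY"],
                "tags": ["default"],
            },
            "model_info": {"id": "default-model"},  # illustrative id
        },
    ],
    enable_tag_filtering=True,  # 👈 Key Change
)


async def main():
    # Tagged request -> routed to a deployment whose tags cover "paid"
    paid = await router.acompletion(
        model="gpt-4",
        messages=[{"role": "user", "content": "hi"}],
        metadata={"tags": ["paid"]},
    )
    # Untagged request -> falls back to the ["default"] deployment
    untagged = await router.acompletion(
        model="gpt-4",
        messages=[{"role": "user", "content": "hi"}],
    )
    print(paid._hidden_params["model_id"], untagged._hidden_params["model_id"])


asyncio.run(main())
```

The untagged call should report `default-model`, mirroring the proxy behavior documented in the next hunk.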
@@ -136,6 +143,46 @@ Response

## Setting Default Tags

Use this if you want all untagged requests to be routed to specific deployments.

1. Set default tag on your yaml

```yaml
model_list:
  - model_name: fake-openai-endpoint
    litellm_params:
      model: openai/fake
      api_key: fake-key
      api_base: https://exampleopenaiendpoint-production.up.railway.app/
      tags: ["default"] # 👈 Key Change - All untagged requests will get routed to this
    model_info:
      id: "default-model" # used for identifying model in response headers
```

2. Start proxy

```shell
$ litellm --config /path/to/config.yaml
```

3. Make request with no tags

```shell
curl -i http://localhost:4000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-1234" \
  -d '{
    "model": "fake-openai-endpoint",
    "messages": [
      {"role": "user", "content": "Hello, Claude gm!"}
    ]
  }'
```

Expect to see the following response header when this works:

```shell
x-litellm-model-id: default-model
```
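The same check can be done from Python with the OpenAI SDK's raw-response interface. A sketch, assuming `openai>=1.x`, the proxy from step 2 running on `localhost:4000`, and the example `sk-1234` key:

```python
from openai import OpenAI

# Untagged request against the proxy; the x-litellm-model-id response
# header should name the "default"-tagged deployment.
client = OpenAI(base_url="http://localhost:4000/v1", api_key="sk-1234")

raw = client.chat.completions.with_raw_response.create(
    model="fake-openai-endpoint",
    messages=[{"role": "user", "content": "Hello, Claude gm!"}],
)

print(raw.headers.get("x-litellm-model-id"))  # expect: default-model
print(raw.parse().choices[0].message.content)
```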
## ✨ Team based tag routing (Enterprise)

LiteLLM Proxy supports team-based tag routing, allowing you to associate specific tags with teams and route requests accordingly. For example, **Team A can access gpt-4 deployment A, Team B can access gpt-4 deployment B** (LLM Access Control For Teams).
```diff
@@ -170,6 +217,12 @@ Here's how to set up and use team-based tag routing using curl commands:
       tags: ["teamB"] # 👈 Key Change
     model_info:
       id: "team-b-model" # used for identifying model in response headers
+  - model_name: fake-openai-endpoint
+    litellm_params:
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/
+      tags: ["default"] # OPTIONAL - All untagged requests will get routed to this
 
 router_settings:
   enable_tag_filtering: True # 👈 Key Change
```
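One consequence of the new default deployment worth noting: with tag filtering on, a request whose tag matches no deployment (say, a team whose tag isn't in this config) also lands on the `default`-tagged deployment, via the new `elif "default" in deployment_tags` branch in the routing change further below. A `litellm.Router` sketch of that fallback, with model names collapsed into a single `gpt-4` group for brevity:

```python
import asyncio

import litellm

# Only a teamB deployment and the new "default" catch-all exist here.
router = litellm.Router(
    model_list=[
        {
            "model_name": "gpt-4",
            "litellm_params": {
                "model": "openai/fake",
                "api_key": "fake-key",
                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
                "tags": ["teamB"],
            },
            "model_info": {"id": "team-b-model"},
        },
        {
            "model_name": "gpt-4",
            "litellm_params": {
                "model": "openai/fake",
                "api_key": "fake-key",
                "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
                "tags": ["default"],
            },
            "model_info": {"id": "default-model"},
        },
    ],
    enable_tag_filtering=True,
)


async def main():
    # Tag with no matching deployment -> falls back to the "default" deployment
    response = await router.acompletion(
        model="gpt-4",
        messages=[{"role": "user", "content": "hi"}],
        metadata={"tags": ["teamA"]},
    )
    print(response._hidden_params["model_id"])  # expect: default-model


asyncio.run(main())
```

On the proxy, the team's tags are attached to the request for you per the team-based flow described above; this sketch passes them via `metadata` only to exercise the same router logic directly.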
```diff
@@ -1,25 +1,30 @@
 model_list:
   - model_name: openai/*
     litellm_params:
-      model: gpt-3.5-turbo
+      model: openai/*
       api_key: os.environ/OPENAI_API_KEY
-
-litellm_settings:
-  success_callback: ["prometheus"]
-  failure_callback: ["prometheus"]
-
-
-guardrails:
-  - guardrail_name: "presidio-pre-guard"
+    model_info:
+      id: "good-openai"
+  - model_name: openai/*
     litellm_params:
-      guardrail: presidio # supported values: "aporia", "lakera", "presidio"
-      mode: "pre_call" # pre_call, during_call, post_call
-      output_parse_pii: True
+      model: openai/*
+      api_key: os.environ/non-exsitent-env-var
+      tags: ["bad-model"]
+    model_info:
+      id: "test-openai"
+
+
+router_settings:
+  enable_tag_filtering: True # 👈 Key Chang
 
 litellm_settings:
   callbacks: ["prometheus"]
 
 general_settings:
   master_key: sk-1234
   alerting: ["slack"]
   spend_report_frequency: "1d"
+
+litellm_settings:
+  success_callback: ["prometheus"]
+  failure_callback: ["prometheus"]
```
```diff
@@ -1,5 +1,9 @@
 """
-Use this to route requests between free and paid tiers
+Use this to route requests between Teams
+
+- If tags in request is a subset of tags in deployment, return deployment
+- if deployments are set with default tags, return all default deployment
+- If no default_deployments are set, return all deployments
 """
 
 from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, TypedDict, Union
```
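The docstring's rules boil down to a little set logic. A standalone sketch of the selection behavior — illustrative only; the module's actual `get_deployments_for_tag` helper also handles logging and the `None` guards shown in the hunks below:

```python
from typing import Any, Dict, List, Optional


def pick_deployments(
    request_tags: Optional[List[str]],
    healthy_deployments: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Sketch of tag-based selection:
    - tagged request: deployments whose tags contain all request tags,
      plus any deployment tagged "default"
    - untagged request: prefer "default"-tagged deployments, else everything
    """

    def tags_of(deployment: Dict[str, Any]) -> List[str]:
        return deployment.get("litellm_params", {}).get("tags", []) or []

    if request_tags:
        return [
            d
            for d in healthy_deployments
            if set(request_tags).issubset(tags_of(d)) or "default" in tags_of(d)
        ]

    defaults = [d for d in healthy_deployments if "default" in tags_of(d)]
    return defaults or healthy_deployments


# Example: untagged request with one "default" deployment configured
deployments = [
    {"litellm_params": {"tags": ["paid"]}, "model_info": {"id": "paid-model"}},
    {"litellm_params": {"tags": ["default"]}, "model_info": {"id": "default-model"}},
]
print([d["model_info"]["id"] for d in pick_deployments(None, deployments)])
# -> ['default-model']
```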
```diff
@@ -25,14 +29,14 @@ async def get_deployments_for_tag(
 
     if request_kwargs is None:
         verbose_logger.debug(
-            "get_deployments_for_tier: request_kwargs is None returning healthy_deployments: %s",
+            "get_deployments_for_tag: request_kwargs is None returning healthy_deployments: %s",
             healthy_deployments,
         )
         return healthy_deployments
 
     if healthy_deployments is None:
         verbose_logger.debug(
-            "get_deployments_for_tier: healthy_deployments is None returning healthy_deployments"
+            "get_deployments_for_tag: healthy_deployments is None returning healthy_deployments"
         )
         return healthy_deployments
 
```
```diff
@@ -43,7 +47,9 @@ async def get_deployments_for_tag(
 
     new_healthy_deployments = []
     if request_tags:
-        verbose_logger.debug("parameter routing: router_keys: %s", request_tags)
+        verbose_logger.debug(
+            "get_deployments_for_tag routing: router_keys: %s", request_tags
+        )
         # example this can be router_keys=["free", "custom"]
         # get all deployments that have a superset of these router keys
         for deployment in healthy_deployments:
```
```diff
@@ -66,9 +72,26 @@ async def get_deployments_for_tag(
                     request_tags,
                 )
                 new_healthy_deployments.append(deployment)
+            elif "default" in deployment_tags:
+                verbose_logger.debug(
+                    "adding default deployment with tags: %s, request tags: %s",
+                    deployment_tags,
+                    request_tags,
+                )
+                new_healthy_deployments.append(deployment)
 
         return new_healthy_deployments
 
+    # for Untagged requests use default deployments if set
+    _default_deployments_with_tags = []
+    for deployment in healthy_deployments:
+        if "default" in deployment.get("litellm_params", {}).get("tags", []):
+            _default_deployments_with_tags.append(deployment)
+
+    if len(_default_deployments_with_tags) > 0:
+        return _default_deployments_with_tags
+
+    # if no default deployment is found, return healthy_deployments
     verbose_logger.debug(
         "no tier found in metadata, returning healthy_deployments: %s",
         healthy_deployments,
```
```diff
@@ -91,3 +91,72 @@ async def test_router_free_paid_tier():
     print("response_extra_info: ", response_extra_info)
 
     assert response_extra_info["model_id"] == "very-expensive-model"
+
+
+@pytest.mark.asyncio()
+async def test_default_tagged_deployments():
+    """
+    - only use default deployment for untagged requests
+    - if a request has tag "default", use default deployment
+    """
+
+    router = litellm.Router(
+        model_list=[
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                    "tags": ["default"],
+                },
+                "model_info": {"id": "default-model"},
+            },
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                },
+                "model_info": {"id": "default-model-2"},
+            },
+            {
+                "model_name": "gpt-4",
+                "litellm_params": {
+                    "model": "gpt-4o-mini",
+                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
+                    "tags": ["teamA"],
+                },
+                "model_info": {"id": "very-expensive-model"},
+            },
+        ],
+        enable_tag_filtering=True,
+    )
+
+    for _ in range(5):
+        # Untagged request, this should pick model with id == "default-model"
+        response = await router.acompletion(
+            model="gpt-4",
+            messages=[{"role": "user", "content": "Tell me a joke."}],
+        )
+
+        print("Response: ", response)
+
+        response_extra_info = response._hidden_params
+        print("response_extra_info: ", response_extra_info)
+
+        assert response_extra_info["model_id"] == "default-model"
+
+    for _ in range(5):
+        # requests tagged with "default", this should pick model with id == "default-model"
+        response = await router.acompletion(
+            model="gpt-4",
+            messages=[{"role": "user", "content": "Tell me a joke."}],
+            metadata={"tags": ["default"]},
+        )
+
+        print("Response: ", response)
+
+        response_extra_info = response._hidden_params
+        print("response_extra_info: ", response_extra_info)
+
+        assert response_extra_info["model_id"] == "default-model"
```