router - use free/paid tier routing

This commit is contained in:
Ishaan Jaff 2024-07-18 17:09:42 -07:00 committed by Krrish Dholakia
parent 229b7a6493
commit 0e70b5df14
3 changed files with 106 additions and 4 deletions

View file

@ -47,6 +47,7 @@ from litellm.assistants.main import AssistantDeleted
from litellm.caching import DualCache, InMemoryCache, RedisCache
from litellm.integrations.custom_logger import CustomLogger
from litellm.llms.azure import get_azure_ad_token_from_oidc
from litellm.router_strategy.free_paid_tiers import get_deployments_for_tier
from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
@ -4481,6 +4482,12 @@ class Router:
request_kwargs=request_kwargs,
)
# check free / paid tier for each deployment
healthy_deployments = await get_deployments_for_tier(
request_kwargs=request_kwargs,
healthy_deployments=healthy_deployments,
)
if len(healthy_deployments) == 0:
if _allowed_model_region is None:
_allowed_model_region = "n/a"

View file

@ -17,14 +17,19 @@ class Deployment(TypedDict):
async def get_deployments_for_tier(
request_kwargs: dict,
healthy_deployments: Optional[
Union[List[DeploymentTypedDict], List[Dict[str, Any]]]
] = None,
request_kwargs: Optional[Dict[Any, Any]] = None,
healthy_deployments: Optional[Union[List[Any], Dict[Any, Any]]] = None,
):
"""
if request_kwargs contains {"metadata": {"tier": "free"}} or {"metadata": {"tier": "paid"}}, then routes the request to free/paid tier models
"""
if request_kwargs is None:
verbose_logger.debug(
"get_deployments_for_tier: request_kwargs is None returning healthy_deployments: %s",
healthy_deployments,
)
return healthy_deployments
verbose_logger.debug("request metadata: %s", request_kwargs.get("metadata"))
if "metadata" in request_kwargs:
metadata = request_kwargs["metadata"]

View file

@ -0,0 +1,90 @@
#### What this tests ####
# This tests litellm router
import asyncio
import os
import sys
import time
import traceback
import openai
import pytest
sys.path.insert(
0, os.path.abspath("../..")
) # Adds the parent directory to the system path
import logging
import os
from collections import defaultdict
from concurrent.futures import ThreadPoolExecutor
from unittest.mock import AsyncMock, MagicMock, patch
import httpx
from dotenv import load_dotenv
import litellm
from litellm import Router
from litellm._logging import verbose_logger
verbose_logger.setLevel(logging.DEBUG)
load_dotenv()
@pytest.mark.asyncio()
async def test_router_free_paid_tier():
    """
    Test free/paid tier routing on the Router.

    Two deployments share the model_name "gpt-4": one tagged
    model_info["tier"] == "paid" (id "very-expensive-model") and one tagged
    "free" (id "very-cheap-model"). Requests whose metadata carries
    {"tier": "free"} must be routed to the free deployment, and
    {"tier": "paid"} to the paid deployment.

    NOTE(review): this hits a live example endpoint over the network
    (exampleopenaiendpoint-production.up.railway.app), so it requires
    connectivity to pass.
    """
    router = litellm.Router(
        model_list=[
            {
                "model_name": "gpt-4",
                "litellm_params": {
                    "model": "gpt-4o",
                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
                },
                # paid-tier deployment; id lets assertions check which one served
                "model_info": {"tier": "paid", "id": "very-expensive-model"},
            },
            {
                "model_name": "gpt-4",
                "litellm_params": {
                    "model": "gpt-4o-mini",
                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
                },
                # free-tier deployment
                "model_info": {"tier": "free", "id": "very-cheap-model"},
            },
        ]
    )

    for _ in range(5):
        # free-tier metadata: this should pick model with id == very-cheap-model
        response = await router.acompletion(
            model="gpt-4",
            messages=[{"role": "user", "content": "Tell me a joke."}],
            metadata={"tier": "free"},
        )

        print("Response: ", response)

        response_extra_info = response._hidden_params
        print("response_extra_info: ", response_extra_info)

        assert response_extra_info["model_id"] == "very-cheap-model"

    for _ in range(5):
        # paid-tier metadata: this should pick model with id == very-expensive-model
        response = await router.acompletion(
            model="gpt-4",
            messages=[{"role": "user", "content": "Tell me a joke."}],
            metadata={"tier": "paid"},
        )

        print("Response: ", response)

        response_extra_info = response._hidden_params
        print("response_extra_info: ", response_extra_info)

        assert response_extra_info["model_id"] == "very-expensive-model"