From c8793c80226294ccefae237abe8f3008d1b53c0c Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 19 Nov 2024 11:46:39 -0800 Subject: [PATCH 01/16] add ProviderBudgetConfig --- litellm/types/router.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/litellm/types/router.py b/litellm/types/router.py index bb93aaa63..c160a8124 100644 --- a/litellm/types/router.py +++ b/litellm/types/router.py @@ -628,3 +628,16 @@ class RoutingStrategy(enum.Enum): COST_BASED = "cost-based-routing" USAGE_BASED_ROUTING_V2 = "usage-based-routing-v2" USAGE_BASED_ROUTING = "usage-based-routing" + PROVIDER_BUDGET_LIMITING = "provider-budget-routing" + + +class DayToBudgetLimit(TypedDict): + day: str + budget_limit: float + + +class ProviderBudgetConfig(TypedDict): + custom_llm_provider: str # The name of the provider (e.g., OpenAI, Azure) + budgets: ( + DayToBudgetLimit # Time periods (e.g., '1d', '30d') mapped to budget limits + ) From 1a9cf00bb473742f69a85d397e1e610bf17ae95a Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 19 Nov 2024 12:35:58 -0800 Subject: [PATCH 02/16] working test_provider_budgets_e2e_test --- litellm/router.py | 25 ++- litellm/router_strategy/provider_budgets.py | 192 +++++++++++++++++++ litellm/types/router.py | 10 +- tests/local_testing/test_provider_budgets.py | 84 ++++++++ 4 files changed, 303 insertions(+), 8 deletions(-) create mode 100644 litellm/router_strategy/provider_budgets.py create mode 100644 tests/local_testing/test_provider_budgets.py diff --git a/litellm/router.py b/litellm/router.py index 97065bc85..d582f614f 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -59,6 +59,7 @@ from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler from litellm.router_strategy.lowest_tpm_rpm import LowestTPMLoggingHandler from litellm.router_strategy.lowest_tpm_rpm_v2 import LowestTPMLoggingHandler_v2 +from litellm.router_strategy.provider_budgets import ProviderBudgetLimiting from litellm.router_strategy.simple_shuffle import simple_shuffle from litellm.router_strategy.tag_based_routing import get_deployments_for_tag from litellm.router_utils.batch_utils import ( @@ -234,8 +235,9 @@ class Router: "latency-based-routing", "cost-based-routing", "usage-based-routing-v2", + "provider-budget-routing", ] = "simple-shuffle", - routing_strategy_args: dict = {}, # just for latency-based routing + routing_strategy_args: dict = {}, # just for latency-based, semaphore: Optional[asyncio.Semaphore] = None, alerting_config: Optional[AlertingConfig] = None, router_general_settings: Optional[ @@ -644,6 +646,16 @@ class Router: ) if isinstance(litellm.callbacks, list): litellm.callbacks.append(self.lowestcost_logger) # type: ignore + elif ( + routing_strategy == RoutingStrategy.PROVIDER_BUDGET_LIMITING.value + or routing_strategy == RoutingStrategy.PROVIDER_BUDGET_LIMITING + ): + self.provider_budget_logger = ProviderBudgetLimiting( + router_cache=self.cache, + provider_budget_config=routing_strategy_args, + ) + if isinstance(litellm.callbacks, list): + litellm.callbacks.append(self.provider_budget_logger) # type: ignore else: pass @@ -5055,6 +5067,7 @@ class Router: and self.routing_strategy != "cost-based-routing" and self.routing_strategy != "latency-based-routing" and self.routing_strategy != "least-busy" + and self.routing_strategy != "provider-budget-routing" ): # prevent regressions for other routing strategies, that don't have async get available deployments implemented. 
return self.get_available_deployment( model=model, @@ -5170,6 +5183,16 @@ class Router: healthy_deployments=healthy_deployments, # type: ignore ) ) + elif ( + self.routing_strategy == "provider-budget-routing" + and self.provider_budget_logger is not None + ): + deployment = ( + await self.provider_budget_logger.async_get_available_deployments( + model_group=model, + healthy_deployments=healthy_deployments, # type: ignore + ) + ) else: deployment = None if deployment is None: diff --git a/litellm/router_strategy/provider_budgets.py b/litellm/router_strategy/provider_budgets.py new file mode 100644 index 000000000..423bcdd59 --- /dev/null +++ b/litellm/router_strategy/provider_budgets.py @@ -0,0 +1,192 @@ +import random +from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypedDict, Union + +import litellm +from litellm._logging import verbose_router_logger +from litellm.caching.caching import DualCache +from litellm.integrations.custom_logger import CustomLogger +from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs +from litellm.types.router import ( + LiteLLM_Params, + ProviderBudgetConfigType, + ProviderBudgetInfo, +) +from litellm.types.utils import StandardLoggingPayload + +if TYPE_CHECKING: + from opentelemetry.trace import Span as _Span + + Span = _Span +else: + Span = Any + + +class ProviderSpend(TypedDict, total=False): + """ + Provider spend data + + { + "openai": 300.0, + "anthropic": 100.0 + } + """ + + provider: str + spend: float + + +class ProviderBudgetLimiting(CustomLogger): + def __init__(self, router_cache: DualCache, provider_budget_config: dict): + self.router_cache = router_cache + self.provider_budget_config: ProviderBudgetConfigType = provider_budget_config + verbose_router_logger.debug( + f"Initalized Provider budget config: {self.provider_budget_config}" + ) + + async def async_get_available_deployments( + self, + model_group: str, + healthy_deployments: List[Dict], + messages: Optional[List[Dict[str, str]]] = None, + input: Optional[Union[str, List]] = None, + request_kwargs: Optional[Dict] = None, + ): + """ + Filter list of healthy deployments based on provider budget + """ + potential_deployments: List[Dict] = [] + + parent_otel_span: Optional[Span] = _get_parent_otel_span_from_kwargs( + request_kwargs + ) + + for deployment in healthy_deployments: + provider = self._get_llm_provider_for_deployment(deployment) + budget_config = self._get_budget_config_for_provider(provider) + if budget_config is None: + verbose_router_logger.debug( + f"No budget config found for provider {provider}, skipping" + ) + continue + + budget_limit = budget_config.budget_limit + current_spend: float = ( + await self.router_cache.async_get_cache( + key=f"provider_spend:{provider}:{budget_config.time_period}", + parent_otel_span=parent_otel_span, + ) + or 0.0 + ) + + verbose_router_logger.debug( + f"Current spend for {provider}: {current_spend}, budget limit: {budget_limit}" + ) + + if current_spend >= budget_limit: + verbose_router_logger.debug( + f"Skipping deployment {deployment} for provider {provider} as spend limit exceeded" + ) + continue + + potential_deployments.append(deployment) + # randomly pick one deployment from potential_deployments + if potential_deployments: + return random.choice(potential_deployments) + return None + + async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): + """ + Increment provider spend in DualCache (InMemory + Redis) + """ + verbose_router_logger.debug( + f"in 
ProviderBudgetLimiting.async_log_success_event" + ) + standard_logging_payload: Optional[StandardLoggingPayload] = kwargs.get( + "standard_logging_object", None + ) + if standard_logging_payload is None: + raise ValueError("standard_logging_payload is required") + + response_cost: float = standard_logging_payload.get("response_cost", 0) + + custom_llm_provider: str = kwargs.get("litellm_params", {}).get( + "custom_llm_provider", None + ) + if custom_llm_provider is None: + raise ValueError("custom_llm_provider is required") + + budget_config = self._get_budget_config_for_provider(custom_llm_provider) + if budget_config is None: + raise ValueError( + f"No budget config found for provider {custom_llm_provider}, self.provider_budget_config: {self.provider_budget_config}" + ) + + spend_key = f"provider_spend:{custom_llm_provider}:{budget_config.time_period}" + ttl_seconds = self.get_ttl_seconds(budget_config.time_period) + verbose_router_logger.debug( + f"Incrementing spend for {spend_key} by {response_cost}, ttl: {ttl_seconds}" + ) + # Increment the spend in Redis and set TTL + await self.router_cache.async_increment_cache( + key=spend_key, + value=response_cost, + ttl=ttl_seconds, + ) + verbose_router_logger.debug( + f"Incremented spend for {spend_key} by {response_cost}, ttl: {ttl_seconds}" + ) + + def _get_budget_config_for_provider( + self, provider: str + ) -> Optional[ProviderBudgetInfo]: + return self.provider_budget_config.get(provider, None) + + def _get_llm_provider_for_deployment(self, deployment: Dict) -> str: + try: + _litellm_params: LiteLLM_Params = LiteLLM_Params( + **deployment["litellm_params"] + ) + _, custom_llm_provider, _, _ = litellm.get_llm_provider( + model=_litellm_params.model, + litellm_params=_litellm_params, + ) + except Exception as e: + raise e + return custom_llm_provider + + def _get_unique_custom_llm_providers_in_deployments( + self, deployments: List[Dict] + ) -> list: + """ + Get unique custom LLM providers in deployments + """ + unique_providers = set() + for deployment in deployments: + provider = self._get_llm_provider_for_deployment(deployment) + unique_providers.add(provider) + return list(unique_providers) + + def get_ttl_seconds(self, time_period: str) -> int: + """ + Convert time period (e.g., '1d', '30d') to seconds for Redis TTL + """ + if time_period.endswith("d"): + days = int(time_period[:-1]) + return days * 24 * 60 * 60 + raise ValueError(f"Unsupported time period format: {time_period}") + + def get_budget_limit(self, custom_llm_provider: str, time_period: str) -> float: + """ + Fetch the budget limit for a given provider and time period. + This can be fetched from a config or database. 
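+        Returns float("inf") if no budget is configured for the provider.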
+ """ + _provider_budget_settings = self.provider_budget_config.get( + custom_llm_provider, None + ) + if _provider_budget_settings is None: + return float("inf") + + verbose_router_logger.debug( + f"Provider budget settings: {_provider_budget_settings}" + ) + return _provider_budget_settings.budget_limit diff --git a/litellm/types/router.py b/litellm/types/router.py index c160a8124..f4d2b39ed 100644 --- a/litellm/types/router.py +++ b/litellm/types/router.py @@ -631,13 +631,9 @@ class RoutingStrategy(enum.Enum): PROVIDER_BUDGET_LIMITING = "provider-budget-routing" -class DayToBudgetLimit(TypedDict): - day: str +class ProviderBudgetInfo(BaseModel): + time_period: str # e.g., '1d', '30d' budget_limit: float -class ProviderBudgetConfig(TypedDict): - custom_llm_provider: str # The name of the provider (e.g., OpenAI, Azure) - budgets: ( - DayToBudgetLimit # Time periods (e.g., '1d', '30d') mapped to budget limits - ) +ProviderBudgetConfigType = Dict[str, ProviderBudgetInfo] diff --git a/tests/local_testing/test_provider_budgets.py b/tests/local_testing/test_provider_budgets.py new file mode 100644 index 000000000..40630c130 --- /dev/null +++ b/tests/local_testing/test_provider_budgets.py @@ -0,0 +1,84 @@ +import sys, os, asyncio, time, random +from datetime import datetime +import traceback +from dotenv import load_dotenv + +load_dotenv() +import os, copy + +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path +import pytest +from litellm import Router +from litellm.router_strategy.provider_budgets import ProviderBudgetLimiting +from litellm.types.router import ( + RoutingStrategy, + ProviderBudgetConfigType, + ProviderBudgetInfo, +) +from litellm.caching.caching import DualCache +import logging +from litellm._logging import verbose_router_logger + +verbose_router_logger.setLevel(logging.DEBUG) + + +@pytest.mark.asyncio +async def test_provider_budgets_e2e_test(): + """ + Expected behavior: + - First request forced to OpenAI + - Hit OpenAI budget limit + - Next 3 requests all go to Azure + + """ + provider_budget_config: ProviderBudgetConfigType = { + "openai": ProviderBudgetInfo(time_period="1d", budget_limit=0.000000000001), + "azure": ProviderBudgetInfo(time_period="1d", budget_limit=100), + } + + router = Router( + routing_strategy="provider-budget-routing", + routing_strategy_args=provider_budget_config, + model_list=[ + { + "model_name": "gpt-3.5-turbo", # openai model name + "litellm_params": { # params for litellm completion/embedding call + "model": "azure/chatgpt-v-2", + "api_key": os.getenv("AZURE_API_KEY"), + "api_version": os.getenv("AZURE_API_VERSION"), + "api_base": os.getenv("AZURE_API_BASE"), + }, + "model_info": {"id": "azure-model-id"}, + }, + { + "model_name": "gpt-3.5-turbo", # openai model name + "litellm_params": { + "model": "openai/gpt-4o-mini", + }, + "model_info": {"id": "openai-model-id"}, + }, + ], + ) + + response = await router.acompletion( + messages=[{"role": "user", "content": "Hello, how are you?"}], + model="openai/gpt-4o-mini", + ) + print(response) + + await asyncio.sleep(0.5) + + for _ in range(3): + response = await router.acompletion( + messages=[{"role": "user", "content": "Hello, how are you?"}], + model="gpt-3.5-turbo", + ) + print(response) + + print("response.hidden_params", response._hidden_params) + + await asyncio.sleep(0.5) + + assert response._hidden_params.get("custom_llm_provider") == "azure" From 5aae0e4bb0a34e69840d947332372bb96fbbb2bf Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 19 Nov 
2024 12:39:33 -0800 Subject: [PATCH 03/16] test_provider_budgets_e2e_test_expect_to_fail --- tests/local_testing/test_provider_budgets.py | 45 ++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tests/local_testing/test_provider_budgets.py b/tests/local_testing/test_provider_budgets.py index 40630c130..a26302e6d 100644 --- a/tests/local_testing/test_provider_budgets.py +++ b/tests/local_testing/test_provider_budgets.py @@ -82,3 +82,48 @@ async def test_provider_budgets_e2e_test(): await asyncio.sleep(0.5) assert response._hidden_params.get("custom_llm_provider") == "azure" + + +@pytest.mark.asyncio +async def test_provider_budgets_e2e_test_expect_to_fail(): + """ + Expected behavior: + - first request passes, all subsequent requests fail + + """ + provider_budget_config: ProviderBudgetConfigType = { + "anthropic": ProviderBudgetInfo(time_period="1d", budget_limit=0.000000000001), + } + + router = Router( + routing_strategy="provider-budget-routing", + routing_strategy_args=provider_budget_config, + model_list=[ + { + "model_name": "anthropic/*", # openai model name + "litellm_params": { + "model": "anthropic/*", + }, + }, + ], + ) + + response = await router.acompletion( + messages=[{"role": "user", "content": "Hello, how are you?"}], + model="anthropic/claude-3-5-sonnet-20240620", + ) + print(response) + + await asyncio.sleep(0.5) + + for _ in range(3): + with pytest.raises(Exception) as exc_info: + response = await router.acompletion( + messages=[{"role": "user", "content": "Hello, how are you?"}], + model="anthropic/claude-3-5-sonnet-20240620", + ) + print(response) + print("response.hidden_params", response._hidden_params) + + await asyncio.sleep(0.5) + # Verify the error is related to budget exceeded From ba36ecc65ae65e29c639a36266c0ed1dc2b6b580 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 19 Nov 2024 12:56:59 -0800 Subject: [PATCH 04/16] use 1 cache read for getting provider spend --- litellm/router.py | 2 +- litellm/router_strategy/provider_budgets.py | 64 ++++++++++++++------- 2 files changed, 43 insertions(+), 23 deletions(-) diff --git a/litellm/router.py b/litellm/router.py index d582f614f..db6debd56 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -5189,7 +5189,7 @@ class Router: ): deployment = ( await self.provider_budget_logger.async_get_available_deployments( - model_group=model, + request_kwargs=request_kwargs, healthy_deployments=healthy_deployments, # type: ignore ) ) diff --git a/litellm/router_strategy/provider_budgets.py b/litellm/router_strategy/provider_budgets.py index 423bcdd59..5c6594d30 100644 --- a/litellm/router_strategy/provider_budgets.py +++ b/litellm/router_strategy/provider_budgets.py @@ -45,38 +45,61 @@ class ProviderBudgetLimiting(CustomLogger): async def async_get_available_deployments( self, - model_group: str, healthy_deployments: List[Dict], - messages: Optional[List[Dict[str, str]]] = None, - input: Optional[Union[str, List]] = None, request_kwargs: Optional[Dict] = None, - ): + ) -> Optional[Dict]: """ Filter list of healthy deployments based on provider budget """ potential_deployments: List[Dict] = [] + # Extract the parent OpenTelemetry span for tracing parent_otel_span: Optional[Span] = _get_parent_otel_span_from_kwargs( request_kwargs ) + # Collect all providers and their budget configs + # {"openai": ProviderBudgetInfo, "anthropic": ProviderBudgetInfo, "azure": None} + _provider_configs: Dict[str, Optional[ProviderBudgetInfo]] = {} for deployment in healthy_deployments: provider = 
self._get_llm_provider_for_deployment(deployment) budget_config = self._get_budget_config_for_provider(provider) - if budget_config is None: - verbose_router_logger.debug( - f"No budget config found for provider {provider}, skipping" - ) + _provider_configs[provider] = budget_config + + # Filter out providers without budget config + provider_configs: Dict[str, ProviderBudgetInfo] = { + provider: config + for provider, config in _provider_configs.items() + if config is not None + } + + # Build cache keys for batch retrieval + cache_keys = [] + for provider, config in provider_configs.items(): + cache_keys.append(f"provider_spend:{provider}:{config.time_period}") + + # Fetch current spend for all providers using batch cache + _current_spends = await self.router_cache.async_batch_get_cache( + keys=cache_keys, + parent_otel_span=parent_otel_span, + ) + current_spends: List = _current_spends or [0.0] * len(provider_configs) + + # Map providers to their current spend values + provider_spend_map: Dict[str, float] = {} + for idx, provider in enumerate(provider_configs.keys()): + provider_spend_map[provider] = float(current_spends[idx] or 0.0) + + # Filter healthy deployments based on budget constraints + for deployment in healthy_deployments: + provider = self._get_llm_provider_for_deployment(deployment) + budget_config = provider_configs.get(provider) + + if not budget_config: continue + current_spend = provider_spend_map.get(provider, 0.0) budget_limit = budget_config.budget_limit - current_spend: float = ( - await self.router_cache.async_get_cache( - key=f"provider_spend:{provider}:{budget_config.time_period}", - parent_otel_span=parent_otel_span, - ) - or 0.0 - ) verbose_router_logger.debug( f"Current spend for {provider}: {current_spend}, budget limit: {budget_limit}" @@ -89,18 +112,15 @@ class ProviderBudgetLimiting(CustomLogger): continue potential_deployments.append(deployment) - # randomly pick one deployment from potential_deployments - if potential_deployments: - return random.choice(potential_deployments) - return None + + # Randomly pick one deployment from potential deployments + return random.choice(potential_deployments) if potential_deployments else None async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): """ Increment provider spend in DualCache (InMemory + Redis) """ - verbose_router_logger.debug( - f"in ProviderBudgetLimiting.async_log_success_event" - ) + verbose_router_logger.debug("in ProviderBudgetLimiting.async_log_success_event") standard_logging_payload: Optional[StandardLoggingPayload] = kwargs.get( "standard_logging_object", None ) From 3d296926aeee4941f17c73ab446281f220b54af3 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 19 Nov 2024 13:57:56 -0800 Subject: [PATCH 05/16] test_provider_budgets_e2e_test --- tests/local_testing/test_provider_budgets.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/local_testing/test_provider_budgets.py b/tests/local_testing/test_provider_budgets.py index a26302e6d..17371da54 100644 --- a/tests/local_testing/test_provider_budgets.py +++ b/tests/local_testing/test_provider_budgets.py @@ -60,6 +60,9 @@ async def test_provider_budgets_e2e_test(): "model_info": {"id": "openai-model-id"}, }, ], + redis_host=os.getenv("REDIS_HOST"), + redis_port=int(os.getenv("REDIS_PORT")), + redis_password=os.getenv("REDIS_PASSWORD"), ) response = await router.acompletion( @@ -106,6 +109,9 @@ async def test_provider_budgets_e2e_test_expect_to_fail(): }, }, ], + redis_host=os.getenv("REDIS_HOST"), + 
redis_port=int(os.getenv("REDIS_PORT")), + redis_password=os.getenv("REDIS_PASSWORD"), ) response = await router.acompletion( From 60aa3ed6a1fd3b92d814635214cd9ac01c7603bf Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 19 Nov 2024 14:18:53 -0800 Subject: [PATCH 06/16] add doc on provider budgets --- docs/my-website/docs/routing.md | 91 +++++++++++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 4 deletions(-) diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md index 702cafa7f..49e73c4b7 100644 --- a/docs/my-website/docs/routing.md +++ b/docs/my-website/docs/routing.md @@ -159,7 +159,7 @@ Router provides 4 strategies for routing your calls across multiple deployments: -**🎉 NEW** This is an async implementation of usage-based-routing. +This is an async implementation of usage-based-routing. **Filters out deployment if tpm/rpm limit exceeded** - If you pass in the deployment's tpm/rpm limits. @@ -502,6 +502,88 @@ asyncio.run(router_acompletion()) + + + +Use this to set budgets for LLM Providers - example $100/day for OpenAI, $100/day for Azure. + + + + +```yaml +model_list: + - model_name: gpt-3.5-turbo + litellm_params: + model: openai/gpt-3.5-turbo + api_key: os.environ/OPENAI_API_KEY + - model_name: gpt-3.5-turbo + litellm_params: + model: azure/chatgpt-functioncalling + api_key: os.environ/AZURE_API_KEY + api_version: os.environ/AZURE_API_VERSION + api_base: os.environ/AZURE_API_BASE + +router_settings: + routing_strategy: provider-budget-routing + redis_host: + redis_password: + redis_port: + routing_strategy_args: + openai: + budget_limit: 0.000000000001 # float of $ value budget for time period + time_period: 1d # can be 1d, 2d, 30d + azure: + budget_limit: 100 + time_period: 1d + anthropic: + budget_limit: 100 + time_period: 10d + vertexai: + budget_limit: 100 + time_period: 12d + gemini: + budget_limit: 100 + time_period: 12d + +general_settings: + master_key: sk-1234 +``` + + + + + + + + + +#### How provider-budget-routing works + +1. **Budget Tracking**: + - Uses Redis to track spend for each provider + - Tracks spend over specified time periods (e.g., "1d", "30d") + - Automatically resets spend after time period expires + +2. **Routing Logic**: + - Routes requests to providers under their budget limits + - Skips providers that have exceeded their budget + - If all providers exceed budget, raises an error + +3. **Supported Time Periods**: + - Format: "Xd" where X is number of days + - Examples: "1d" (1 day), "30d" (30 days) + +4. **Requirements**: + - Redis required for tracking spend across instances + - Provider names must be litellm provider names. 
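+     (e.g. `openai`, `azure`, `anthropic`; note that litellm's provider key for Vertex AI is `vertex_ai`.)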
See [Supported Providers](https://docs.litellm.ai/docs/providers) + + + + + + @@ -612,7 +695,7 @@ asyncio.run(router_acompletion()) - + **Plugin a custom routing strategy to select deployments** @@ -727,7 +810,7 @@ for _ in range(10): - + Picks a deployment based on the lowest cost From b3b237a5976c297bc30c130a965429240eb9beae Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 19 Nov 2024 14:27:57 -0800 Subject: [PATCH 07/16] clean up provider budgets --- litellm/router_strategy/provider_budgets.py | 76 +++++++++------------ 1 file changed, 33 insertions(+), 43 deletions(-) diff --git a/litellm/router_strategy/provider_budgets.py b/litellm/router_strategy/provider_budgets.py index 5c6594d30..c5c6b36fa 100644 --- a/litellm/router_strategy/provider_budgets.py +++ b/litellm/router_strategy/provider_budgets.py @@ -1,3 +1,19 @@ +""" +Provider budget limiting strategy + +Use this if you want to set $ budget limits for each provider. + +Example: +``` +openai: + budget_limit: 0.000000000001 + time_period: 1d +anthropic: + budget_limit: 100 + time_period: 7d +``` +""" + import random from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypedDict, Union @@ -21,20 +37,6 @@ else: Span = Any -class ProviderSpend(TypedDict, total=False): - """ - Provider spend data - - { - "openai": 300.0, - "anthropic": 100.0 - } - """ - - provider: str - spend: float - - class ProviderBudgetLimiting(CustomLogger): def __init__(self, router_cache: DualCache, provider_budget_config: dict): self.router_cache = router_cache @@ -49,7 +51,14 @@ class ProviderBudgetLimiting(CustomLogger): request_kwargs: Optional[Dict] = None, ) -> Optional[Dict]: """ - Filter list of healthy deployments based on provider budget + For all deployments, check their LLM provider budget is less than their budget limit. + + If multiple deployments are available, randomly pick one. + + Example: + if deployment = openai/gpt-3.5-turbo + check if openai budget limit is exceeded + """ potential_deployments: List[Dict] = [] @@ -119,6 +128,15 @@ class ProviderBudgetLimiting(CustomLogger): async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): """ Increment provider spend in DualCache (InMemory + Redis) + + Handles saving current provider spend to Redis. + + Spend is stored as: + provider_spend:{provider}:{time_period} + ex. provider_spend:openai:1d + ex. provider_spend:anthropic:7d + + The time period is tracked for time_periods set in the provider budget config. 
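+        Example: a $0.002 openai call increments provider_spend:openai:1d by 0.002, with a TTL of get_ttl_seconds("1d") == 86400 seconds (assuming a "1d" budget is configured for openai).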
""" verbose_router_logger.debug("in ProviderBudgetLimiting.async_log_success_event") standard_logging_payload: Optional[StandardLoggingPayload] = kwargs.get( @@ -174,18 +192,6 @@ class ProviderBudgetLimiting(CustomLogger): raise e return custom_llm_provider - def _get_unique_custom_llm_providers_in_deployments( - self, deployments: List[Dict] - ) -> list: - """ - Get unique custom LLM providers in deployments - """ - unique_providers = set() - for deployment in deployments: - provider = self._get_llm_provider_for_deployment(deployment) - unique_providers.add(provider) - return list(unique_providers) - def get_ttl_seconds(self, time_period: str) -> int: """ Convert time period (e.g., '1d', '30d') to seconds for Redis TTL @@ -194,19 +200,3 @@ class ProviderBudgetLimiting(CustomLogger): days = int(time_period[:-1]) return days * 24 * 60 * 60 raise ValueError(f"Unsupported time period format: {time_period}") - - def get_budget_limit(self, custom_llm_provider: str, time_period: str) -> float: - """ - Fetch the budget limit for a given provider and time period. - This can be fetched from a config or database. - """ - _provider_budget_settings = self.provider_budget_config.get( - custom_llm_provider, None - ) - if _provider_budget_settings is None: - return float("inf") - - verbose_router_logger.debug( - f"Provider budget settings: {_provider_budget_settings}" - ) - return _provider_budget_settings.budget_limit From d14347df95f3334f0544cca941ae4f0b6df180d7 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 19 Nov 2024 14:34:23 -0800 Subject: [PATCH 08/16] unit testing for provider budget routing --- litellm/router_strategy/provider_budgets.py | 15 ++-- tests/local_testing/test_provider_budgets.py | 76 ++++++++++++++++++++ 2 files changed, 87 insertions(+), 4 deletions(-) diff --git a/litellm/router_strategy/provider_budgets.py b/litellm/router_strategy/provider_budgets.py index c5c6b36fa..de8eda19c 100644 --- a/litellm/router_strategy/provider_budgets.py +++ b/litellm/router_strategy/provider_budgets.py @@ -72,6 +72,8 @@ class ProviderBudgetLimiting(CustomLogger): _provider_configs: Dict[str, Optional[ProviderBudgetInfo]] = {} for deployment in healthy_deployments: provider = self._get_llm_provider_for_deployment(deployment) + if provider is None: + continue budget_config = self._get_budget_config_for_provider(provider) _provider_configs[provider] = budget_config @@ -102,6 +104,8 @@ class ProviderBudgetLimiting(CustomLogger): # Filter healthy deployments based on budget constraints for deployment in healthy_deployments: provider = self._get_llm_provider_for_deployment(deployment) + if provider is None: + continue budget_config = provider_configs.get(provider) if not budget_config: @@ -179,17 +183,20 @@ class ProviderBudgetLimiting(CustomLogger): ) -> Optional[ProviderBudgetInfo]: return self.provider_budget_config.get(provider, None) - def _get_llm_provider_for_deployment(self, deployment: Dict) -> str: + def _get_llm_provider_for_deployment(self, deployment: Dict) -> Optional[str]: try: _litellm_params: LiteLLM_Params = LiteLLM_Params( - **deployment["litellm_params"] + **deployment.get("litellm_params", {"model": ""}) ) _, custom_llm_provider, _, _ = litellm.get_llm_provider( model=_litellm_params.model, litellm_params=_litellm_params, ) - except Exception as e: - raise e + except Exception: + verbose_router_logger.error( + f"Error getting LLM provider for deployment: {deployment}" + ) + return None return custom_llm_provider def get_ttl_seconds(self, time_period: str) -> int: diff --git 
a/tests/local_testing/test_provider_budgets.py b/tests/local_testing/test_provider_budgets.py index 17371da54..2622a79e0 100644 --- a/tests/local_testing/test_provider_budgets.py +++ b/tests/local_testing/test_provider_budgets.py @@ -133,3 +133,79 @@ async def test_provider_budgets_e2e_test_expect_to_fail(): await asyncio.sleep(0.5) # Verify the error is related to budget exceeded + + +def test_get_ttl_seconds(): + """ + Test the get_ttl_seconds helper method" + + """ + provider_budget = ProviderBudgetLimiting( + router_cache=DualCache(), provider_budget_config={} + ) + + assert provider_budget.get_ttl_seconds("1d") == 86400 # 1 day in seconds + assert provider_budget.get_ttl_seconds("7d") == 604800 # 7 days in seconds + assert provider_budget.get_ttl_seconds("30d") == 2592000 # 30 days in seconds + + with pytest.raises(ValueError, match="Unsupported time period format"): + provider_budget.get_ttl_seconds("1h") + + +def test_get_llm_provider_for_deployment(): + """ + Test the _get_llm_provider_for_deployment helper method + + """ + provider_budget = ProviderBudgetLimiting( + router_cache=DualCache(), provider_budget_config={} + ) + + # Test OpenAI deployment + openai_deployment = {"litellm_params": {"model": "openai/gpt-4"}} + assert ( + provider_budget._get_llm_provider_for_deployment(openai_deployment) == "openai" + ) + + # Test Azure deployment + azure_deployment = { + "litellm_params": { + "model": "azure/gpt-4", + "api_key": "test", + "api_base": "test", + } + } + assert provider_budget._get_llm_provider_for_deployment(azure_deployment) == "azure" + + # should not raise error for unknown deployment + unknown_deployment = {} + assert provider_budget._get_llm_provider_for_deployment(unknown_deployment) is None + + +def test_get_budget_config_for_provider(): + """ + Test the _get_budget_config_for_provider helper method + + """ + config = { + "openai": ProviderBudgetInfo(time_period="1d", budget_limit=100), + "anthropic": ProviderBudgetInfo(time_period="7d", budget_limit=500), + } + + provider_budget = ProviderBudgetLimiting( + router_cache=DualCache(), provider_budget_config=config + ) + + # Test existing providers + openai_config = provider_budget._get_budget_config_for_provider("openai") + assert openai_config is not None + assert openai_config.time_period == "1d" + assert openai_config.budget_limit == 100 + + anthropic_config = provider_budget._get_budget_config_for_provider("anthropic") + assert anthropic_config is not None + assert anthropic_config.time_period == "7d" + assert anthropic_config.budget_limit == 500 + + # Test non-existent provider + assert provider_budget._get_budget_config_for_provider("unknown") is None From a7e96ff9ede2db694a6c53db2299e6c650881935 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 19 Nov 2024 17:09:29 -0800 Subject: [PATCH 09/16] use as flag, not routing strat --- litellm/router.py | 28 +++++----------------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/litellm/router.py b/litellm/router.py index db6debd56..61fcd8b82 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -120,6 +120,7 @@ from litellm.types.router import ( LiteLLMParamsTypedDict, ModelGroupInfo, ModelInfo, + ProviderBudgetConfigType, RetryPolicy, RouterErrors, RouterGeneralSettings, @@ -235,9 +236,9 @@ class Router: "latency-based-routing", "cost-based-routing", "usage-based-routing-v2", - "provider-budget-routing", ] = "simple-shuffle", - routing_strategy_args: dict = {}, # just for latency-based, + routing_strategy_args: dict = {}, # just for 
latency-based + provider_budget_config: Optional[ProviderBudgetConfigType] = None, semaphore: Optional[asyncio.Semaphore] = None, alerting_config: Optional[AlertingConfig] = None, router_general_settings: Optional[ @@ -274,6 +275,7 @@ class Router: routing_strategy (Literal["simple-shuffle", "least-busy", "usage-based-routing", "latency-based-routing", "cost-based-routing"]): Routing strategy. Defaults to "simple-shuffle". routing_strategy_args (dict): Additional args for latency-based routing. Defaults to {}. alerting_config (AlertingConfig): Slack alerting configuration. Defaults to None. + provider_budget_config (ProviderBudgetConfig): Provider budget configuration. Use this to set llm_provider budget limits. example $100/day to OpenAI, $100/day to Azure, etc. Defaults to None. Returns: Router: An instance of the litellm.Router class. @@ -519,6 +521,7 @@ class Router: ) self.service_logger_obj = ServiceLogging() self.routing_strategy_args = routing_strategy_args + self.provider_budget_config = provider_budget_config self.retry_policy: Optional[RetryPolicy] = None if retry_policy is not None: if isinstance(retry_policy, dict): @@ -646,16 +649,6 @@ class Router: ) if isinstance(litellm.callbacks, list): litellm.callbacks.append(self.lowestcost_logger) # type: ignore - elif ( - routing_strategy == RoutingStrategy.PROVIDER_BUDGET_LIMITING.value - or routing_strategy == RoutingStrategy.PROVIDER_BUDGET_LIMITING - ): - self.provider_budget_logger = ProviderBudgetLimiting( - router_cache=self.cache, - provider_budget_config=routing_strategy_args, - ) - if isinstance(litellm.callbacks, list): - litellm.callbacks.append(self.provider_budget_logger) # type: ignore else: pass @@ -5067,7 +5060,6 @@ class Router: and self.routing_strategy != "cost-based-routing" and self.routing_strategy != "latency-based-routing" and self.routing_strategy != "least-busy" - and self.routing_strategy != "provider-budget-routing" ): # prevent regressions for other routing strategies, that don't have async get available deployments implemented. return self.get_available_deployment( model=model, @@ -5183,16 +5175,6 @@ class Router: healthy_deployments=healthy_deployments, # type: ignore ) ) - elif ( - self.routing_strategy == "provider-budget-routing" - and self.provider_budget_logger is not None - ): - deployment = ( - await self.provider_budget_logger.async_get_available_deployments( - request_kwargs=request_kwargs, - healthy_deployments=healthy_deployments, # type: ignore - ) - ) else: deployment = None if deployment is None: From 50168889beade58eb4b5dc68ab4727901fb17771 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 19 Nov 2024 17:12:57 -0800 Subject: [PATCH 10/16] fix init provider budget routing --- litellm/router_strategy/provider_budgets.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/litellm/router_strategy/provider_budgets.py b/litellm/router_strategy/provider_budgets.py index de8eda19c..590716c1b 100644 --- a/litellm/router_strategy/provider_budgets.py +++ b/litellm/router_strategy/provider_budgets.py @@ -1,8 +1,12 @@ """ -Provider budget limiting strategy +Provider budget limiting Use this if you want to set $ budget limits for each provider. 
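+A budget covers every deployment that resolves to the same provider, since spend is tracked per provider rather than per deployment.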
+Note: This is a filter, like tag-routing + +This means you can use this with weighted-pick, lowest-latency, simple-shuffle, routing etc + Example: ``` openai: @@ -45,7 +49,11 @@ class ProviderBudgetLimiting(CustomLogger): f"Initalized Provider budget config: {self.provider_budget_config}" ) - async def async_get_available_deployments( + # Add self to litellm callbacks if it's a list + if isinstance(litellm.callbacks, list): + litellm.callbacks.append(self) # type: ignore + + async def async_filter_deployments( self, healthy_deployments: List[Dict], request_kwargs: Optional[Dict] = None, From 95f21722a0e66cafeab158e477735aef4bf824d5 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 19 Nov 2024 17:24:17 -0800 Subject: [PATCH 11/16] use async_filter_deployments --- litellm/router.py | 13 +++++++++++++ litellm/router_strategy/provider_budgets.py | 19 +++++++++++-------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/litellm/router.py b/litellm/router.py index 61fcd8b82..f724c96c4 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -522,6 +522,11 @@ class Router: self.service_logger_obj = ServiceLogging() self.routing_strategy_args = routing_strategy_args self.provider_budget_config = provider_budget_config + if self.provider_budget_config is not None: + self.provider_budget_logger = ProviderBudgetLimiting( + router_cache=self.cache, + provider_budget_config=self.provider_budget_config, + ) self.retry_policy: Optional[RetryPolicy] = None if retry_policy is not None: if isinstance(retry_policy, dict): @@ -5114,6 +5119,14 @@ class Router: healthy_deployments=healthy_deployments, ) + if self.provider_budget_config is not None: + healthy_deployments = ( + await self.provider_budget_logger.async_filter_deployments( + healthy_deployments=healthy_deployments, + request_kwargs=request_kwargs, + ) + ) + if len(healthy_deployments) == 0: exception = await async_raise_no_deployment_exception( litellm_router_instance=self, diff --git a/litellm/router_strategy/provider_budgets.py b/litellm/router_strategy/provider_budgets.py index 590716c1b..9610af149 100644 --- a/litellm/router_strategy/provider_budgets.py +++ b/litellm/router_strategy/provider_budgets.py @@ -55,19 +55,23 @@ class ProviderBudgetLimiting(CustomLogger): async def async_filter_deployments( self, - healthy_deployments: List[Dict], + healthy_deployments: Union[List[Dict[str, Any]], Dict[str, Any]], request_kwargs: Optional[Dict] = None, - ) -> Optional[Dict]: + ): """ - For all deployments, check their LLM provider budget is less than their budget limit. + Filter out deployments that have exceeded their provider budget limit. - If multiple deployments are available, randomly pick one. 
Example: if deployment = openai/gpt-3.5-turbo - check if openai budget limit is exceeded - + and openai spend > openai budget limit + then skip this deployment """ + + # If a single deployment is passed, convert it to a list + if isinstance(healthy_deployments, dict): + healthy_deployments = [healthy_deployments] + potential_deployments: List[Dict] = [] # Extract the parent OpenTelemetry span for tracing @@ -134,8 +138,7 @@ class ProviderBudgetLimiting(CustomLogger): potential_deployments.append(deployment) - # Randomly pick one deployment from potential deployments - return random.choice(potential_deployments) if potential_deployments else None + return potential_deployments async def async_log_success_event(self, kwargs, response_obj, start_time, end_time): """ From 7ceee027c8193f986c344acff4318f4ace671be3 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 19 Nov 2024 17:26:31 -0800 Subject: [PATCH 12/16] fix test provider budgets --- tests/local_testing/test_provider_budgets.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/local_testing/test_provider_budgets.py b/tests/local_testing/test_provider_budgets.py index 2622a79e0..5e685cae6 100644 --- a/tests/local_testing/test_provider_budgets.py +++ b/tests/local_testing/test_provider_budgets.py @@ -39,8 +39,6 @@ async def test_provider_budgets_e2e_test(): } router = Router( - routing_strategy="provider-budget-routing", - routing_strategy_args=provider_budget_config, model_list=[ { "model_name": "gpt-3.5-turbo", # openai model name @@ -60,6 +58,7 @@ async def test_provider_budgets_e2e_test(): "model_info": {"id": "openai-model-id"}, }, ], + provider_budget_config=provider_budget_config, redis_host=os.getenv("REDIS_HOST"), redis_port=int(os.getenv("REDIS_PORT")), redis_password=os.getenv("REDIS_PASSWORD"), @@ -99,8 +98,6 @@ async def test_provider_budgets_e2e_test_expect_to_fail(): } router = Router( - routing_strategy="provider-budget-routing", - routing_strategy_args=provider_budget_config, model_list=[ { "model_name": "anthropic/*", # openai model name @@ -112,6 +109,7 @@ async def test_provider_budgets_e2e_test_expect_to_fail(): redis_host=os.getenv("REDIS_HOST"), redis_port=int(os.getenv("REDIS_PORT")), redis_password=os.getenv("REDIS_PASSWORD"), + provider_budget_config=provider_budget_config, ) response = await router.acompletion( From 98a7a37c86438693aecf16ea69ee7c0be95226c0 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 19 Nov 2024 17:28:52 -0800 Subject: [PATCH 13/16] doc provider budget routing --- .../docs/proxy/provider_budget_routing.md | 75 +++++++++++++++++++ docs/my-website/sidebars.js | 2 +- 2 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 docs/my-website/docs/proxy/provider_budget_routing.md diff --git a/docs/my-website/docs/proxy/provider_budget_routing.md b/docs/my-website/docs/proxy/provider_budget_routing.md new file mode 100644 index 000000000..e3e3c41e1 --- /dev/null +++ b/docs/my-website/docs/proxy/provider_budget_routing.md @@ -0,0 +1,75 @@ +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +# Provider Budget Routing +Use this to set budgets for LLM Providers - example $100/day for OpenAI, $100/day for Azure. 
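+If you use the Router in the Python SDK directly (rather than through the proxy), the same budgets can be passed with the `provider_budget_config` parameter. The sketch below is a minimal example modeled on this PR's test suite; the deployment names and Redis settings are placeholders:
+
+```python
+import os
+
+from litellm import Router
+from litellm.types.router import ProviderBudgetConfigType, ProviderBudgetInfo
+
+# $100/day for OpenAI, $100/day for Azure
+provider_budget_config: ProviderBudgetConfigType = {
+    "openai": ProviderBudgetInfo(time_period="1d", budget_limit=100),
+    "azure": ProviderBudgetInfo(time_period="1d", budget_limit=100),
+}
+
+router = Router(
+    model_list=[
+        {
+            "model_name": "gpt-3.5-turbo",
+            "litellm_params": {"model": "openai/gpt-3.5-turbo"},
+        },
+        {
+            "model_name": "gpt-3.5-turbo",
+            "litellm_params": {
+                "model": "azure/chatgpt-functioncalling",
+                "api_key": os.getenv("AZURE_API_KEY"),
+                "api_version": os.getenv("AZURE_API_VERSION"),
+                "api_base": os.getenv("AZURE_API_BASE"),
+            },
+        },
+    ],
+    provider_budget_config=provider_budget_config,
+    # Redis is required so spend is shared across Router instances
+    redis_host=os.getenv("REDIS_HOST"),
+    redis_port=int(os.getenv("REDIS_PORT", "6379")),
+    redis_password=os.getenv("REDIS_PASSWORD"),
+)
+```
+
+Requests made with `router.acompletion(...)` are then routed only to deployments whose provider is still under budget.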
+ + + + +```yaml +model_list: + - model_name: gpt-3.5-turbo + litellm_params: + model: openai/gpt-3.5-turbo + api_key: os.environ/OPENAI_API_KEY + - model_name: gpt-3.5-turbo + litellm_params: + model: azure/chatgpt-functioncalling + api_key: os.environ/AZURE_API_KEY + api_version: os.environ/AZURE_API_VERSION + api_base: os.environ/AZURE_API_BASE + +router_settings: + routing_strategy: provider-budget-routing + redis_host: + redis_password: + redis_port: + routing_strategy_args: + openai: + budget_limit: 0.000000000001 # float of $ value budget for time period + time_period: 1d # can be 1d, 2d, 30d + azure: + budget_limit: 100 + time_period: 1d + anthropic: + budget_limit: 100 + time_period: 10d + vertexai: + budget_limit: 100 + time_period: 12d + gemini: + budget_limit: 100 + time_period: 12d + +general_settings: + master_key: sk-1234 +``` + + + + + + + + + +#### How provider-budget-routing works + +1. **Budget Tracking**: + - Uses Redis to track spend for each provider + - Tracks spend over specified time periods (e.g., "1d", "30d") + - Automatically resets spend after time period expires + +2. **Routing Logic**: + - Routes requests to providers under their budget limits + - Skips providers that have exceeded their budget + - If all providers exceed budget, raises an error + +3. **Supported Time Periods**: + - Format: "Xd" where X is number of days + - Examples: "1d" (1 day), "30d" (30 days) + +4. **Requirements**: + - Redis required for tracking spend across instances + - Provider names must be litellm provider names. See [Supported Providers](https://docs.litellm.ai/docs/providers) diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js index 107a877da..50cc83c08 100644 --- a/docs/my-website/sidebars.js +++ b/docs/my-website/sidebars.js @@ -100,7 +100,7 @@ const sidebars = { { type: "category", label: "Routing", - items: ["proxy/load_balancing", "proxy/tag_routing", "proxy/team_based_routing", "proxy/customer_routing",], + items: ["proxy/load_balancing", "proxy/tag_routing", "proxy/provider_budget_routing", "proxy/team_based_routing", "proxy/customer_routing",], }, { type: "category", From d5937c820f30e6b4103a4972fec39ea006d8ac41 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 19 Nov 2024 17:29:52 -0800 Subject: [PATCH 14/16] doc provider budget routing --- .../docs/proxy/provider_budget_routing.md | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/docs/my-website/docs/proxy/provider_budget_routing.md b/docs/my-website/docs/proxy/provider_budget_routing.md index e3e3c41e1..a945ef89a 100644 --- a/docs/my-website/docs/proxy/provider_budget_routing.md +++ b/docs/my-website/docs/proxy/provider_budget_routing.md @@ -4,9 +4,6 @@ import TabItem from '@theme/TabItem'; # Provider Budget Routing Use this to set budgets for LLM Providers - example $100/day for OpenAI, $100/day for Azure. 
- - - ```yaml model_list: - model_name: gpt-3.5-turbo @@ -21,11 +18,10 @@ model_list: api_base: os.environ/AZURE_API_BASE router_settings: - routing_strategy: provider-budget-routing redis_host: redis_password: redis_port: - routing_strategy_args: + provider_budget_config: openai: budget_limit: 0.000000000001 # float of $ value budget for time period time_period: 1d # can be 1d, 2d, 30d @@ -46,13 +42,6 @@ general_settings: master_key: sk-1234 ``` - - - - - - - #### How provider-budget-routing works From f4369f0427d2253bc60e599c247c6c20a5efa44c Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 19 Nov 2024 17:30:17 -0800 Subject: [PATCH 15/16] fix docs changes --- docs/my-website/docs/routing.md | 91 ++------------------------------- 1 file changed, 4 insertions(+), 87 deletions(-) diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md index 49e73c4b7..702cafa7f 100644 --- a/docs/my-website/docs/routing.md +++ b/docs/my-website/docs/routing.md @@ -159,7 +159,7 @@ Router provides 4 strategies for routing your calls across multiple deployments: -This is an async implementation of usage-based-routing. +**🎉 NEW** This is an async implementation of usage-based-routing. **Filters out deployment if tpm/rpm limit exceeded** - If you pass in the deployment's tpm/rpm limits. @@ -502,88 +502,6 @@ asyncio.run(router_acompletion()) - - - -Use this to set budgets for LLM Providers - example $100/day for OpenAI, $100/day for Azure. - - - - -```yaml -model_list: - - model_name: gpt-3.5-turbo - litellm_params: - model: openai/gpt-3.5-turbo - api_key: os.environ/OPENAI_API_KEY - - model_name: gpt-3.5-turbo - litellm_params: - model: azure/chatgpt-functioncalling - api_key: os.environ/AZURE_API_KEY - api_version: os.environ/AZURE_API_VERSION - api_base: os.environ/AZURE_API_BASE - -router_settings: - routing_strategy: provider-budget-routing - redis_host: - redis_password: - redis_port: - routing_strategy_args: - openai: - budget_limit: 0.000000000001 # float of $ value budget for time period - time_period: 1d # can be 1d, 2d, 30d - azure: - budget_limit: 100 - time_period: 1d - anthropic: - budget_limit: 100 - time_period: 10d - vertexai: - budget_limit: 100 - time_period: 12d - gemini: - budget_limit: 100 - time_period: 12d - -general_settings: - master_key: sk-1234 -``` - - - - - - - - - -#### How provider-budget-routing works - -1. **Budget Tracking**: - - Uses Redis to track spend for each provider - - Tracks spend over specified time periods (e.g., "1d", "30d") - - Automatically resets spend after time period expires - -2. **Routing Logic**: - - Routes requests to providers under their budget limits - - Skips providers that have exceeded their budget - - If all providers exceed budget, raises an error - -3. **Supported Time Periods**: - - Format: "Xd" where X is number of days - - Examples: "1d" (1 day), "30d" (30 days) - -4. **Requirements**: - - Redis required for tracking spend across instances - - Provider names must be litellm provider names. 
See [Supported Providers](https://docs.litellm.ai/docs/providers) - - - - - - + @@ -695,7 +612,7 @@ asyncio.run(router_acompletion()) - + **Plugin a custom routing strategy to select deployments** @@ -810,7 +727,7 @@ for _ in range(10): - + Picks a deployment based on the lowest cost From fdd9dda2d9df505d1de792820194c84fe36f7e60 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Tue, 19 Nov 2024 17:53:34 -0800 Subject: [PATCH 16/16] fix comment --- litellm/router_strategy/provider_budgets.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/litellm/router_strategy/provider_budgets.py b/litellm/router_strategy/provider_budgets.py index 9610af149..c1805fea9 100644 --- a/litellm/router_strategy/provider_budgets.py +++ b/litellm/router_strategy/provider_budgets.py @@ -3,7 +3,7 @@ Provider budget limiting Use this if you want to set $ budget limits for each provider. -Note: This is a filter, like tag-routing +Note: This is a filter, like tag-routing. Meaning it will accept healthy deployments and then filter out deployments that have exceeded their budget limit. This means you can use this with weighted-pick, lowest-latency, simple-shuffle, routing etc @@ -18,7 +18,6 @@ anthropic: ``` """ -import random from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypedDict, Union import litellm
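Taken together, the series ends with budget limiting as "a filter, like tag-routing": it trims the healthy-deployment list before whichever routing strategy is configured runs, rather than replacing that strategy (see the router.py change in PATCH 11). A toy, runnable sketch of that composition follows; everything in it is a stand-in: budget state is faked with a hardcoded set, provider extraction is reduced to a model-prefix split, and simple-shuffle plays the configured strategy.

```python
import asyncio
import random
from typing import Dict, List


async def budget_filter(deployments: List[Dict]) -> List[Dict]:
    """Stand-in for ProviderBudgetLimiting.async_filter_deployments."""
    over_budget = {"openai"}  # pretend openai has exhausted its budget
    return [
        d
        for d in deployments
        if d["litellm_params"]["model"].split("/")[0] not in over_budget
    ]


async def pick_deployment(deployments: List[Dict]) -> Dict:
    # 1) the budget filter trims the healthy-deployment list first
    deployments = await budget_filter(deployments)
    # 2) the configured strategy then picks among the survivors
    return random.choice(deployments)


deployments = [
    {"litellm_params": {"model": "openai/gpt-4o-mini"}},
    {"litellm_params": {"model": "azure/chatgpt-v-2"}},
]
print(asyncio.run(pick_deployment(deployments)))  # always the azure deployment
```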