forked from phoenix/litellm-mirror
clean up provider budgets
This commit is contained in:
parent
60aa3ed6a1
commit
b3b237a597
1 changed file with 33 additions and 43 deletions
|
@@ -1,3 +1,19 @@
|
||||||
|
"""
|
||||||
|
Provider budget limiting strategy
|
||||||
|
|
||||||
|
Use this if you want to set $ budget limits for each provider.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
```
|
||||||
|
openai:
|
||||||
|
budget_limit: 0.000000000001
|
||||||
|
time_period: 1d
|
||||||
|
anthropic:
|
||||||
|
budget_limit: 100
|
||||||
|
time_period: 7d
|
||||||
|
```
|
||||||
|
"""
|
||||||
|
|
||||||
import random
|
import random
|
||||||
from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypedDict, Union
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional, TypedDict, Union
|
||||||
|
|
||||||
|
@@ -21,20 +37,6 @@ else:
|
||||||
Span = Any
|
Span = Any
|
||||||
|
|
||||||
|
|
||||||
class ProviderSpend(TypedDict, total=False):
|
|
||||||
"""
|
|
||||||
Provider spend data
|
|
||||||
|
|
||||||
{
|
|
||||||
"openai": 300.0,
|
|
||||||
"anthropic": 100.0
|
|
||||||
}
|
|
||||||
"""
|
|
||||||
|
|
||||||
provider: str
|
|
||||||
spend: float
|
|
||||||
|
|
||||||
|
|
||||||
class ProviderBudgetLimiting(CustomLogger):
|
class ProviderBudgetLimiting(CustomLogger):
|
||||||
def __init__(self, router_cache: DualCache, provider_budget_config: dict):
|
def __init__(self, router_cache: DualCache, provider_budget_config: dict):
|
||||||
self.router_cache = router_cache
|
self.router_cache = router_cache
|
||||||
|
@@ -49,7 +51,14 @@ class ProviderBudgetLimiting(CustomLogger):
|
||||||
request_kwargs: Optional[Dict] = None,
|
request_kwargs: Optional[Dict] = None,
|
||||||
) -> Optional[Dict]:
|
) -> Optional[Dict]:
|
||||||
"""
|
"""
|
||||||
Filter list of healthy deployments based on provider budget
|
For all deployments, check their LLM provider budget is less than their budget limit.
|
||||||
|
|
||||||
|
If multiple deployments are available, randomly pick one.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
if deployment = openai/gpt-3.5-turbo
|
||||||
|
check if openai budget limit is exceeded
|
||||||
|
|
||||||
"""
|
"""
|
||||||
potential_deployments: List[Dict] = []
|
potential_deployments: List[Dict] = []
|
||||||
|
|
||||||
|
@@ -119,6 +128,15 @@ class ProviderBudgetLimiting(CustomLogger):
|
||||||
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
|
async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
|
||||||
"""
|
"""
|
||||||
Increment provider spend in DualCache (InMemory + Redis)
|
Increment provider spend in DualCache (InMemory + Redis)
|
||||||
|
|
||||||
|
Handles saving current provider spend to Redis.
|
||||||
|
|
||||||
|
Spend is stored as:
|
||||||
|
provider_spend:{provider}:{time_period}
|
||||||
|
ex. provider_spend:openai:1d
|
||||||
|
ex. provider_spend:anthropic:7d
|
||||||
|
|
||||||
|
The time period is tracked for time_periods set in the provider budget config.
|
||||||
"""
|
"""
|
||||||
verbose_router_logger.debug("in ProviderBudgetLimiting.async_log_success_event")
|
verbose_router_logger.debug("in ProviderBudgetLimiting.async_log_success_event")
|
||||||
standard_logging_payload: Optional[StandardLoggingPayload] = kwargs.get(
|
standard_logging_payload: Optional[StandardLoggingPayload] = kwargs.get(
|
||||||
|
@@ -174,18 +192,6 @@ class ProviderBudgetLimiting(CustomLogger):
|
||||||
raise e
|
raise e
|
||||||
return custom_llm_provider
|
return custom_llm_provider
|
||||||
|
|
||||||
def _get_unique_custom_llm_providers_in_deployments(
|
|
||||||
self, deployments: List[Dict]
|
|
||||||
) -> list:
|
|
||||||
"""
|
|
||||||
Get unique custom LLM providers in deployments
|
|
||||||
"""
|
|
||||||
unique_providers = set()
|
|
||||||
for deployment in deployments:
|
|
||||||
provider = self._get_llm_provider_for_deployment(deployment)
|
|
||||||
unique_providers.add(provider)
|
|
||||||
return list(unique_providers)
|
|
||||||
|
|
||||||
def get_ttl_seconds(self, time_period: str) -> int:
|
def get_ttl_seconds(self, time_period: str) -> int:
|
||||||
"""
|
"""
|
||||||
Convert time period (e.g., '1d', '30d') to seconds for Redis TTL
|
Convert time period (e.g., '1d', '30d') to seconds for Redis TTL
|
||||||
|
@@ -194,19 +200,3 @@ class ProviderBudgetLimiting(CustomLogger):
|
||||||
days = int(time_period[:-1])
|
days = int(time_period[:-1])
|
||||||
return days * 24 * 60 * 60
|
return days * 24 * 60 * 60
|
||||||
raise ValueError(f"Unsupported time period format: {time_period}")
|
raise ValueError(f"Unsupported time period format: {time_period}")
|
||||||
|
|
||||||
def get_budget_limit(self, custom_llm_provider: str, time_period: str) -> float:
|
|
||||||
"""
|
|
||||||
Fetch the budget limit for a given provider and time period.
|
|
||||||
This can be fetched from a config or database.
|
|
||||||
"""
|
|
||||||
_provider_budget_settings = self.provider_budget_config.get(
|
|
||||||
custom_llm_provider, None
|
|
||||||
)
|
|
||||||
if _provider_budget_settings is None:
|
|
||||||
return float("inf")
|
|
||||||
|
|
||||||
verbose_router_logger.debug(
|
|
||||||
f"Provider budget settings: {_provider_budget_settings}"
|
|
||||||
)
|
|
||||||
return _provider_budget_settings.budget_limit
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue