mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00
(feat - Router / Proxy ) Allow setting budget limits per LLM deployment (#7220)
* fix test_deployment_budget_limits_e2e_test
* refactor async_log_success_event to track spend for provider + deployment
* fix format
* rename class to RouterBudgetLimiting
* rename func
* rename types used for budgets
* add new types for deployment budgets
* add budget limits for deployments
* fix checking budgets set for provider
* update file names
* fix linting error
* _track_provider_remaining_budget_prometheus
* async_filter_deployments
* fix model list passed to router
* update error
* test_deployment_budgets_e2e_test_expect_to_fail
* fix test case
* run deployment budget limits
This commit is contained in:
parent
b150faff90
commit
163529b40b
8 changed files with 557 additions and 151 deletions
|
@ -56,12 +56,12 @@ from litellm.integrations.custom_logger import CustomLogger
|
|||
from litellm.litellm_core_utils.core_helpers import _get_parent_otel_span_from_kwargs
|
||||
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLogging
|
||||
from litellm.llms.azure.azure import get_azure_ad_token_from_oidc
|
||||
+ from litellm.router_strategy.budget_limiter import RouterBudgetLimiting
|
||||
from litellm.router_strategy.least_busy import LeastBusyLoggingHandler
|
||||
from litellm.router_strategy.lowest_cost import LowestCostLoggingHandler
|
||||
from litellm.router_strategy.lowest_latency import LowestLatencyLoggingHandler
|
||||
from litellm.router_strategy.lowest_tpm_rpm import LowestTPMLoggingHandler
|
||||
from litellm.router_strategy.lowest_tpm_rpm_v2 import LowestTPMLoggingHandler_v2
|
||||
- from litellm.router_strategy.provider_budgets import ProviderBudgetLimiting
|
||||
from litellm.router_strategy.simple_shuffle import simple_shuffle
|
||||
from litellm.router_strategy.tag_based_routing import get_deployments_for_tag
|
||||
from litellm.router_utils.batch_utils import (
|
||||
|
@ -123,11 +123,11 @@ from litellm.types.router import (
|
|||
CustomRoutingStrategyBase,
|
||||
Deployment,
|
||||
DeploymentTypedDict,
|
||||
+ GenericBudgetConfigType,
|
||||
LiteLLM_Params,
|
||||
LiteLLMParamsTypedDict,
|
||||
ModelGroupInfo,
|
||||
ModelInfo,
|
||||
- ProviderBudgetConfigType,
|
||||
RetryPolicy,
|
||||
RouterCacheEnum,
|
||||
RouterErrors,
|
||||
|
@ -248,7 +248,7 @@ class Router:
|
|||
"usage-based-routing-v2",
|
||||
] = "simple-shuffle",
|
||||
routing_strategy_args: dict = {}, # just for latency-based
|
||||
- provider_budget_config: Optional[ProviderBudgetConfigType] = None,
|
||||
+ provider_budget_config: Optional[GenericBudgetConfigType] = None,
|
||||
alerting_config: Optional[AlertingConfig] = None,
|
||||
router_general_settings: Optional[
|
||||
RouterGeneralSettings
|
||||
|
@ -537,10 +537,14 @@ class Router:
|
|||
self.service_logger_obj = ServiceLogging()
|
||||
self.routing_strategy_args = routing_strategy_args
|
||||
self.provider_budget_config = provider_budget_config
|
||||
- if self.provider_budget_config is not None:
|
||||
- self.provider_budget_logger = ProviderBudgetLimiting(
|
||||
+ self.router_budget_logger: Optional[RouterBudgetLimiting] = None
|
||||
+ if RouterBudgetLimiting.should_init_router_budget_limiter(
|
||||
+ model_list=model_list, provider_budget_config=self.provider_budget_config
|
||||
+ ):
|
||||
+ self.router_budget_logger = RouterBudgetLimiting(
|
||||
router_cache=self.cache,
|
||||
provider_budget_config=self.provider_budget_config,
|
||||
+ model_list=self.model_list,
|
||||
)
|
||||
self.retry_policy: Optional[RetryPolicy] = None
|
||||
if retry_policy is not None:
|
||||
|
@ -5318,9 +5322,9 @@ class Router:
|
|||
healthy_deployments=healthy_deployments,
|
||||
)
|
||||
|
||||
- if self.provider_budget_config is not None:
|
||||
+ if self.router_budget_logger:
|
||||
healthy_deployments = (
|
||||
- await self.provider_budget_logger.async_filter_deployments(
|
||||
+ await self.router_budget_logger.async_filter_deployments(
|
||||
healthy_deployments=healthy_deployments,
|
||||
request_kwargs=request_kwargs,
|
||||
)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue