(feat - Router / Proxy) Allow setting budget limits per LLM deployment (#7220)

* fix test_deployment_budget_limits_e2e_test

* refactor async_log_success_event to track spend for provider + deployment

* fix format

* rename class to RouterBudgetLimiting

* rename func

* rename types used for budgets

* add new types for deployment budgets

* add budget limits for deployments

* fix checking budgets set for provider

* update file names

* fix linting error

* _track_provider_remaining_budget_prometheus

* async_filter_deployments

* fix model list passed to router

* update error

* test_deployment_budgets_e2e_test_expect_to_fail

* fix test case

* run deployment budget limits
This commit is contained in:
Ishaan Jaff 2024-12-13 19:15:51 -08:00 committed by GitHub
parent c3f637012b
commit bc46916bb3
8 changed files with 557 additions and 151 deletions

View file

@@ -2533,13 +2533,15 @@ async def provider_budgets() -> ProviderBudgetResponse:
provider_budget_response_dict: Dict[str, ProviderBudgetResponseObject] = {}
for _provider, _budget_info in provider_budget_config.items():
+ if llm_router.router_budget_logger is None:
+ raise ValueError("No router budget logger found")
_provider_spend = (
- await llm_router.provider_budget_logger._get_current_provider_spend(
+ await llm_router.router_budget_logger._get_current_provider_spend(
_provider
)
or 0.0
)
- _provider_budget_ttl = await llm_router.provider_budget_logger._get_current_provider_budget_reset_at(
+ _provider_budget_ttl = await llm_router.router_budget_logger._get_current_provider_budget_reset_at(
_provider
)
provider_budget_response_object = ProviderBudgetResponseObject(