(feat proxy) v2 - model max budgets (#7302)

* clean up unused code

* add _PROXY_VirtualKeyModelMaxBudgetLimiter

* adjust type imports

* working _PROXY_VirtualKeyModelMaxBudgetLimiter

* fix user_api_key_model_max_budget

* fix user_api_key_model_max_budget

* update naming

* update naming

* fix changes to RouterBudgetLimiting

* test_call_with_key_over_model_budget

* test_call_with_key_over_model_budget

* handle _get_request_model_budget_config

* e2e test for test_call_with_key_over_model_budget

* clean up test

* run ci/cd again

* add validate_model_max_budget

* docs fix

* update doc

* add e2e testing for _PROXY_VirtualKeyModelMaxBudgetLimiter

* test_unit_test_max_model_budget_limiter.py
This commit is contained in:
Ishaan Jaff 2024-12-18 19:42:46 -08:00 committed by GitHub
parent 1a4910f6c0
commit 6220e17ebf
14 changed files with 628 additions and 261 deletions

View file

@@ -631,7 +631,7 @@ class Router:
_callback = PromptCachingDeploymentCheck(cache=self.cache)
elif pre_call_check == "router_budget_limiting":
_callback = RouterBudgetLimiting(
router_cache=self.cache,
dual_cache=self.cache,
provider_budget_config=self.provider_budget_config,
model_list=self.model_list,
)
@@ -5292,14 +5292,6 @@ class Router:
healthy_deployments=healthy_deployments,
)
# if self.router_budget_logger:
# healthy_deployments = (
# await self.router_budget_logger.async_filter_deployments(
# healthy_deployments=healthy_deployments,
# request_kwargs=request_kwargs,
# )
# )
if len(healthy_deployments) == 0:
exception = await async_raise_no_deployment_exception(
litellm_router_instance=self,