Mirror of https://github.com/BerriAI/litellm.git — synced 2025-04-25 18:54:30 +00:00.
(feat proxy) v2 - model max budgets (#7302)
* clean up unused code
* add `_PROXY_VirtualKeyModelMaxBudgetLimiter`
* adjust type imports
* working `_PROXY_VirtualKeyModelMaxBudgetLimiter`
* fix `user_api_key_model_max_budget`
* fix `user_api_key_model_max_budget`
* update naming
* update naming
* fix changes to `RouterBudgetLimiting`
* `test_call_with_key_over_model_budget`
* `test_call_with_key_over_model_budget`
* handle `_get_request_model_budget_config`
* e2e test for `test_call_with_key_over_model_budget`
* clean up test
* run ci/cd again
* add `validate_model_max_budget`
* docs fix
* update doc
* add e2e testing for `_PROXY_VirtualKeyModelMaxBudgetLimiter`
* `test_unit_test_max_model_budget_limiter.py`
This commit is contained in: parent 5253f639cd, commit 6261ec3599.
14 changed files with 628 additions and 261 deletions.
|
@ -631,7 +631,7 @@ class Router:
|
|||
_callback = PromptCachingDeploymentCheck(cache=self.cache)
|
||||
elif pre_call_check == "router_budget_limiting":
|
||||
_callback = RouterBudgetLimiting(
|
||||
router_cache=self.cache,
|
||||
dual_cache=self.cache,
|
||||
provider_budget_config=self.provider_budget_config,
|
||||
model_list=self.model_list,
|
||||
)
|
||||
|
@ -5292,14 +5292,6 @@ class Router:
|
|||
healthy_deployments=healthy_deployments,
|
||||
)
|
||||
|
||||
# if self.router_budget_logger:
|
||||
# healthy_deployments = (
|
||||
# await self.router_budget_logger.async_filter_deployments(
|
||||
# healthy_deployments=healthy_deployments,
|
||||
# request_kwargs=request_kwargs,
|
||||
# )
|
||||
# )
|
||||
|
||||
if len(healthy_deployments) == 0:
|
||||
exception = await async_raise_no_deployment_exception(
|
||||
litellm_router_instance=self,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue