Mirror of https://github.com/BerriAI/litellm.git — synced 2025-04-25 18:54:30 +00:00.
(feat proxy) v2 - model max budgets (#7302)
* clean up unused code
* add `_PROXY_VirtualKeyModelMaxBudgetLimiter`
* adjust type imports
* working `_PROXY_VirtualKeyModelMaxBudgetLimiter`
* fix `user_api_key_model_max_budget`
* fix `user_api_key_model_max_budget`
* update naming
* update naming
* fix changes to `RouterBudgetLimiting`
* `test_call_with_key_over_model_budget`
* `test_call_with_key_over_model_budget`
* handle `_get_request_model_budget_config`
* e2e test for `test_call_with_key_over_model_budget`
* clean up test
* run ci/cd again
* add `validate_model_max_budget`
* docs fix
* update doc
* add e2e testing for `_PROXY_VirtualKeyModelMaxBudgetLimiter`
* `test_unit_test_max_model_budget_limiter.py`
This commit is contained in: parent 5253f639cd, commit 6261ec3599.
14 changed files with 628 additions and 261 deletions.
|
@ -631,7 +631,7 @@ class Router:
|
|||
_callback = PromptCachingDeploymentCheck(cache=self.cache)
|
||||
elif pre_call_check == "router_budget_limiting":
|
||||
_callback = RouterBudgetLimiting(
|
||||
router_cache=self.cache,
|
||||
dual_cache=self.cache,
|
||||
provider_budget_config=self.provider_budget_config,
|
||||
model_list=self.model_list,
|
||||
)
|
||||
|
@ -5292,14 +5292,6 @@ class Router:
|
|||
healthy_deployments=healthy_deployments,
|
||||
)
|
||||
|
||||
# if self.router_budget_logger:
|
||||
# healthy_deployments = (
|
||||
# await self.router_budget_logger.async_filter_deployments(
|
||||
# healthy_deployments=healthy_deployments,
|
||||
# request_kwargs=request_kwargs,
|
||||
# )
|
||||
# )
|
||||
|
||||
if len(healthy_deployments) == 0:
|
||||
exception = await async_raise_no_deployment_exception(
|
||||
litellm_router_instance=self,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue