(feat proxy) v2 - model max budgets (#7302)

* clean up unused code

* add _PROXY_VirtualKeyModelMaxBudgetLimiter

* adjust type imports

* working _PROXY_VirtualKeyModelMaxBudgetLimiter

* fix user_api_key_model_max_budget

* fix user_api_key_model_max_budget

* update naming

* update naming

* fix changes to RouterBudgetLimiting

* test_call_with_key_over_model_budget

* test_call_with_key_over_model_budget

* handle _get_request_model_budget_config

* e2e test for test_call_with_key_over_model_budget

* clean up test

* run ci/cd again

* add validate_model_max_budget

* docs fix

* update doc

* add e2e testing for _PROXY_VirtualKeyModelMaxBudgetLimiter

* test_unit_test_max_model_budget_limiter.py
Author: Ishaan Jaff
Date: 2024-12-18 19:42:46 -08:00 (committed by GitHub)
Parent: 1a4910f6c0
Commit: 6220e17ebf
14 changed files with 628 additions and 261 deletions
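
The change set wires per-model budget enforcement for virtual keys into the proxy. As a hedged illustration (not taken from this commit), the sketch below creates a virtual key with a per-model budget via the proxy's `/key/generate` endpoint; the proxy URL, master key, and the exact `model_max_budget` value schema (`budget_limit` / `time_period`) are assumptions, so consult the LiteLLM proxy docs for the authoritative field names.

```python
# Hedged sketch: issue a virtual key whose spend on a given model is capped, so a hook
# like _PROXY_VirtualKeyModelMaxBudgetLimiter can reject calls once the cap is reached.
# The URL, master key, and the budget_limit/time_period field names are assumptions.
import requests

resp = requests.post(
    "http://localhost:4000/key/generate",          # assumed local proxy address
    headers={"Authorization": "Bearer sk-1234"},   # assumed proxy master key
    json={
        "model_max_budget": {
            # per-model budget config; values are illustrative only
            "gpt-4o": {"budget_limit": 0.005, "time_period": "1d"},
        }
    },
)
print(resp.json())  # response includes the generated virtual key ("key": "sk-...")
```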


@@ -173,6 +173,9 @@ from litellm.proxy.guardrails.init_guardrails import (
 )
 from litellm.proxy.health_check import perform_health_check
 from litellm.proxy.health_endpoints._health_endpoints import router as health_router
+from litellm.proxy.hooks.model_max_budget_limiter import (
+    _PROXY_VirtualKeyModelMaxBudgetLimiter,
+)
 from litellm.proxy.hooks.prompt_injection_detection import (
     _OPTIONAL_PromptInjectionDetection,
 )
@@ -495,6 +498,10 @@ prisma_client: Optional[PrismaClient] = None
 user_api_key_cache = DualCache(
     default_in_memory_ttl=UserAPIKeyCacheTTLEnum.in_memory_cache_ttl.value
 )
+model_max_budget_limiter = _PROXY_VirtualKeyModelMaxBudgetLimiter(
+    dual_cache=user_api_key_cache
+)
+litellm.callbacks.append(model_max_budget_limiter)
 redis_usage_cache: Optional[RedisCache] = (
     None  # redis cache used for tracking spend, tpm/rpm limits
 )
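
The hunks above import the new hook, instantiate it against the shared `user_api_key_cache`, and append it to `litellm.callbacks`, which is how the proxy picks it up as a pre-call hook. Below is a minimal sketch of that hook shape; it is not this PR's actual implementation, and the cache-key layout, the `model_max_budget` attribute on the key object, and the exception raised are assumptions.

```python
# Minimal sketch (not this PR's implementation) of a CustomLogger-based pre-call hook
# in the same shape as _PROXY_VirtualKeyModelMaxBudgetLimiter: it blocks a request when
# the spend tracked for (virtual key, model) has reached that model's configured budget.
# Cache key layout, the model_max_budget attribute, and the spend units are assumptions.
from litellm.integrations.custom_logger import CustomLogger


class _SketchVirtualKeyModelBudgetLimiter(CustomLogger):
    def __init__(self, dual_cache):
        # DualCache shared with the proxy (in-memory + optional Redis)
        self.dual_cache = dual_cache

    async def async_pre_call_hook(self, user_api_key_dict, cache, data, call_type):
        model = data.get("model")
        # Per-model budget config stored on the virtual key (assumed attribute/schema).
        budgets = getattr(user_api_key_dict, "model_max_budget", None) or {}
        budget_config = budgets.get(model)
        if budget_config is None:
            return data  # no per-model budget configured -> let the request through

        # Spend tracked per (key, model); the key format here is illustrative only.
        spend_key = f"virtual_key_spend:{user_api_key_dict.token}:{model}"
        current_spend = await self.dual_cache.async_get_cache(key=spend_key) or 0.0
        if current_spend >= float(budget_config["budget_limit"]):
            raise Exception(
                f"ExceededModelBudget: spend={current_spend} >= "
                f"budget={budget_config['budget_limit']} for model={model}"
            )
        return data
```

Because the real limiter is appended to `litellm.callbacks` at module import time in `proxy_server.py`, it runs for every request the proxy handles without any per-route wiring.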