mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 03:04:13 +00:00
(feat proxy) v2 - model max budgets (#7302)
* clean up unused code
* add _PROXY_VirtualKeyModelMaxBudgetLimiter
* adjust type imports
* working _PROXY_VirtualKeyModelMaxBudgetLimiter
* fix user_api_key_model_max_budget
* fix user_api_key_model_max_budget
* update naming
* update naming
* fix changes to RouterBudgetLimiting
* test_call_with_key_over_model_budget
* test_call_with_key_over_model_budget
* handle _get_request_model_budget_config
* e2e test for test_call_with_key_over_model_budget
* clean up test
* run ci/cd again
* add validate_model_max_budget
* docs fix
* update doc
* add e2e testing for _PROXY_VirtualKeyModelMaxBudgetLimiter
* test_unit_test_max_model_budget_limiter.py
This commit is contained in:
parent
5253f639cd
commit
6261ec3599
14 changed files with 628 additions and 261 deletions
|
@@ -173,6 +173,9 @@ from litellm.proxy.guardrails.init_guardrails import (
|
|||
)
|
||||
from litellm.proxy.health_check import perform_health_check
|
||||
from litellm.proxy.health_endpoints._health_endpoints import router as health_router
|
||||
from litellm.proxy.hooks.model_max_budget_limiter import (
|
||||
_PROXY_VirtualKeyModelMaxBudgetLimiter,
|
||||
)
|
||||
from litellm.proxy.hooks.prompt_injection_detection import (
|
||||
_OPTIONAL_PromptInjectionDetection,
|
||||
)
|
||||
|
@@ -495,6 +498,10 @@ prisma_client: Optional[PrismaClient] = None
|
|||
user_api_key_cache = DualCache(
|
||||
default_in_memory_ttl=UserAPIKeyCacheTTLEnum.in_memory_cache_ttl.value
|
||||
)
|
||||
model_max_budget_limiter = _PROXY_VirtualKeyModelMaxBudgetLimiter(
|
||||
dual_cache=user_api_key_cache
|
||||
)
|
||||
litellm.callbacks.append(model_max_budget_limiter)
|
||||
redis_usage_cache: Optional[RedisCache] = (
|
||||
None # redis cache used for tracking spend, tpm/rpm limits
|
||||
)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue