diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 86c7e7ecf..5ce526d50 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -239,6 +239,8 @@ health_check_interval = None health_check_results = {} queue: List = [] litellm_proxy_budget_name = "litellm-proxy-budget" +proxy_budget_rescheduler_min_time = 597 +proxy_budget_rescheduler_max_time = 605 ### INITIALIZE GLOBAL LOGGING OBJECT ### proxy_logging_obj = ProxyLogging(user_api_key_cache=user_api_key_cache) ### REDIS QUEUE ### @@ -1406,7 +1408,7 @@ class ProxyConfig: """ Load config values into proxy global state """ - global master_key, user_config_file_path, otel_logging, user_custom_auth, user_custom_auth_path, user_custom_key_generate, use_background_health_checks, health_check_interval, use_queue, custom_db_client + global master_key, user_config_file_path, otel_logging, user_custom_auth, user_custom_auth_path, user_custom_key_generate, use_background_health_checks, health_check_interval, use_queue, custom_db_client, proxy_budget_rescheduler_max_time, proxy_budget_rescheduler_min_time # Load existing config config = await self.get_config(config_file_path=config_file_path) @@ -1713,6 +1715,13 @@ class ProxyConfig: ) ## COST TRACKING ## cost_tracking() + ## BUDGET RESCHEDULER ## + proxy_budget_rescheduler_min_time = general_settings.get( + "proxy_budget_rescheduler_min_time", proxy_budget_rescheduler_min_time + ) + proxy_budget_rescheduler_max_time = general_settings.get( + "proxy_budget_rescheduler_max_time", proxy_budget_rescheduler_max_time + ) ### BACKGROUND HEALTH CHECKS ### # Enable background health checks use_background_health_checks = general_settings.get( @@ -2196,7 +2205,7 @@ def parse_cache_control(cache_control): @router.on_event("startup") async def startup_event(): - global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings + global prisma_client, master_key, use_background_health_checks, llm_router, llm_model_list, general_settings, proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time import json ### LOAD MASTER KEY ### @@ -2307,7 +2316,7 @@ async def startup_event(): if prisma_client is not None: scheduler = AsyncIOScheduler() interval = random.randint( - 597, 605 + proxy_budget_rescheduler_min_time, proxy_budget_rescheduler_max_time ) # random interval, so multiple workers avoid resetting budget at the same time scheduler.add_job( reset_budget, "interval", seconds=interval, args=[prisma_client] diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py index fb2ebc0d6..1de26d3b9 100644 --- a/litellm/tests/test_amazing_vertex_completion.py +++ b/litellm/tests/test_amazing_vertex_completion.py @@ -110,6 +110,7 @@ def test_vertex_ai(): "code-bison@001", "text-bison@001", "gemini-1.5-pro", + "gemini-1.5-pro-preview-0215", "gemini-1.5-pro-vision", ]: # our account does not have access to this model @@ -160,6 +161,7 @@ def test_vertex_ai_stream(): "code-bison@001", "text-bison@001", "gemini-1.5-pro", + "gemini-1.5-pro-preview-0215", "gemini-1.5-pro-vision", ]: # our account does not have access to this model @@ -211,6 +213,7 @@ async def test_async_vertexai_response(): "code-bison@001", "text-bison@001", "gemini-1.5-pro", + "gemini-1.5-pro-preview-0215", "gemini-1.5-pro-vision", ]: # our account does not have access to this model @@ -255,6 +258,7 @@ async def test_async_vertexai_streaming_response(): "code-bison@001", "text-bison@001", "gemini-1.5-pro", + "gemini-1.5-pro-preview-0215", "gemini-1.5-pro-vision", ]: # our account does not have access to this model diff --git a/proxy_server_config.yaml b/proxy_server_config.yaml index d0cb5739e..198d33013 100644 --- a/proxy_server_config.yaml +++ b/proxy_server_config.yaml @@ -40,6 +40,8 @@ litellm_settings: budget_duration: 30d general_settings: master_key: sk-1234 # [OPTIONAL] Only use this if you to require all calls to contain this key (Authorization: Bearer sk-1234) + proxy_budget_rescheduler_min_time: 30 + proxy_budget_rescheduler_max_time: 60 # database_url: "postgresql://:@:/" # [OPTIONAL] use for token-based auth to proxy environment_variables: diff --git a/tests/test_keys.py b/tests/test_keys.py index 28ce02511..c2b957180 100644 --- a/tests/test_keys.py +++ b/tests/test_keys.py @@ -449,7 +449,7 @@ async def test_key_with_budgets(): reset_at_init_value = key_info["info"]["budget_reset_at"] reset_at_new_value = None i = 0 - await asyncio.sleep(610) + await asyncio.sleep(120) while i < 3: key_info = await get_key_info(session=session, get_key=key, call_key=key) reset_at_new_value = key_info["info"]["budget_reset_at"]