fix(proxy_cli.py): don't double load the router config

was causing callbacks to be instantiated twice - double counting usage in cache
Krrish Dholakia 2024-04-10 13:23:56 -07:00
parent d8da4cf8bb
commit b2741933dc
5 changed files with 12 additions and 16 deletions
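
The mechanism behind the bug, in outline: ProxyConfig.load_config() both parses the config YAML and applies it (instantiating the router and registering callbacks such as cache usage tracking), while get_config() only returns the parsed dict. run_server() called load_config() just to read general_settings, and the server startup path loaded the config again, so every callback was registered twice. A minimal sketch of the distinction, with illustrative method bodies that are not the actual litellm implementation:

    import yaml

    class ProxyConfig:
        def __init__(self):
            self.callbacks = []  # stand-in for litellm's registered callbacks

        async def get_config(self, config_file_path: str) -> dict:
            # Pure read: parse the YAML and return it, no side effects.
            with open(config_file_path) as f:
                return yaml.safe_load(f) or {}

        async def load_config(self, router, config_file_path: str) -> tuple:
            # Read *and* apply: registers callbacks as a side effect, so a
            # second call appends each callback again and cache usage is
            # counted twice per request.
            config = await self.get_config(config_file_path)
            self.callbacks.append("cache_usage_tracker")  # illustrative
            return router, config.get("model_list"), config.get("general_settings", {})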


@@ -27,12 +27,11 @@ litellm_settings:
  upperbound_key_generate_params:
    max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET
# litellm_settings:
#   drop_params: True
#   max_budget: 800021
#   budget_duration: 30d
# #   cache: true
router_settings:
  routing_strategy: usage-based-routing
  redis_host: redis-16337.c322.us-east-1-2.ec2.cloud.redislabs.com
  redis_password: madeBerri@992
  redis_port: 16337
general_settings:
  master_key: sk-1234
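
For context on the block above: the keys under router_settings are handed to litellm.Router as keyword arguments when the proxy builds its router. A hedged sketch of the equivalent programmatic setup (the model_list entry is a placeholder, not part of this config):

    from litellm import Router

    router = Router(
        model_list=[{  # placeholder deployment for illustration
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo"},
        }],
        routing_strategy="usage-based-routing",
        redis_host="redis-16337.c322.us-east-1-2.ec2.cloud.redislabs.com",
        redis_port=16337,
        redis_password="madeBerri@992",
    )

usage-based-routing keeps per-deployment usage counts in the shared Redis/cache layer, which is why double counting usage there (the bug this commit fixes) also skews routing decisions.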


@@ -425,9 +425,10 @@ def run_server(
         )
         proxy_config = ProxyConfig()
-        _, _, general_settings = asyncio.run(
-            proxy_config.load_config(router=None, config_file_path=config)
-        )
+        _config = asyncio.run(proxy_config.get_config(config_file_path=config))
+        general_settings = _config.get("general_settings", {})
+        if general_settings is None:
+            general_settings = {}
         database_url = general_settings.get("database_url", None)
         db_connection_pool_limit = general_settings.get(
             "database_connection_pool_limit", 100


@@ -2335,6 +2335,7 @@ class ProxyConfig:
            "background_health_checks", False
        )
        health_check_interval = general_settings.get("health_check_interval", 300)
        router_params: dict = {
            "cache_responses": litellm.cache
            != None,  # cache if user passed in cache values
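
For orientation, router_params accumulates the constructor kwargs and is later expanded into the router. A sketch of the surrounding pattern, with the merge step paraphrased rather than copied from the source:

    import litellm

    # True only if cache values were set earlier while loading the config;
    # `litellm.cache is not None` is the idiomatic spelling of `!= None`.
    router_params: dict = {"cache_responses": litellm.cache != None}
    router_params.update(config.get("router_settings", {}))  # paraphrased merge
    router = litellm.Router(**router_params)  # expanded into the constructor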


@@ -2374,7 +2374,6 @@ class Router:
        """
        Returns the deployment based on routing strategy
        """
        # users need to explicitly call a specific deployment, by setting `specific_deployment = True` as a completion()/embedding() kwarg
        # When this was not explicit we had several issues with fallbacks timing out
        if specific_deployment == True:
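
Per the comment above, pinning a deployment is opt-in through a kwarg rather than the default routing path. A usage sketch based on that kwarg (the deployment name and message are placeholders):

    response = router.completion(
        model="azure/gpt-4-eu",  # an exact deployment name, not a model group
        specific_deployment=True,  # bypass the routing strategy and fallbacks
        messages=[{"role": "user", "content": "hello"}],
    )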


@ -1990,9 +1990,6 @@ class Logging:
else:
litellm.cache.add_cache(result, **kwargs)
if isinstance(callback, CustomLogger): # custom logger class
print_verbose(
f"Running Async success callback: {callback}; self.stream: {self.stream}; async_complete_streaming_response: {self.model_call_details.get('async_complete_streaming_response', None)} result={result}"
)
if self.stream == True:
if (
"async_complete_streaming_response"
@ -2376,7 +2373,6 @@ def client(original_function):
if litellm.use_client or (
"use_client" in kwargs and kwargs["use_client"] == True
):
print_verbose(f"litedebugger initialized")
if "lite_debugger" not in litellm.input_callback:
litellm.input_callback.append("lite_debugger")
if "lite_debugger" not in litellm.success_callback: