fix(proxy_cli.py): don't double load the router config

was causing callbacks to be instantiated twice - double counting usage in cache
Krrish Dholakia 2024-04-10 13:23:56 -07:00
parent d8da4cf8bb
commit b2741933dc
5 changed files with 12 additions and 16 deletions
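
The mechanism behind the bug, in outline: ProxyConfig.load_config() both parses the config YAML and applies it (instantiating the router and registering callbacks such as cache usage tracking), while get_config() only returns the parsed dict. run_server() called load_config() just to read general_settings, and the server startup path loaded the config again, so every callback was registered twice. A minimal sketch of the distinction, with illustrative method bodies that are not the actual litellm implementation:

    import yaml

    class ProxyConfig:
        def __init__(self):
            self.callbacks = []  # stand-in for litellm's registered callbacks

        async def get_config(self, config_file_path: str) -> dict:
            # Pure read: parse the YAML and return it, no side effects.
            with open(config_file_path) as f:
                return yaml.safe_load(f) or {}

        async def load_config(self, router, config_file_path: str) -> tuple:
            # Read *and* apply: registers callbacks as a side effect, so a
            # second call appends each callback again and cache usage is
            # counted twice per request.
            config = await self.get_config(config_file_path)
            self.callbacks.append("cache_usage_tracker")  # illustrative
            return router, config.get("model_list"), config.get("general_settings", {})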


@@ -27,12 +27,11 @@ litellm_settings:
  upperbound_key_generate_params:
    max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET
# litellm_settings:
#   drop_params: True
#   max_budget: 800021
#   budget_duration: 30d
# #   cache: true
router_settings:
  routing_strategy: usage-based-routing
  redis_host: redis-16337.c322.us-east-1-2.ec2.cloud.redislabs.com
  redis_password: madeBerri@992
  redis_port: 16337
general_settings:
  master_key: sk-1234
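
For context on the block above: the keys under router_settings are handed to litellm.Router as keyword arguments when the proxy builds its router. A hedged sketch of the equivalent programmatic setup (the model_list entry is a placeholder, not part of this config):

    from litellm import Router

    router = Router(
        model_list=[{  # placeholder deployment for illustration
            "model_name": "gpt-3.5-turbo",
            "litellm_params": {"model": "gpt-3.5-turbo"},
        }],
        routing_strategy="usage-based-routing",
        redis_host="redis-16337.c322.us-east-1-2.ec2.cloud.redislabs.com",
        redis_port=16337,
        redis_password="madeBerri@992",
    )

usage-based-routing keeps per-deployment usage counts in the shared Redis/cache layer, which is why double counting usage there (the bug this commit fixes) also skews routing decisions.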


@@ -425,9 +425,10 @@ def run_server(
         )
         proxy_config = ProxyConfig()
-        _, _, general_settings = asyncio.run(
-            proxy_config.load_config(router=None, config_file_path=config)
-        )
+        _config = asyncio.run(proxy_config.get_config(config_file_path=config))
+        general_settings = _config.get("general_settings", {})
+        if general_settings is None:
+            general_settings = {}
         database_url = general_settings.get("database_url", None)
         db_connection_pool_limit = general_settings.get(
             "database_connection_pool_limit", 100


@@ -2335,6 +2335,7 @@ class ProxyConfig:
            "background_health_checks", False
        )
        health_check_interval = general_settings.get("health_check_interval", 300)
        router_params: dict = {
            "cache_responses": litellm.cache
            != None,  # cache if user passed in cache values
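
For orientation, router_params accumulates the constructor kwargs and is later expanded into the router. A sketch of the surrounding pattern, with the merge step paraphrased rather than copied from the source:

    import litellm

    # True only if cache values were set earlier while loading the config;
    # `litellm.cache is not None` is the idiomatic spelling of `!= None`.
    router_params: dict = {"cache_responses": litellm.cache != None}
    router_params.update(config.get("router_settings", {}))  # paraphrased merge
    router = litellm.Router(**router_params)  # expanded into the constructor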


@@ -2374,7 +2374,6 @@ class Router:
        """
        Returns the deployment based on routing strategy
        """
        # users need to explicitly call a specific deployment, by setting `specific_deployment = True` as a completion()/embedding() kwarg
        # When this was not explicit we had several issues with fallbacks timing out
        if specific_deployment == True:
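
Per the comment above, pinning a deployment is opt-in through a kwarg rather than the default routing path. A usage sketch based on that kwarg (the deployment name and message are placeholders):

    response = router.completion(
        model="azure/gpt-4-eu",  # an exact deployment name, not a model group
        specific_deployment=True,  # bypass the routing strategy and fallbacks
        messages=[{"role": "user", "content": "hello"}],
    )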


@ -1990,9 +1990,6 @@ class Logging:
else:
litellm.cache.add_cache(result, **kwargs)
if isinstance(callback, CustomLogger): # custom logger class
print_verbose(
f"Running Async success callback: {callback}; self.stream: {self.stream}; async_complete_streaming_response: {self.model_call_details.get('async_complete_streaming_response', None)} result={result}"
)
if self.stream == True:
if (
"async_complete_streaming_response"
@ -2376,7 +2373,6 @@ def client(original_function):
if litellm.use_client or (
"use_client" in kwargs and kwargs["use_client"] == True
):
print_verbose(f"litedebugger initialized")
if "lite_debugger" not in litellm.input_callback:
litellm.input_callback.append("lite_debugger")
if "lite_debugger" not in litellm.success_callback: