Merge pull request #2461 from BerriAI/litellm_improve_mem_use

LiteLLM - improve memory utilization - don't use inMemCache on Router
Authored by Ishaan Jaff on 2024-03-11 18:59:57 -07:00; committed by GitHub
Commit: fa655d62fb
2 changed files with 149 additions and 14 deletions


@@ -210,9 +210,6 @@ class Router:
         self.context_window_fallbacks = (
             context_window_fallbacks or litellm.context_window_fallbacks
         )
-        self.model_exception_map: dict = (
-            {}
-        )  # dict to store model: list exceptions. self.exceptions = {"gpt-3.5": ["API KEY Error", "Rate Limit Error", "good morning error"]}
         self.total_calls: defaultdict = defaultdict(
             int
         )  # dict to store total calls made to each model
@@ -1524,17 +1521,6 @@ class Router:
                 self._set_cooldown_deployments(
                     deployment_id
                 )  # setting deployment_id in cooldown deployments
-            if metadata:
-                deployment = metadata.get("deployment", None)
-                deployment_exceptions = self.model_exception_map.get(deployment, [])
-                deployment_exceptions.append(exception_str)
-                self.model_exception_map[deployment] = deployment_exceptions
-                verbose_router_logger.debug("\nEXCEPTION FOR DEPLOYMENTS\n")
-                verbose_router_logger.debug(self.model_exception_map)
-                for model in self.model_exception_map:
-                    verbose_router_logger.debug(
-                        f"Model {model} had {len(self.model_exception_map[model])} exception"
-                    )
             if custom_llm_provider:
                 model_name = f"{custom_llm_provider}/{model_name}"
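
For context, the removed attribute follows a common unbounded-growth pattern: every deployment failure appends the full exception string to an in-process dict that is never trimmed. The sketch below is not LiteLLM's actual code; it is a minimal, hypothetical illustration (the ExceptionTracker class and record_failure method are invented for this example) of why that kind of map keeps growing in a long-running router process.

# Minimal illustrative sketch (hypothetical class, not LiteLLM's actual code):
# shows why an unbounded per-deployment exception map grows memory over time,
# which is the issue this PR addresses by removing the map from Router.
from collections import defaultdict


class ExceptionTracker:
    def __init__(self):
        # Every failure appends the full exception string; nothing is evicted,
        # so memory usage scales with the total number of failed requests.
        self.model_exception_map: dict = defaultdict(list)

    def record_failure(self, deployment: str, exception_str: str) -> None:
        self.model_exception_map[deployment].append(exception_str)


# Usage: the map keeps one entry per failure for the lifetime of the process.
tracker = ExceptionTracker()
for i in range(3):
    tracker.record_failure("gpt-3.5", f"Rate Limit Error #{i}")
print({k: len(v) for k, v in tracker.model_exception_map.items()})  # {'gpt-3.5': 3}

Dropping this tracking from Router, as the diff above does, keeps its memory footprint flat regardless of how many deployment failures accumulate.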