diff --git a/litellm/router.py b/litellm/router.py
index dba886b856..6e358a6266 100644
--- a/litellm/router.py
+++ b/litellm/router.py
@@ -166,6 +166,20 @@ class RoutingArgs(enum.Enum):
     ttl = 60  # 1min (RPM/TPM expire key)
 
 
+class CopyOnAccessList(list):
+    """A list wrapper that returns deep copies of elements when accessed."""
+
+    def __getitem__(self, index):
+        """Return a deep copy when an item is accessed by index."""
+        item = super().__getitem__(index)
+        return copy.deepcopy(item)
+
+    def __iter__(self):
+        """Return an iterator that yields deep copies of elements."""
+        for item in super().__iter__():
+            yield copy.deepcopy(item)
+
+
 class Router:
     model_names: List = []
     cache_responses: Optional[bool] = False
@@ -387,6 +401,7 @@ class Router:
         self.provider_default_deployment_ids: List[str] = []
         self.pattern_router = PatternMatchRouter()
 
+        self.model_list: CopyOnAccessList = CopyOnAccessList()
         if model_list is not None:
             model_list = copy.deepcopy(model_list)
             self.set_model_list(model_list)
@@ -394,10 +409,6 @@ class Router:
             for m in model_list:
                 if "model" in m["litellm_params"]:
                     self.deployment_latency_map[m["litellm_params"]["model"]] = 0
-        else:
-            self.model_list: List = (
-                []
-            )  # initialize an empty list - to allow _add_deployment and delete_deployment to work
 
         if allowed_fails is not None:
             self.allowed_fails = allowed_fails
@@ -4417,7 +4428,7 @@ class Router:
 
     def set_model_list(self, model_list: list):
         original_model_list = copy.deepcopy(model_list)
-        self.model_list = []
+        self.model_list = CopyOnAccessList()
        # we add api_base/api_key each model so load balancing between azure/gpt on api_base1 and api_base2 works
        for model in original_model_list:
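
For reviewers, a minimal self-contained sketch of the behavior this change introduces: indexing or iterating the router's `model_list` now yields deep copies, so callers that mutate a returned deployment dict can no longer corrupt the router's internal state. The deployment dict below is illustrative only, not taken from the repo's tests.

```python
import copy


class CopyOnAccessList(list):
    """A list wrapper that returns deep copies of elements when accessed."""

    def __getitem__(self, index):
        # Hand back a deep copy so callers can't mutate the stored element.
        item = super().__getitem__(index)
        return copy.deepcopy(item)

    def __iter__(self):
        # Iteration likewise yields deep copies of each element.
        for item in super().__iter__():
            yield copy.deepcopy(item)


# Illustrative deployment entry (hypothetical values).
model_list = CopyOnAccessList(
    [{"model_name": "gpt-4", "litellm_params": {"model": "azure/gpt-4"}}]
)

# A caller mutates the deployment dict it got back ...
deployment = model_list[0]
deployment["litellm_params"]["model"] = "something-else"

# ... but the element actually stored in the list is untouched.
assert list.__getitem__(model_list, 0)["litellm_params"]["model"] == "azure/gpt-4"
```

One side effect worth flagging: code that previously relied on mutating `router.model_list[0][...]` in place will now silently modify only a copy, since writes have to go through methods like `append`/`__setitem__` on the wrapper itself.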