diff --git a/.circleci/config.yml b/.circleci/config.yml
index a23192b49..edbe59113 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -3,6 +3,9 @@ jobs:
   local_testing:
     docker:
       - image: cimg/python:3.11
+        auth:
+          username: ${DOCKERHUB_USERNAME}
+          password: ${DOCKERHUB_PASSWORD}
     working_directory: ~/project

     steps:
@@ -114,6 +117,9 @@ jobs:
   ui_endpoint_testing:
     docker:
       - image: cimg/python:3.11
+        auth:
+          username: ${DOCKERHUB_USERNAME}
+          password: ${DOCKERHUB_PASSWORD}
     working_directory: ~/project

     steps:
@@ -152,6 +158,9 @@ jobs:
   litellm_router_testing: # Runs all tests with the "router" keyword
     docker:
       - image: cimg/python:3.11
+        auth:
+          username: ${DOCKERHUB_USERNAME}
+          password: ${DOCKERHUB_PASSWORD}
     working_directory: ~/project

     steps:
@@ -179,6 +188,9 @@ jobs:
   litellm_assistants_api_testing: # Runs all tests with the "assistants" keyword
     docker:
       - image: cimg/python:3.11
+        auth:
+          username: ${DOCKERHUB_USERNAME}
+          password: ${DOCKERHUB_PASSWORD}
     working_directory: ~/project

     steps:
@@ -206,6 +218,9 @@ jobs:
   load_testing:
     docker:
       - image: cimg/python:3.11
+        auth:
+          username: ${DOCKERHUB_USERNAME}
+          password: ${DOCKERHUB_PASSWORD}
     working_directory: ~/project

     steps:
@@ -233,6 +248,9 @@ jobs:
   llm_translation_testing:
     docker:
       - image: cimg/python:3.11
+        auth:
+          username: ${DOCKERHUB_USERNAME}
+          password: ${DOCKERHUB_PASSWORD}
     working_directory: ~/project

     steps:
diff --git a/litellm/caching.py b/litellm/caching.py
index 9bb03b99a..b2632be67 100644
--- a/litellm/caching.py
+++ b/litellm/caching.py
@@ -123,7 +123,7 @@ class InMemoryCache(BaseCache):
     async def async_set_cache(self, key, value, **kwargs):
         self.set_cache(key=key, value=value, **kwargs)

-    async def async_set_cache_pipeline(self, cache_list, ttl=None):
+    async def async_set_cache_pipeline(self, cache_list, ttl=None, **kwargs):
         for cache_key, cache_value in cache_list:
             if ttl is not None:
                 self.set_cache(key=cache_key, value=cache_value, ttl=ttl)
@@ -2038,7 +2038,7 @@ class DualCache(BaseCache):

             if self.redis_cache is not None and local_only == False:
                 await self.redis_cache.async_set_cache_pipeline(
-                    cache_list=cache_list, ttl=kwargs.get("ttl", None), **kwargs
+                    cache_list=cache_list, ttl=kwargs.pop("ttl", None), **kwargs
                 )
         except Exception as e:
             verbose_logger.exception(
diff --git a/litellm/proxy/hooks/parallel_request_limiter.py b/litellm/proxy/hooks/parallel_request_limiter.py
index 7eaf515f2..d75440337 100644
--- a/litellm/proxy/hooks/parallel_request_limiter.py
+++ b/litellm/proxy/hooks/parallel_request_limiter.py
@@ -327,8 +327,13 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
                 user_api_key_dict=user_api_key_dict,
             )
             # get user tpm/rpm limits
-            if _user_id_rate_limits is not None and isinstance(
-                _user_id_rate_limits, dict
+            if (
+                _user_id_rate_limits is not None
+                and isinstance(_user_id_rate_limits, dict)
+                and (
+                    _user_id_rate_limits.get("tpm_limit", None) is not None
+                    or _user_id_rate_limits.get("rpm_limit", None) is not None
+                )
             ):
                 user_tpm_limit = _user_id_rate_limits.get("tpm_limit", None)
                 user_rpm_limit = _user_id_rate_limits.get("rpm_limit", None)
@@ -472,6 +477,8 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
             # Update usage - API Key
             # ------------
+            values_to_update_in_cache = []
+
             if user_api_key is not None:
                 request_count_api_key = (
                     f"{user_api_key}::{precise_minute}::request_count"
                 )
@@ -495,12 +502,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
                 self.print_verbose(
                     f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
                 )
-                await self.internal_usage_cache.async_set_cache(
-                    request_count_api_key,
-                    new_val,
-                    ttl=60,
-                    litellm_parent_otel_span=litellm_parent_otel_span,
-                )  # store in cache for 1 min.
+                values_to_update_in_cache.append((request_count_api_key, new_val))

             # ------------
             # Update usage - model group + API Key
@@ -536,12 +538,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
                 self.print_verbose(
                     f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
                 )
-                await self.internal_usage_cache.async_set_cache(
-                    request_count_api_key,
-                    new_val,
-                    ttl=60,
-                    litellm_parent_otel_span=litellm_parent_otel_span,
-                )
+                values_to_update_in_cache.append((request_count_api_key, new_val))

             # ------------
             # Update usage - User
@@ -574,12 +571,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
                 self.print_verbose(
                     f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
                 )
-                await self.internal_usage_cache.async_set_cache(
-                    request_count_api_key,
-                    new_val,
-                    ttl=60,
-                    litellm_parent_otel_span=litellm_parent_otel_span,
-                )  # store in cache for 1 min.
+                values_to_update_in_cache.append((request_count_api_key, new_val))

             # ------------
             # Update usage - Team
@@ -612,12 +604,7 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
                 self.print_verbose(
                     f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
                 )
-                await self.internal_usage_cache.async_set_cache(
-                    request_count_api_key,
-                    new_val,
-                    ttl=60,
-                    litellm_parent_otel_span=litellm_parent_otel_span,
-                )  # store in cache for 1 min.
+                values_to_update_in_cache.append((request_count_api_key, new_val))

             # ------------
             # Update usage - End User
@@ -650,13 +637,13 @@ class _PROXY_MaxParallelRequestsHandler(CustomLogger):
                 self.print_verbose(
                     f"updated_value in success call: {new_val}, precise_minute: {precise_minute}"
                 )
-                await self.internal_usage_cache.async_set_cache(
-                    request_count_api_key,
-                    new_val,
-                    ttl=60,
-                    litellm_parent_otel_span=litellm_parent_otel_span,
-                )  # store in cache for 1 min.
+                values_to_update_in_cache.append((request_count_api_key, new_val))
+            await self.internal_usage_cache.async_batch_set_cache(
+                cache_list=values_to_update_in_cache,
+                ttl=60,
+                litellm_parent_otel_span=litellm_parent_otel_span,
+            )

         except Exception as e:
             self.print_verbose(e)  # noqa

diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml
index dda6f8274..c8ca606a9 100644
--- a/litellm/proxy/proxy_config.yaml
+++ b/litellm/proxy/proxy_config.yaml
@@ -1,5 +1,5 @@
 model_list:
-  - model_name: gpt-3.5-turbo
+  - model_name: db-openai-endpoint
     litellm_params:
       model: openai/gpt-3.5-turbo
       api_key: fake-key
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 18361bca1..8c61783a2 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -242,6 +242,20 @@ class InternalUsageCache:
             **kwargs,
         )

+    async def async_batch_set_cache(
+        self,
+        cache_list: List,
+        litellm_parent_otel_span: Union[Span, None],
+        local_only: bool = False,
+        **kwargs,
+    ) -> None:
+        return await self.dual_cache.async_batch_set_cache(
+            cache_list=cache_list,
+            local_only=local_only,
+            litellm_parent_otel_span=litellm_parent_otel_span,
+            **kwargs,
+        )
+
     async def async_increment_cache(
         self,
         key,
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index f49fb6254..a51dcc693 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -24,7 +24,7 @@ from litellm import RateLimitError, Timeout, completion, completion_cost, embedding
 from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
 from litellm.llms.prompt_templates.factory import anthropic_messages_pt

-# litellm.num_retries = 3
+# litellm.num_retries=3
 litellm.cache = None
 litellm.success_callback = []

diff --git a/litellm/tests/test_embedding.py b/litellm/tests/test_embedding.py
index 732772e76..143784e88 100644
--- a/litellm/tests/test_embedding.py
+++ b/litellm/tests/test_embedding.py
@@ -316,6 +316,7 @@ def test_openai_azure_embedding():
     os.environ.get("CIRCLE_OIDC_TOKEN") is None,
     reason="Cannot run without being in CircleCI Runner",
 )
+@pytest.mark.skip(reason="Azure east us 2 has a temp outage")
 def test_openai_azure_embedding_with_oidc_and_cf():
     # TODO: Switch to our own Azure account, currently using ai.moda's account
     os.environ["AZURE_TENANT_ID"] = "17c0a27a-1246-4aa1-a3b6-d294e80e783c"