diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index 6804d677e..2831f1a5c 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -2454,6 +2454,17 @@
         "mode": "chat",
         "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models"
     },
+    "vertex_ai/meta/llama-3.2-90b-vision-instruct-maas": {
+        "max_tokens": 8192,
+        "max_input_tokens": 128000,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "vertex_ai-llama_models",
+        "mode": "chat",
+        "supports_system_messages": true,
+        "source": "https://console.cloud.google.com/vertex-ai/publishers/meta/model-garden/llama-3.2-90b-vision-instruct-maas"
+    },
     "vertex_ai/mistral-large@latest": {
         "max_tokens": 8191,
         "max_input_tokens": 128000,
diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py
index 80eea79fb..61448f4bb 100644
--- a/litellm/proxy/utils.py
+++ b/litellm/proxy/utils.py
@@ -70,6 +70,7 @@ from litellm.proxy.hooks.max_budget_limiter import _PROXY_MaxBudgetLimiter
 from litellm.proxy.hooks.parallel_request_limiter import (
     _PROXY_MaxParallelRequestsHandler,
 )
+from litellm.proxy.proxy_server import UserAPIKeyCacheTTLEnum
 from litellm.types.utils import CallTypes, LoggedLiteLLMParams
 
 if TYPE_CHECKING:
@@ -301,7 +302,9 @@ class ProxyLogging:
         self.call_details: dict = {}
         self.call_details["user_api_key_cache"] = user_api_key_cache
         self.internal_usage_cache: InternalUsageCache = InternalUsageCache(
-            dual_cache=DualCache(default_in_memory_ttl=1)  # ping redis cache every 1s
+            dual_cache=DualCache(
+                default_in_memory_ttl=UserAPIKeyCacheTTLEnum.in_memory_cache_ttl.value
+            )  # ping redis cache every 1s
         )
         self.max_parallel_request_limiter = _PROXY_MaxParallelRequestsHandler(
             self.internal_usage_cache
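
For readers of the second hunk: `UserAPIKeyCacheTTLEnum` is defined in `litellm/proxy/proxy_server.py`, not in this diff, so the sketch below only illustrates the assumed shape of that enum and why `.value` is needed when passing it to `DualCache`. The concrete TTL number used here (60) is a placeholder for illustration, not taken from the patch.

```python
import enum


class UserAPIKeyCacheTTLEnum(enum.Enum):
    # Assumed shape for illustration: a single member holding the in-memory
    # cache TTL in seconds. The real value lives in litellm/proxy/proxy_server.py.
    in_memory_cache_ttl = 60


# Enum members are not plain numbers, so the patch passes `.value`:
#   DualCache(default_in_memory_ttl=UserAPIKeyCacheTTLEnum.in_memory_cache_ttl.value)
ttl = UserAPIKeyCacheTTLEnum.in_memory_cache_ttl.value
print(ttl)  # -> 60 with the placeholder value above
```

Presumably the point of the change is to read the internal usage cache's in-memory TTL from the same shared constant as the user-API-key cache, instead of hard-coding 1 second in `ProxyLogging.__init__`.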