diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index d30270c5c8..2dc846df92 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -4201,6 +4201,15 @@
         "litellm_provider": "ollama",
         "mode": "completion"
     },
+    "ollama/llama2:7b": {
+        "max_tokens": 4096,
+        "max_input_tokens": 4096,
+        "max_output_tokens": 4096,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "completion"
+    },
     "ollama/llama2:13b": {
         "max_tokens": 4096,
         "max_input_tokens": 4096,
@@ -4237,6 +4246,15 @@
         "litellm_provider": "ollama",
         "mode": "chat"
     },
+    "ollama/llama3:8b": {
+        "max_tokens": 8192,
+        "max_input_tokens": 8192,
+        "max_output_tokens": 8192,
+        "input_cost_per_token": 0.0,
+        "output_cost_per_token": 0.0,
+        "litellm_provider": "ollama",
+        "mode": "chat"
+    },
     "ollama/llama3:70b": {
         "max_tokens": 8192,
         "max_input_tokens": 8192,
diff --git a/litellm/tests/test_parallel_request_limiter.py b/litellm/tests/test_parallel_request_limiter.py
index e6ffa272f7..6b272fa8cf 100644
--- a/litellm/tests/test_parallel_request_limiter.py
+++ b/litellm/tests/test_parallel_request_limiter.py
@@ -948,8 +948,10 @@ async def test_bad_router_tpm_limit_per_model():
         api_key=_api_key,
         max_parallel_requests=10,
         tpm_limit=10,
-        tpm_limit_per_model={model: 5},
-        rpm_limit_per_model={model: 5},
+        metadata={
+            "model_rpm_limit": {model: 5},
+            "model_tpm_limit": {model: 5},
+        },
     )
     local_cache = DualCache()
     pl = ProxyLogging(user_api_key_cache=local_cache)
@@ -1026,7 +1028,9 @@ async def test_pre_call_hook_rpm_limits_per_model():
         max_parallel_requests=100,
         tpm_limit=900000,
         rpm_limit=100000,
-        rpm_limit_per_model={"azure-model": 1},
+        metadata={
+            "model_rpm_limit": {"azure-model": 1},
+        },
     )
     local_cache = DualCache()
     pl = ProxyLogging(user_api_key_cache=local_cache)
@@ -1096,8 +1100,10 @@ async def test_pre_call_hook_tpm_limits_per_model():
         max_parallel_requests=100,
         tpm_limit=900000,
         rpm_limit=100000,
-        rpm_limit_per_model={"azure-model": 100},
-        tpm_limit_per_model={"azure-model": 10},
+        metadata={
+            "model_tpm_limit": {"azure-model": 1},
+            "model_rpm_limit": {"azure-model": 100},
+        },
     )
     local_cache = DualCache()
     pl = ProxyLogging(user_api_key_cache=local_cache)
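
The test changes above suggest that per-model limits move off the top-level UserAPIKeyAuth kwargs (tpm_limit_per_model / rpm_limit_per_model) and into the key's metadata under "model_tpm_limit" / "model_rpm_limit". Below is a minimal, hypothetical usage sketch of the new shape, assuming the import paths used by these tests (litellm.proxy._types.UserAPIKeyAuth, litellm.proxy.utils.ProxyLogging, litellm.caching.DualCache); the key string and limit values are placeholders for illustration, not part of this diff.

    # Sketch only: assumes the parallel-request limiter reads per-model limits
    # from UserAPIKeyAuth.metadata, as the updated tests do.
    from litellm.caching import DualCache
    from litellm.proxy._types import UserAPIKeyAuth
    from litellm.proxy.utils import ProxyLogging

    # Old style (removed by this change):
    #   UserAPIKeyAuth(..., tpm_limit_per_model={"azure-model": 10},
    #                       rpm_limit_per_model={"azure-model": 100})

    # New style: per-model limits live in the key's metadata.
    user_api_key_dict = UserAPIKeyAuth(
        api_key="sk-example",  # placeholder key
        max_parallel_requests=100,
        tpm_limit=900000,
        rpm_limit=100000,
        metadata={
            "model_tpm_limit": {"azure-model": 1},
            "model_rpm_limit": {"azure-model": 100},
        },
    )

    local_cache = DualCache()
    pl = ProxyLogging(user_api_key_cache=local_cache)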