diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 7d30c959f1..8de8cd9ad1 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -376,6 +376,11 @@ async def user_api_key_auth(
             # 3. If 'user' passed to /chat/completions, /embeddings endpoint is in budget
             # 4. If token is expired
             # 5. If token spend is under Budget for the token
+            # 6. If token spend per model is under budget per model
+
+            request_data = await _read_request_body(
+                request=request
+            )  # request data, used across all checks. Making this easily available
 
             # Check 1. If token can call model
             litellm.model_alias_map = valid_token.aliases
@@ -450,7 +455,6 @@ async def user_api_key_auth(
                 if (
                     litellm.max_user_budget is not None
                 ):  # Check if 'user' passed in /chat/completions is in budget, only checked if litellm.max_user_budget is set
-                    request_data = await _read_request_body(request=request)
                     user_passed_to_chat_completions = request_data.get("user", None)
                     if user_passed_to_chat_completions is not None:
                         user_id_list.append(user_passed_to_chat_completions)
@@ -587,6 +591,25 @@ async def user_api_key_auth(
                         f"ExceededTokenBudget: Current spend for token: {valid_token.spend}; Max Budget for Token: {valid_token.max_budget}"
                     )
 
+            # Check 5. Token Model Spend is under Model budget
+            max_budget_per_model = valid_token.model_max_budget
+            spend_per_model = valid_token.model_spend
+
+            if max_budget_per_model is not None and spend_per_model is not None:
+                current_model = request_data.get("model")
+                if current_model is not None:
+                    current_model_spend = spend_per_model.get(current_model, None)
+                    current_model_budget = max_budget_per_model.get(current_model, None)
+
+                    if (
+                        current_model_spend is not None
+                        and current_model_budget is not None
+                    ):
+                        if current_model_spend > current_model_budget:
+                            raise Exception(
+                                f"ExceededModelBudget: Current spend for model: {current_model_spend}; Max Budget for Model: {current_model_budget}"
+                            )
+
             # Token passed all checks
             api_key = valid_token.token