diff --git a/docs/my-website/docs/proxy/team_budgets.md b/docs/my-website/docs/proxy/team_budgets.md
index f6b7c22824..9ab0c07866 100644
--- a/docs/my-website/docs/proxy/team_budgets.md
+++ b/docs/my-website/docs/proxy/team_budgets.md
@@ -154,7 +154,9 @@ litellm_remaining_team_budget_metric{team_alias="QA Prod Bot",team_id="de35b29e-
 
 ### Dynamic TPM Allocation
 
-Prevent teams from gobbling too much quota.
+Prevent projects from gobbling too much quota.
+
+Dynamically allocate TPM quota to api keys, based on active keys in that minute.
 
 1. Setup config.yaml
 
@@ -190,6 +192,12 @@ litellm --config /path/to/config.yaml
 - Mock response returns 30 total tokens / request
 - Each team will only be able to make 1 request per minute
 """
+"""
+- Run 2 concurrent teams calling same model
+- model has 60 TPM
+- Mock response returns 30 total tokens / request
+- Each team will only be able to make 1 request per minute
+"""
 import requests
 from openai import OpenAI, RateLimitError
 
@@ -204,7 +212,6 @@ def create_key(api_key: str, base_url: str):
 
     _response = response.json()
 
-    print(f"_response: {_response}")
     return _response["key"]
 
 key_1 = create_key(api_key="sk-1234", base_url="http://0.0.0.0:4000")
@@ -217,19 +224,19 @@ response = openai_client_1.chat.completions.with_raw_response.create(
     model="my-fake-model",
     messages=[{"role": "user", "content": "Hello world!"}],
 )
-print("Headers for call - {}".format(response.headers))
+print("Headers for call 1 - {}".format(response.headers))
 _response = response.parse()
 print("Total tokens for call - {}".format(_response.usage.total_tokens))
 
 # call proxy with key 2 - works
-openai_client_2 = OpenAI(api_key=key_1, base_url="http://0.0.0.0:4000")
+openai_client_2 = OpenAI(api_key=key_2, base_url="http://0.0.0.0:4000")
 
 response = openai_client_2.chat.completions.with_raw_response.create(
     model="my-fake-model",
     messages=[{"role": "user", "content": "Hello world!"}],
 )
-print("Headers for call - {}".format(response.headers))
+print("Headers for call 2 - {}".format(response.headers))
 _response = response.parse()
 print("Total tokens for call - {}".format(_response.usage.total_tokens))
 
 # call proxy with key 2 - fails
@@ -239,6 +246,10 @@ try:
     ...
 except RateLimitError as e:
     print("This was rate limited b/c - {}".format(str(e)))
 
+```
+**Expected Response**
+```
+This was rate limited b/c - Error code: 429 - {'error': {'message': {'error': 'Key= over available TPM=0. Model TPM=0, Active keys=2'}, 'type': 'None', 'param': 'None', 'code': 429}}
 ```
\ No newline at end of file
diff --git a/litellm/proxy/hooks/dynamic_rate_limiter.py b/litellm/proxy/hooks/dynamic_rate_limiter.py
index 8b132ff9ee..95f0ccc13e 100644
--- a/litellm/proxy/hooks/dynamic_rate_limiter.py
+++ b/litellm/proxy/hooks/dynamic_rate_limiter.py
@@ -151,8 +151,8 @@ class _PROXY_DynamicRateLimitHandler(CustomLogger):
                 raise HTTPException(
                     status_code=429,
                     detail={
-                        "error": "Team={} over available TPM={}. Model TPM={}, Active teams={}".format(
-                            user_api_key_dict.team_id,
+                        "error": "Key={} over available TPM={}. Model TPM={}, Active keys={}".format(
+                            user_api_key_dict.api_key,
                             available_tpm,
                             model_tpm,
                             active_projects,
@@ -164,7 +164,7 @@ class _PROXY_DynamicRateLimitHandler(CustomLogger):
         asyncio.create_task(
             self.internal_usage_cache.async_set_cache_sadd(  # this is a set
                 model=data["model"],  # type: ignore
-                value=[user_api_key_dict.team_id or "default_team"],
+                value=[user_api_key_dict.token or "default_key"],
            )
        )
        return None
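
Note on the behavior change above: the hook now reports and rate-limits per key rather than per team. Below is a minimal sketch of a key-based check, assuming an even split of the model's TPM across active keys; the function and parameter names are illustrative only and not litellm's actual implementation.

```python
# Hedged sketch, NOT the proxy's real code: the even-split formula and all names here
# (check_key_tpm, key_usage_this_minute, ...) are assumptions for illustration.
from typing import Set

from fastapi import HTTPException


def check_key_tpm(
    api_key: str, model_tpm: int, key_usage_this_minute: int, active_keys: Set[str]
) -> int:
    """Split a model's TPM evenly across keys active this minute; 429 when a key's share is spent."""
    active_count = max(len(active_keys), 1)
    per_key_quota = model_tpm // active_count
    available_tpm = max(per_key_quota - key_usage_this_minute, 0)

    if available_tpm == 0:
        # Same shape as the updated error string in the diff: report the key, not the team.
        raise HTTPException(
            status_code=429,
            detail={
                "error": "Key={} over available TPM={}. Model TPM={}, Active keys={}".format(
                    api_key, available_tpm, model_tpm, active_count
                )
            },
        )
    return available_tpm
```

With the doc example's numbers (model TPM of 60, 30-token mock responses, 2 active keys), each key's 30-token share is exhausted after one call, which lines up with the 429 shown in the **Expected Response** block.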
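
The second Python hunk switches the per-model "active projects" set from team IDs to key tokens. A rough sketch of why a set is used, assuming active keys are tracked per minute; the in-memory store and function names below are hypothetical stand-ins for the proxy's internal usage cache.

```python
# Hedged sketch: a per-minute, per-model set of key tokens. Duplicate adds from the
# same key are no-ops, so the active-key count only grows when a new key appears.
import time
from collections import defaultdict
from typing import Optional

# model -> minute window -> set of key tokens seen in that minute (hypothetical store)
_active_keys = defaultdict(lambda: defaultdict(set))


def record_active_key(model: str, token: Optional[str]) -> None:
    """Counterpart of the async_set_cache_sadd call: add this key's token to the model's set."""
    minute = int(time.time() // 60)
    _active_keys[model][minute].add(token or "default_key")


def active_key_count(model: str) -> int:
    """Distinct keys that called this model in the current minute."""
    minute = int(time.time() // 60)
    return len(_active_keys[model][minute])


if __name__ == "__main__":
    record_active_key("my-fake-model", "sk-key-1")
    record_active_key("my-fake-model", "sk-key-1")  # duplicate add is ignored by the set
    record_active_key("my-fake-model", "sk-key-2")
    print(active_key_count("my-fake-model"))  # -> 2
```

Using a set means repeat calls from the same key within the minute do not inflate the active-key count, so a key's share of the model TPM only shrinks when a genuinely new key becomes active.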