diff --git a/.circleci/config.yml b/.circleci/config.yml index d95a8c214..0a6327bb3 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -770,6 +770,7 @@ jobs: - run: python ./tests/code_coverage_tests/test_router_strategy_async.py - run: python ./tests/code_coverage_tests/litellm_logging_code_coverage.py - run: python ./tests/documentation_tests/test_env_keys.py + - run: python ./tests/documentation_tests/test_api_docs.py - run: helm lint ./deploy/charts/litellm-helm db_migration_disable_update_check: diff --git a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/main.py b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/main.py index e8443e6f6..f335f53d9 100644 --- a/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/main.py +++ b/litellm/llms/vertex_ai_and_google_ai_studio/vertex_ai_partner_models/main.py @@ -236,4 +236,6 @@ class VertexAIPartnerModels(VertexBase): ) except Exception as e: + if hasattr(e, "status_code"): + raise e raise VertexAIError(status_code=500, message=str(e)) diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index f5851ded9..8b8dbf2e5 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -623,6 +623,8 @@ class GenerateRequestBase(LiteLLMBase): Overlapping schema between key and user generate/update requests """ + key_alias: Optional[str] = None + duration: Optional[str] = None models: Optional[list] = [] spend: Optional[float] = 0 max_budget: Optional[float] = None @@ -635,13 +637,6 @@ class GenerateRequestBase(LiteLLMBase): budget_duration: Optional[str] = None allowed_cache_controls: Optional[list] = [] soft_budget: Optional[float] = None - - -class _GenerateKeyRequest(GenerateRequestBase): - key_alias: Optional[str] = None - key: Optional[str] = None - duration: Optional[str] = None - aliases: Optional[dict] = {} config: Optional[dict] = {} permissions: Optional[dict] = {} model_max_budget: Optional[dict] = ( @@ -654,6 +649,11 @@ class 
_GenerateKeyRequest(GenerateRequestBase): model_tpm_limit: Optional[dict] = None guardrails: Optional[List[str]] = None blocked: Optional[bool] = None + aliases: Optional[dict] = {} + + +class _GenerateKeyRequest(GenerateRequestBase): + key: Optional[str] = None class GenerateKeyRequest(_GenerateKeyRequest): @@ -719,7 +719,7 @@ class LiteLLM_ModelTable(LiteLLMBase): model_config = ConfigDict(protected_namespaces=()) -class NewUserRequest(_GenerateKeyRequest): +class NewUserRequest(GenerateRequestBase): max_budget: Optional[float] = None user_email: Optional[str] = None user_alias: Optional[str] = None @@ -786,7 +786,51 @@ class DeleteUserRequest(LiteLLMBase): AllowedModelRegion = Literal["eu", "us"] -class NewCustomerRequest(LiteLLMBase): +class BudgetNew(LiteLLMBase): + budget_id: Optional[str] = Field(default=None, description="The unique budget id.") + max_budget: Optional[float] = Field( + default=None, + description="Requests will fail if this budget (in USD) is exceeded.", + ) + soft_budget: Optional[float] = Field( + default=None, + description="Requests will NOT fail if this is exceeded. Will fire alerting though.", + ) + max_parallel_requests: Optional[int] = Field( + default=None, description="Max concurrent requests allowed for this budget id." + ) + tpm_limit: Optional[int] = Field( + default=None, description="Max tokens per minute, allowed for this budget id." + ) + rpm_limit: Optional[int] = Field( + default=None, description="Max requests per minute, allowed for this budget id." + ) + budget_duration: Optional[str] = Field( + default=None, + description="Max duration budget should be set for (e.g. 
'1hr', '1d', '28d')", + ) + + +class BudgetRequest(LiteLLMBase): + budgets: List[str] + + +class BudgetDeleteRequest(LiteLLMBase): + id: str + + +class CustomerBase(LiteLLMBase): + user_id: str + alias: Optional[str] = None + spend: float = 0.0 + allowed_model_region: Optional[AllowedModelRegion] = None + default_model: Optional[str] = None + budget_id: Optional[str] = None + litellm_budget_table: Optional[BudgetNew] = None + blocked: bool = False + + +class NewCustomerRequest(BudgetNew): """ Create a new customer, allocate a budget to them """ @@ -794,7 +838,6 @@ class NewCustomerRequest(LiteLLMBase): user_id: str alias: Optional[str] = None # human-friendly alias blocked: bool = False # allow/disallow requests for this end-user - max_budget: Optional[float] = None budget_id: Optional[str] = None # give either a budget_id or max_budget allowed_model_region: Optional[AllowedModelRegion] = ( None # require all user requests to use models in this specific region @@ -1083,39 +1126,6 @@ class OrganizationRequest(LiteLLMBase): organizations: List[str] -class BudgetNew(LiteLLMBase): - budget_id: str = Field(default=None, description="The unique budget id.") - max_budget: Optional[float] = Field( - default=None, - description="Requests will fail if this budget (in USD) is exceeded.", - ) - soft_budget: Optional[float] = Field( - default=None, - description="Requests will NOT fail if this is exceeded. Will fire alerting though.", - ) - max_parallel_requests: Optional[int] = Field( - default=None, description="Max concurrent requests allowed for this budget id." - ) - tpm_limit: Optional[int] = Field( - default=None, description="Max tokens per minute, allowed for this budget id." - ) - rpm_limit: Optional[int] = Field( - default=None, description="Max requests per minute, allowed for this budget id." - ) - budget_duration: Optional[str] = Field( - default=None, - description="Max duration budget should be set for (e.g. 
'1hr', '1d', '28d')", - ) - - -class BudgetRequest(LiteLLMBase): - budgets: List[str] - - -class BudgetDeleteRequest(LiteLLMBase): - id: str - - class KeyManagementSystem(enum.Enum): GOOGLE_KMS = "google_kms" AZURE_KEY_VAULT = "azure_key_vault" @@ -2081,3 +2091,45 @@ JWKKeyValue = Union[List[JWTKeyItem], JWTKeyItem] class JWKUrlResponse(TypedDict, total=False): keys: JWKKeyValue + + +class UserManagementEndpointParamDocStringEnums(str, enum.Enum): + user_id_doc_str = ( + "Optional[str] - Specify a user id. If not set, a unique id will be generated." + ) + user_alias_doc_str = ( + "Optional[str] - A descriptive name for you to know who this user id refers to." + ) + teams_doc_str = "Optional[list] - specify a list of team id's a user belongs to." + user_email_doc_str = "Optional[str] - Specify a user email." + send_invite_email_doc_str = ( + "Optional[bool] - Specify if an invite email should be sent." + ) + user_role_doc_str = """Optional[str] - Specify a user role - "proxy_admin", "proxy_admin_viewer", "internal_user", "internal_user_viewer", "team", "customer". Info about each role here: `https://github.com/BerriAI/litellm/litellm/proxy/_types.py#L20`""" + max_budget_doc_str = """Optional[float] - Specify max budget for a given user.""" + budget_duration_doc_str = """Optional[str] - Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"), months ("1mo").""" + models_doc_str = """Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models)""" + tpm_limit_doc_str = ( + """Optional[int] - Specify tpm limit for a given user (Tokens per minute)""" + ) + rpm_limit_doc_str = ( + """Optional[int] - Specify rpm limit for a given user (Requests per minute)""" + ) + auto_create_key_doc_str = """bool - Default=True. 
Flag used for returning a key as part of the /user/new response""" + aliases_doc_str = """Optional[dict] - Model aliases for the user - [Docs](https://litellm.vercel.app/docs/proxy/virtual_keys#model-aliases)""" + config_doc_str = """Optional[dict] - [DEPRECATED PARAM] User-specific config.""" + allowed_cache_controls_doc_str = """Optional[list] - List of allowed cache control values. Example - ["no-cache", "no-store"]. See all values - https://docs.litellm.ai/docs/proxy/caching#turn-on--off-caching-per-request-""" + blocked_doc_str = ( + """Optional[bool] - [Not Implemented Yet] Whether the user is blocked.""" + ) + guardrails_doc_str = """Optional[List[str]] - [Not Implemented Yet] List of active guardrails for the user""" + permissions_doc_str = """Optional[dict] - [Not Implemented Yet] User-specific permissions, eg. turning off pii masking.""" + metadata_doc_str = """Optional[dict] - Metadata for user, store information for user. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" }""" + max_parallel_requests_doc_str = """Optional[int] - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x.""" + soft_budget_doc_str = """Optional[float] - Get alerts when user crosses given budget, doesn't block requests.""" + model_max_budget_doc_str = """Optional[dict] - Model-specific max budget for user. [Docs](https://docs.litellm.ai/docs/proxy/users#add-model-specific-budgets-to-keys)""" + model_rpm_limit_doc_str = """Optional[float] - Model-specific rpm limit for user. [Docs](https://docs.litellm.ai/docs/proxy/users#add-model-specific-limits-to-keys)""" + model_tpm_limit_doc_str = """Optional[float] - Model-specific tpm limit for user. [Docs](https://docs.litellm.ai/docs/proxy/users#add-model-specific-limits-to-keys)""" + spend_doc_str = """Optional[float] - Amount spent by user. Default is 0. 
Will be updated by proxy whenever user is used.""" + team_id_doc_str = """Optional[str] - [DEPRECATED PARAM] The team id of the user. Default is None.""" + duration_doc_str = """Optional[str] - Duration for the key auto-created on `/user/new`. Default is None.""" diff --git a/litellm/proxy/management_endpoints/customer_endpoints.py b/litellm/proxy/management_endpoints/customer_endpoints.py index cb57619b9..48b01b0cb 100644 --- a/litellm/proxy/management_endpoints/customer_endpoints.py +++ b/litellm/proxy/management_endpoints/customer_endpoints.py @@ -1,3 +1,14 @@ +""" +CUSTOMER MANAGEMENT + +All /customer management endpoints + +/customer/new +/customer/info +/customer/update +/customer/delete +""" + #### END-USER/CUSTOMER MANAGEMENT #### import asyncio import copy @@ -129,6 +140,26 @@ async def unblock_user(data: BlockUsers): return {"blocked_users": litellm.blocked_user_list} +def new_budget_request(data: NewCustomerRequest) -> Optional[BudgetNew]: + """ + Return a new budget object if new budget params are passed. + """ + budget_params = BudgetNew.model_fields.keys() + budget_kv_pairs = {} + + # Get the actual values from the data object using getattr + for field_name in budget_params: + if field_name == "budget_id": + continue + value = getattr(data, field_name, None) + if value is not None: + budget_kv_pairs[field_name] = value + + if budget_kv_pairs: + return BudgetNew(**budget_kv_pairs) + return None + + @router.post( "/end_user/new", tags=["Customer Management"], @@ -157,6 +188,11 @@ async def new_end_user( - allowed_model_region: Optional[Union[Literal["eu"], Literal["us"]]] - Require all user requests to use models in this specific region. - default_model: Optional[str] - If no equivalent model in the allowed region, default all requests to this model. - metadata: Optional[dict] = Metadata for customer, store information for customer. 
Example metadata = {"data_training_opt_out": True} + - budget_duration: Optional[str] - Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"). + - tpm_limit: Optional[int] - [Not Implemented Yet] Specify tpm limit for a given customer (Tokens per minute) + - rpm_limit: Optional[int] - [Not Implemented Yet] Specify rpm limit for a given customer (Requests per minute) + - max_parallel_requests: Optional[int] - [Not Implemented Yet] Specify max parallel requests for a given customer. + - soft_budget: Optional[float] - [Not Implemented Yet] Get alerts when customer crosses given budget, doesn't block requests. - Allow specifying allowed regions @@ -223,14 +259,19 @@ async def new_end_user( new_end_user_obj: Dict = {} ## CREATE BUDGET ## if set - if data.max_budget is not None: - budget_record = await prisma_client.db.litellm_budgettable.create( - data={ - "max_budget": data.max_budget, - "created_by": user_api_key_dict.user_id or litellm_proxy_admin_name, # type: ignore - "updated_by": user_api_key_dict.user_id or litellm_proxy_admin_name, - } - ) + _new_budget = new_budget_request(data) + if _new_budget is not None: + try: + budget_record = await prisma_client.db.litellm_budgettable.create( + data={ + **_new_budget.model_dump(exclude_unset=True), + "created_by": user_api_key_dict.user_id or litellm_proxy_admin_name, # type: ignore + "updated_by": user_api_key_dict.user_id + or litellm_proxy_admin_name, + } + ) + except Exception as e: + raise HTTPException(status_code=422, detail={"error": str(e)}) new_end_user_obj["budget_id"] = budget_record.budget_id elif data.budget_id is not None: @@ -239,16 +280,22 @@ async def new_end_user( _user_data = data.dict(exclude_none=True) for k, v in _user_data.items(): - if k != "max_budget" and k != "budget_id": + if k not in BudgetNew.model_fields.keys(): new_end_user_obj[k] = v ## WRITE TO DB ## end_user_record = 
await prisma_client.db.litellm_endusertable.create( - data=new_end_user_obj # type: ignore + data=new_end_user_obj, # type: ignore + include={"litellm_budget_table": True}, ) return end_user_record except Exception as e: + verbose_proxy_logger.exception( + "litellm.proxy.management_endpoints.customer_endpoints.new_end_user(): Exception occured - {}".format( + str(e) + ) + ) if "Unique constraint failed on the fields: (`user_id`)" in str(e): raise ProxyException( message=f"Customer already exists, passed user_id={data.user_id}. Please pass a new user_id.", diff --git a/litellm/proxy/management_endpoints/internal_user_endpoints.py b/litellm/proxy/management_endpoints/internal_user_endpoints.py index 49ef25149..c69e255f2 100644 --- a/litellm/proxy/management_endpoints/internal_user_endpoints.py +++ b/litellm/proxy/management_endpoints/internal_user_endpoints.py @@ -102,11 +102,27 @@ async def new_user( - send_invite_email: Optional[bool] - Specify if an invite email should be sent. - user_role: Optional[str] - Specify a user role - "proxy_admin", "proxy_admin_viewer", "internal_user", "internal_user_viewer", "team", "customer". Info about each role here: `https://github.com/BerriAI/litellm/litellm/proxy/_types.py#L20` - max_budget: Optional[float] - Specify max budget for a given user. - - budget_duration: Optional[str] - Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"). + - budget_duration: Optional[str] - Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"), months ("1mo"). - models: Optional[list] - Model_name's a user is allowed to call. 
(if empty, key is allowed to call all models) - tpm_limit: Optional[int] - Specify tpm limit for a given user (Tokens per minute) - rpm_limit: Optional[int] - Specify rpm limit for a given user (Requests per minute) - auto_create_key: bool - Default=True. Flag used for returning a key as part of the /user/new response + - aliases: Optional[dict] - Model aliases for the user - [Docs](https://litellm.vercel.app/docs/proxy/virtual_keys#model-aliases) + - config: Optional[dict] - [DEPRECATED PARAM] User-specific config. + - allowed_cache_controls: Optional[list] - List of allowed cache control values. Example - ["no-cache", "no-store"]. See all values - https://docs.litellm.ai/docs/proxy/caching#turn-on--off-caching-per-request- + - blocked: Optional[bool] - [Not Implemented Yet] Whether the user is blocked. + - guardrails: Optional[List[str]] - [Not Implemented Yet] List of active guardrails for the user + - permissions: Optional[dict] - [Not Implemented Yet] User-specific permissions, eg. turning off pii masking. + - metadata: Optional[dict] - Metadata for user, store information for user. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" } + - max_parallel_requests: Optional[int] - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x. + - soft_budget: Optional[float] - Get alerts when user crosses given budget, doesn't block requests. + - model_max_budget: Optional[dict] - Model-specific max budget for user. [Docs](https://docs.litellm.ai/docs/proxy/users#add-model-specific-budgets-to-keys) + - model_rpm_limit: Optional[float] - Model-specific rpm limit for user. [Docs](https://docs.litellm.ai/docs/proxy/users#add-model-specific-limits-to-keys) + - model_tpm_limit: Optional[float] - Model-specific tpm limit for user. [Docs](https://docs.litellm.ai/docs/proxy/users#add-model-specific-limits-to-keys) + - spend: Optional[float] - Amount spent by user. Default is 0. 
Will be updated by proxy whenever user is used. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"), months ("1mo"). + - team_id: Optional[str] - [DEPRECATED PARAM] The team id of the user. Default is None. + - duration: Optional[str] - Duration for the key auto-created on `/user/new`. Default is None. + - key_alias: Optional[str] - Alias for the key auto-created on `/user/new`. Default is None. Returns: - key: (str) The generated api key for the user @@ -445,54 +461,36 @@ async def user_update( }' Parameters: - user_id: Optional[str] - Unique identifier for the user to update - - user_email: Optional[str] - Email address for the user - - password: Optional[str] - Password for the user - - user_role: Optional[Literal["proxy_admin", "proxy_admin_viewer", "internal_user", "internal_user_viewer"]] - Role assigned to the user. Can be one of: - - proxy_admin: Full admin access - - proxy_admin_viewer: Read-only admin access - - internal_user: Standard internal user - - internal_user_viewer: Read-only internal user - - models: Optional[list] - List of model names the user is allowed to access - - spend: Optional[float] - Current spend amount for the user - - max_budget: Optional[float] - Maximum budget allowed for the user - - team_id: Optional[str] - ID of the team the user belongs to - - max_parallel_requests: Optional[int] - Maximum number of concurrent requests allowed - - metadata: Optional[dict] - Additional metadata associated with the user - - tpm_limit: Optional[int] - Maximum tokens per minute allowed - - rpm_limit: Optional[int] - Maximum requests per minute allowed - - budget_duration: Optional[str] - Duration for budget renewal (e.g., "30d" for 30 days) - - allowed_cache_controls: Optional[list] - List of allowed cache control options - - soft_budget: Optional[float] - Soft budget limit for alerting purposes + - user_id: Optional[str] - Specify a user id. If not set, a unique id will be generated. 
+ - user_email: Optional[str] - Specify a user email. + - password: Optional[str] - Specify a user password. + - user_alias: Optional[str] - A descriptive name for you to know who this user id refers to. + - teams: Optional[list] - specify a list of team id's a user belongs to. + - send_invite_email: Optional[bool] - Specify if an invite email should be sent. + - user_role: Optional[str] - Specify a user role - "proxy_admin", "proxy_admin_viewer", "internal_user", "internal_user_viewer", "team", "customer". Info about each role here: `https://github.com/BerriAI/litellm/litellm/proxy/_types.py#L20` + - max_budget: Optional[float] - Specify max budget for a given user. + - budget_duration: Optional[str] - Budget is reset at the end of specified duration. If not set, budget is never reset. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"), months ("1mo"). + - models: Optional[list] - Model_name's a user is allowed to call. (if empty, key is allowed to call all models) + - tpm_limit: Optional[int] - Specify tpm limit for a given user (Tokens per minute) + - rpm_limit: Optional[int] - Specify rpm limit for a given user (Requests per minute) + - auto_create_key: bool - Default=True. Flag used for returning a key as part of the /user/new response + - aliases: Optional[dict] - Model aliases for the user - [Docs](https://litellm.vercel.app/docs/proxy/virtual_keys#model-aliases) + - config: Optional[dict] - [DEPRECATED PARAM] User-specific config. + - allowed_cache_controls: Optional[list] - List of allowed cache control values. Example - ["no-cache", "no-store"]. See all values - https://docs.litellm.ai/docs/proxy/caching#turn-on--off-caching-per-request- + - blocked: Optional[bool] - [Not Implemented Yet] Whether the user is blocked. + - guardrails: Optional[List[str]] - [Not Implemented Yet] List of active guardrails for the user + - permissions: Optional[dict] - [Not Implemented Yet] User-specific permissions, eg. 
turning off pii masking. + - metadata: Optional[dict] - Metadata for user, store information for user. Example metadata = {"team": "core-infra", "app": "app2", "email": "ishaan@berri.ai" } + - max_parallel_requests: Optional[int] - Rate limit a user based on the number of parallel requests. Raises 429 error, if user's parallel requests > x. + - soft_budget: Optional[float] - Get alerts when user crosses given budget, doesn't block requests. + - model_max_budget: Optional[dict] - Model-specific max budget for user. [Docs](https://docs.litellm.ai/docs/proxy/users#add-model-specific-budgets-to-keys) + - model_rpm_limit: Optional[float] - Model-specific rpm limit for user. [Docs](https://docs.litellm.ai/docs/proxy/users#add-model-specific-limits-to-keys) + - model_tpm_limit: Optional[float] - Model-specific tpm limit for user. [Docs](https://docs.litellm.ai/docs/proxy/users#add-model-specific-limits-to-keys) + - spend: Optional[float] - Amount spent by user. Default is 0. Will be updated by proxy whenever user is used. You can set duration as seconds ("30s"), minutes ("30m"), hours ("30h"), days ("30d"), months ("1mo"). + - team_id: Optional[str] - [DEPRECATED PARAM] The team id of the user. Default is None. + - duration: Optional[str] - [NOT IMPLEMENTED]. + - key_alias: Optional[str] - [NOT IMPLEMENTED]. + ``` """ from litellm.proxy.proxy_server import prisma_client diff --git a/litellm/proxy/management_endpoints/key_management_endpoints.py b/litellm/proxy/management_endpoints/key_management_endpoints.py index c2de82ce7..e4493a28c 100644 --- a/litellm/proxy/management_endpoints/key_management_endpoints.py +++ b/litellm/proxy/management_endpoints/key_management_endpoints.py @@ -83,6 +83,13 @@ async def generate_key_fn( # noqa: PLR0915 - model_max_budget: Optional[dict] - key-specific model budget in USD. Example - {"text-davinci-002": 0.5, "gpt-3.5-turbo": 0.5}. IF null or {} then no model specific budget. 
- model_rpm_limit: Optional[dict] - key-specific model rpm limit. Example - {"text-davinci-002": 1000, "gpt-3.5-turbo": 1000}. IF null or {} then no model specific rpm limit. - model_tpm_limit: Optional[dict] - key-specific model tpm limit. Example - {"text-davinci-002": 1000, "gpt-3.5-turbo": 1000}. IF null or {} then no model specific tpm limit. + - allowed_cache_controls: Optional[list] - List of allowed cache control values. Example - ["no-cache", "no-store"]. See all values - https://docs.litellm.ai/docs/proxy/caching#turn-on--off-caching-per-request + - blocked: Optional[bool] - Whether the key is blocked. + - rpm_limit: Optional[int] - Specify rpm limit for a given key (Requests per minute) + - tpm_limit: Optional[int] - Specify tpm limit for a given key (Tokens per minute) + - soft_budget: Optional[float] - Specify soft budget for a given key. Will trigger a slack alert when this soft budget is reached. + - tags: Optional[List[str]] - Tags for [tracking spend](https://litellm.vercel.app/docs/proxy/enterprise#tracking-spend-for-custom-tags) and/or doing [tag-based routing](https://litellm.vercel.app/docs/proxy/tag_routing). + Examples: 1. Allow users to turn on/off pii masking @@ -349,6 +356,8 @@ async def update_key_fn( - send_invite_email: Optional[bool] - Send invite email to user_id - guardrails: Optional[List[str]] - List of active guardrails for the key - blocked: Optional[bool] - Whether the key is blocked + - aliases: Optional[dict] - Model aliases for the key - [Docs](https://litellm.vercel.app/docs/proxy/virtual_keys#model-aliases) + - config: Optional[dict] - [DEPRECATED PARAM] Key-specific config. 
Example: ```bash diff --git a/litellm/proxy/management_endpoints/organization_endpoints.py b/litellm/proxy/management_endpoints/organization_endpoints.py index 5f58c4231..81d135097 100644 --- a/litellm/proxy/management_endpoints/organization_endpoints.py +++ b/litellm/proxy/management_endpoints/organization_endpoints.py @@ -5,6 +5,7 @@ Endpoints for /organization operations /organization/update /organization/delete /organization/info +/organization/list """ #### ORGANIZATION MANAGEMENT #### @@ -55,15 +56,23 @@ async def new_organization( # Parameters - - `organization_alias`: *str* = The name of the organization. - - `models`: *List* = The models the organization has access to. - - `budget_id`: *Optional[str]* = The id for a budget (tpm/rpm/max budget) for the organization. + - organization_alias: *str* - The name of the organization. + - models: *List* - The models the organization has access to. + - budget_id: *Optional[str]* - The id for a budget (tpm/rpm/max budget) for the organization. ### IF NO BUDGET ID - CREATE ONE WITH THESE PARAMS ### - - `max_budget`: *Optional[float]* = Max budget for org - - `tpm_limit`: *Optional[int]* = Max tpm limit for org - - `rpm_limit`: *Optional[int]* = Max rpm limit for org - - `model_max_budget`: *Optional[dict]* = Max budget for a specific model - - `budget_duration`: *Optional[str]* = Frequency of reseting org budget + - max_budget: *Optional[float]* - Max budget for org + - tpm_limit: *Optional[int]* - Max tpm limit for org + - rpm_limit: *Optional[int]* - Max rpm limit for org + - max_parallel_requests: *Optional[int]* - [Not Implemented Yet] Max parallel requests for org + - soft_budget: *Optional[float]* - [Not Implemented Yet] Get a slack alert when this soft budget is reached. Don't block requests. 
+ - model_max_budget: *Optional[dict]* - Max budget for a specific model + - budget_duration: *Optional[str]* - Frequency of reseting org budget + - metadata: *Optional[dict]* - Metadata for team, store information for team. Example metadata - {"extra_info": "some info"} + - blocked: *bool* - Flag indicating if the org is blocked or not - will stop all calls from keys with this org_id. + - tags: *Optional[List[str]]* - Tags for [tracking spend](https://litellm.vercel.app/docs/proxy/enterprise#tracking-spend-for-custom-tags) and/or doing [tag-based routing](https://litellm.vercel.app/docs/proxy/tag_routing). + - organization_id: *Optional[str]* - The organization id of the team. Default is None. Create via `/organization/new`. + - model_aliases: Optional[dict] - Model aliases for the team. [Docs](https://docs.litellm.ai/docs/proxy/team_based_routing#create-team-with-model-alias) + Case 1: Create new org **without** a budget_id @@ -185,7 +194,7 @@ async def new_organization( ) async def update_organization(): """[TODO] Not Implemented yet. Let us know if you need this - https://github.com/BerriAI/litellm/issues""" - pass + raise NotImplementedError("Not Implemented Yet") @router.post( @@ -195,7 +204,7 @@ async def update_organization(): ) async def delete_organization(): """[TODO] Not Implemented yet. 
Let us know if you need this - https://github.com/BerriAI/litellm/issues""" - pass + raise NotImplementedError("Not Implemented Yet") @router.get( @@ -204,38 +213,38 @@ async def delete_organization(): dependencies=[Depends(user_api_key_auth)], ) async def list_organization( - user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), + user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth), ): - """ + """ ``` curl --location --request GET 'http://0.0.0.0:4000/organization/list' \ --header 'Authorization: Bearer sk-1234' ``` """ - from litellm.proxy.proxy_server import prisma_client - - if prisma_client is None: - raise HTTPException(status_code=500, detail={"error": "No db connected"}) - - if ( - user_api_key_dict.user_role is None - or user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN - ): - raise HTTPException( - status_code=401, - detail={ - "error": f"Only admins can list orgs. Your role is = {user_api_key_dict.user_role}" - }, - ) - if prisma_client is None: - raise HTTPException( - status_code=400, - detail={"error": CommonProxyErrors.db_not_connected_error.value}, - ) - response= await prisma_client.db.litellm_organizationtable.find_many() + from litellm.proxy.proxy_server import prisma_client + + if prisma_client is None: + raise HTTPException(status_code=500, detail={"error": "No db connected"}) + + if ( + user_api_key_dict.user_role is None + or user_api_key_dict.user_role != LitellmUserRoles.PROXY_ADMIN + ): + raise HTTPException( + status_code=401, + detail={ + "error": f"Only admins can list orgs. 
Your role is = {user_api_key_dict.user_role}" + }, + ) + if prisma_client is None: + raise HTTPException( + status_code=400, + detail={"error": CommonProxyErrors.db_not_connected_error.value}, + ) + response = await prisma_client.db.litellm_organizationtable.find_many() + + return response - return response - @router.post( "/organization/info", diff --git a/litellm/proxy/management_endpoints/team_endpoints.py b/litellm/proxy/management_endpoints/team_endpoints.py index 251fa648e..dc1ec444d 100644 --- a/litellm/proxy/management_endpoints/team_endpoints.py +++ b/litellm/proxy/management_endpoints/team_endpoints.py @@ -1,3 +1,14 @@ +""" +TEAM MANAGEMENT + +All /team management endpoints + +/team/new +/team/info +/team/update +/team/delete +""" + import asyncio import copy import json @@ -121,6 +132,10 @@ async def new_team( # noqa: PLR0915 - budget_duration: Optional[str] - The duration of the budget for the team. Doc [here](https://docs.litellm.ai/docs/proxy/team_budgets) - models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most, these models. If empty, assumes all models are allowed. - blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id. + - members: Optional[List] - Control team members via `/team/member/add` and `/team/member/delete`. + - tags: Optional[List[str]] - Tags for [tracking spend](https://litellm.vercel.app/docs/proxy/enterprise#tracking-spend-for-custom-tags) and/or doing [tag-based routing](https://litellm.vercel.app/docs/proxy/tag_routing). + - organization_id: Optional[str] - The organization id of the team. Default is None. Create via `/organization/new`. + - model_aliases: Optional[dict] - Model aliases for the team. [Docs](https://docs.litellm.ai/docs/proxy/team_based_routing#create-team-with-model-alias) Returns: - team_id: (str) Unique team id - used for tracking spend across multiple keys for same team id. 
@@ -353,6 +368,8 @@ async def update_team( - budget_duration: Optional[str] - The duration of the budget for the team. Doc [here](https://docs.litellm.ai/docs/proxy/team_budgets) - models: Optional[list] - A list of models associated with the team - all keys for this team_id will have at most, these models. If empty, assumes all models are allowed. - blocked: bool - Flag indicating if the team is blocked or not - will stop all calls from keys with this team_id. + - tags: Optional[List[str]] - Tags for [tracking spend](https://litellm.vercel.app/docs/proxy/enterprise#tracking-spend-for-custom-tags) and/or doing [tag-based routing](https://litellm.vercel.app/docs/proxy/tag_routing). + - organization_id: Optional[str] - The organization id of the team. Default is None. Create via `/organization/new`. Example - update team TPM Limit diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index e495f3490..74bf398e7 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -3127,6 +3127,7 @@ def _get_docs_url() -> Optional[str]: # default to "/" return "/" + def handle_exception_on_proxy(e: Exception) -> ProxyException: """ Returns an Exception as ProxyException, this ensures all exceptions are OpenAI API compatible @@ -3148,4 +3149,3 @@ def handle_exception_on_proxy(e: Exception) -> ProxyException: param=getattr(e, "param", "None"), code=status.HTTP_500_INTERNAL_SERVER_ERROR, ) - diff --git a/tests/documentation_tests/test_api_docs.py b/tests/documentation_tests/test_api_docs.py new file mode 100644 index 000000000..407010dcc --- /dev/null +++ b/tests/documentation_tests/test_api_docs.py @@ -0,0 +1,206 @@ +import ast +from typing import List, Dict, Set, Optional +import os +from dataclasses import dataclass +import argparse +import re +import sys + +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path +import litellm + + +@dataclass +class FunctionInfo: + """Store function information.""" + + name: str + 
docstring: Optional[str] + parameters: Set[str] + file_path: str + line_number: int + + +class FastAPIDocVisitor(ast.NodeVisitor): + """AST visitor to find FastAPI endpoint functions.""" + + def __init__(self, target_functions: Set[str]): + self.target_functions = target_functions + self.functions: Dict[str, FunctionInfo] = {} + self.current_file = "" + + def visit_FunctionDef(self, node: ast.FunctionDef | ast.AsyncFunctionDef) -> None: + """Visit function definitions (both async and sync) and collect info if they match target functions.""" + if node.name in self.target_functions: + # Extract docstring + docstring = ast.get_docstring(node) + + # Extract parameters + parameters = set() + for arg in node.args.args: + if arg.annotation is not None: + # Get the parameter type from annotation + if isinstance(arg.annotation, ast.Name): + parameters.add((arg.arg, arg.annotation.id)) + elif isinstance(arg.annotation, ast.Subscript): + if isinstance(arg.annotation.value, ast.Name): + parameters.add((arg.arg, arg.annotation.value.id)) + + self.functions[node.name] = FunctionInfo( + name=node.name, + docstring=docstring, + parameters=parameters, + file_path=self.current_file, + line_number=node.lineno, + ) + + # Also need to add this to handle async functions + def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: + """Handle async functions by delegating to the regular function visitor.""" + return self.visit_FunctionDef(node) + + +def find_functions_in_file( + file_path: str, target_functions: Set[str] +) -> Dict[str, FunctionInfo]: + """Find target functions in a Python file using AST.""" + try: + with open(file_path, "r", encoding="utf-8") as f: + content = f.read() + + visitor = FastAPIDocVisitor(target_functions) + visitor.current_file = file_path + tree = ast.parse(content) + visitor.visit(tree) + return visitor.functions + + except Exception as e: + print(f"Error parsing {file_path}: {str(e)}") + return {} + + +def extract_docstring_params(docstring: 
Optional[str]) -> Set[str]: + """Extract parameter names from docstring.""" + if not docstring: + return set() + + params = set() + # Match parameters in format: + # - parameter_name: description + # or + # parameter_name: description + param_pattern = r"-?\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*(?:\([^)]*\))?\s*:" + + for match in re.finditer(param_pattern, docstring): + params.add(match.group(1)) + + return params + + +def analyze_function(func_info: FunctionInfo) -> Dict: + """Analyze function documentation and return validation results.""" + + docstring_params = extract_docstring_params(func_info.docstring) + + print(f"func_info.parameters: {func_info.parameters}") + pydantic_params = set() + + for name, type_name in func_info.parameters: + if type_name.endswith("Request") or type_name.endswith("Response"): + pydantic_model = getattr(litellm.proxy._types, type_name, None) + if pydantic_model is not None: + for param in pydantic_model.model_fields.keys(): + pydantic_params.add(param) + + print(f"pydantic_params: {pydantic_params}") + + missing_params = pydantic_params - docstring_params + + return { + "function": func_info.name, + "file_path": func_info.file_path, + "line_number": func_info.line_number, + "has_docstring": bool(func_info.docstring), + "pydantic_params": list(pydantic_params), + "documented_params": list(docstring_params), + "missing_params": list(missing_params), + "is_valid": len(missing_params) == 0, + } + + +def print_validation_results(results: Dict) -> None: + """Print validation results in a readable format.""" + print(f"\nChecking function: {results['function']}") + print(f"File: {results['file_path']}:{results['line_number']}") + print("-" * 50) + + if not results["has_docstring"]: + print("❌ No docstring found!") + return + + if not results["pydantic_params"]: + print("ℹ️ No Pydantic input models found.") + return + + if results["is_valid"]: + print("✅ All Pydantic parameters are documented!") + else: + print("❌ Missing documentation for 
parameters:") + for param in sorted(results["missing_params"]): + print(f" - {param}") + + +def main(): + function_names = [ + "new_end_user", + "end_user_info", + "update_end_user", + "delete_end_user", + "generate_key_fn", + "info_key_fn", + "update_key_fn", + "delete_key_fn", + "new_user", + "new_team", + "team_info", + "update_team", + "delete_team", + "new_organization", + "update_organization", + "delete_organization", + "list_organization", + "user_update", + ] + directory = "../../litellm/proxy/management_endpoints" # LOCAL + # directory = "./litellm/proxy/management_endpoints" + + # Convert function names to set for faster lookup + target_functions = set(function_names) + found_functions: Dict[str, FunctionInfo] = {} + + # Walk through directory + for root, _, files in os.walk(directory): + for file in files: + if file.endswith(".py"): + file_path = os.path.join(root, file) + found = find_functions_in_file(file_path, target_functions) + found_functions.update(found) + + # Analyze and output results + for func_name in function_names: + if func_name in found_functions: + result = analyze_function(found_functions[func_name]) + if not result["is_valid"]: + raise Exception(print_validation_results(result)) + # results.append(result) + # print_validation_results(result) + + # # Exit with error code if any validation failed + # if any(not r["is_valid"] for r in results): + # exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/proxy_unit_tests/test_key_generate_prisma.py b/tests/proxy_unit_tests/test_key_generate_prisma.py index 8ad773d63..e6f8ca541 100644 --- a/tests/proxy_unit_tests/test_key_generate_prisma.py +++ b/tests/proxy_unit_tests/test_key_generate_prisma.py @@ -1018,7 +1018,7 @@ def test_generate_and_call_with_expired_key(prisma_client): # use generated key to auth in result = await user_api_key_auth(request=request, api_key=bearer_token) print("result from user auth with new key", result) - pytest.fail(f"This should have failed!. 
IT's an expired key") + pytest.fail("This should have failed! It's an expired key") asyncio.run(test()) except Exception as e: